In [1]:
import pandas as pd
import boto3

# ==== Step 1: Generate Sample DataFrame ====
# Six hand-written example rows: four feature columns plus a binary 'target'.
data = {
    'age': [25, 45, 35, 52, 29, 40],
    'income': [40000, 80000, 60000, 90000, 48000, 75000],
    'click_rate': [0.03, 0.12, 0.05, 0.09, 0.02, 0.11],
    'is_mobile': [1, 0, 1, 0, 1, 0],
    'target': [0, 1, 0, 1, 0, 1]
}

df = pd.DataFrame(data)
csv_filename = 'train.csv'
# index=False keeps the pandas row index out of the file; the header row is
# still written.
df.to_csv(csv_filename, index=False)

print(f"✅ CSV file '{csv_filename}' created successfully.")

# ==== Step 2: Upload to S3 ====
bucket_name = 'vocasync'
s3_key = 'data/train.csv'

# Initialize S3 client (uses default profile or SageMaker role)
s3 = boto3.client('s3')

try:
    s3.upload_file(csv_filename, bucket_name, s3_key)
    print(f"✅ Successfully uploaded '{csv_filename}' to 's3://{bucket_name}/{s3_key}'")
except Exception as e:
    # Report the failure, then re-raise: the training cells read this exact
    # S3 object, so continuing after a failed upload would train on missing
    # or stale data while this cell looked like it had finished normally.
    print(f"❌ Failed to upload to S3: {e}")
    raise


✅ CSV file 'train.csv' created successfully.
✅ Successfully uploaded 'train.csv' to 's3://vocasync/data/train.csv'


In [3]:
from sagemaker.estimator import Estimator
from sagemaker import get_execution_role

# IAM role the training job runs under (hard-coded ARN; get_execution_role is
# imported above but not called in this cell).
role = "arn:aws:iam::241533125856:role/SageMakerExecutionRole-Interns"

print("about to start an instance ")

estimator = Estimator(
    # Job identity and permissions
    base_job_name="xgb-spot-job",
    role=role,
    # Container image plus the user script it runs (train.py inside src/)
    image_uri="683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.3-1",
    source_dir="src",
    entry_point="train.py",
    # Compute: a single ml.m5.large instance
    instance_type="ml.m5.large",
    instance_count=1,
    # Spot training: max_wait (3600) is kept larger than max_run (1800), and
    # checkpoints go to S3 so an interrupted job has somewhere to save state.
    use_spot_instances=True,
    max_run=1800,
    max_wait=3600,
    checkpoint_s3_uri="s3://vocasync/checkpoints/",
    # Where the trained model artifact is written
    output_path="s3://vocasync/model/",
)

# Single input channel named 'train', pointing at the CSV uploaded earlier.
estimator.fit({"train": "s3://vocasync/data/train.csv"})

print("completed")


about to start an instance 


In [None]:
# NOTE(review): this cell has the same statements as the training cell before
# it; running both calls estimator.fit twice. Presumably a retry -- confirm
# whether both cells are still needed.
from sagemaker.estimator import Estimator
from sagemaker import get_execution_role

# Hard-coded execution role ARN (get_execution_role is imported but unused here).
role = "arn:aws:iam::241533125856:role/SageMakerExecutionRole-Interns"

print("about to start an instance ")

estimator = Estimator(
    # What to run: XGBoost 1.3-1 image with train.py taken from src/
    image_uri="683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.3-1",
    entry_point="train.py",
    source_dir="src",
    # Who runs it and under which job-name prefix
    role=role,
    base_job_name="xgb-spot-job",
    # Where it runs
    instance_count=1,
    instance_type="ml.m5.large",
    # Spot configuration with S3 checkpointing
    use_spot_instances=True,
    max_run=1800,
    max_wait=3600,
    checkpoint_s3_uri="s3://vocasync/checkpoints/",
    # Output location for the model artifact
    output_path="s3://vocasync/model/",
)

# Start training with the uploaded CSV as the 'train' channel.
estimator.fit({"train": "s3://vocasync/data/train.csv"})

print("completed")
