## Ingest data to S3
### upload raw data and cleaned data to S3

In [10]:
import pandas as pd
import boto3
import sagemaker
from sagemaker import Session
import os

In [11]:
## Set up SageMaker session and role for AWS profile "sagemaker"
boto3.setup_default_session(profile_name="sagemaker")

# Create SageMaker session and role
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
region = sess.boto_region_name

# Create AWS clients
sm_client = boto3.client("sagemaker", region_name=region)
sts_client = boto3.client("sts", region_name=region)

bucket = sess.default_bucket()
prefix = "horizon-capital-forecasting"

print(f"SageMaker session set up with role: {role}")
print(f"Region: {region}")
print(f"Default bucket: {bucket}")

SageMaker session set up with role: arn:aws:iam::306617143793:role/sagemaker-execution-role
Region: us-east-1
Default bucket: sagemaker-us-east-1-306617143793


In [14]:
## raw data path
raw_data_path = "./raw_data.csv"
## processed data path
processed_data_path = "./cleaned_dataset.csv"

In [15]:
# S3 client
s3 = boto3.client("s3", region_name=region)

# set the S3 keys for raw and processed data
raw_s3_key = "data/raw/raw_data.csv"
processed_s3_key = "data/processed/cleaned_dataset.csv"

# Upload raw data
if os.path.exists(raw_data_path):
    s3.upload_file(raw_data_path, bucket, raw_s3_key)
    raw_data_s3_uri = f"s3://{bucket}/data/raw/raw_data.csv"
    print(f"Uploaded raw data to {raw_data_s3_uri}")

else:
    print("raw_data.csv not found")

# Upload processed data
if os.path.exists(processed_data_path):
    s3.upload_file(processed_data_path, bucket, processed_s3_key)
    processed_data_s3_uri = f"s3://{bucket}/data/processed/cleaned_data.csv"
    print(f"Uploaded processed data to {processed_data_s3_uri}")
else:
    print("cleaned_data.csv not found")

Uploaded raw data to s3://sagemaker-us-east-1-306617143793/data/raw/raw_data.csv
Uploaded processed data to s3://sagemaker-us-east-1-306617143793/data/processed/cleaned_data.csv


In [16]:
# store the S3 URIs for later use
%store raw_data_s3_uri
%store processed_data_s3_uri

Stored 'raw_data_s3_uri' (str)
Stored 'processed_data_s3_uri' (str)
