# SageMaker Finance Prediction with Custom Script
This notebook demonstrates how to use a custom training script with AWS SageMaker to predict stock price movement.

In [None]:
!pip install boto3 sagemaker pandas yfinance --quiet

In [None]:
import boto3
import sagemaker
import pandas as pd
import yfinance as yf
from sagemaker.sklearn.estimator import SKLearn

## Download and preprocess stock data

In [None]:
# Download daily AAPL bars for 2020-2022.
prices = yf.download("AAPL", start="2020-01-01", end="2023-01-01")

# yfinance may return (field, ticker) MultiIndex columns even for a single
# ticker; flatten to plain field names so df['Close'] is a Series below.
if isinstance(prices.columns, pd.MultiIndex):
    prices.columns = prices.columns.get_level_values(0)

df = prices[['Open', 'High', 'Low', 'Close', 'Volume']].dropna()

# Derived features plus a temporary next-day close used only to build the label.
df = df.assign(
    Return=df['Close'].pct_change(),
    MA5=df['Close'].rolling(window=5).mean(),
    MA10=df['Close'].rolling(window=10).mean(),
    NextClose=df['Close'].shift(-1),
)

# Drop the rolling-window warm-up rows AND the final row, whose next-day close
# is unknown. Labelling before this drop would silently mark that last row as
# 0 ("down"), because NaN > Close evaluates to False.
df = df.dropna()
df = df.assign(Label=(df['NextClose'] > df['Close']).astype(int)).drop(columns='NextClose')

features = ['Open', 'High', 'Low', 'Close', 'Volume', 'Return', 'MA5', 'MA10']

# Label goes first and the CSVs are written without header or index.
dataset = pd.concat([df['Label'], df[features]], axis=1)

# NOTE(review): this is a random 80/20 split of time-ordered rows, so adjacent
# days (with overlapping moving-average windows) land on both sides. Consider a
# chronological split if the test score is meant to reflect forward-looking use.
train = dataset.sample(frac=0.8, random_state=42)
test = dataset.drop(train.index)
assert len(train) + len(test) == len(dataset), "train/test split lost or duplicated rows"

train.to_csv('finance_train.csv', header=False, index=False)
test.to_csv('finance_test.csv', header=False, index=False)

## Upload data to S3 and configure SageMaker estimator

In [None]:
# AWS handles: the boto3 session exposes the configured region, while the
# SageMaker session is used for the S3 upload and by the estimator.
session = boto3.Session()
sagemaker_session = sagemaker.Session()
region = session.region_name

# Account-specific settings -- edit the two marked values before running.
role = 'arn:aws:iam::<account-id>:role/<SageMakerExecutionRole>'  # <-- Change this
bucket = 'your-sagemaker-bucket'  # <-- Change this
prefix = 'finance-custom-script'

# S3 locations derived from the bucket and prefix.
train_prefix = f'{prefix}/train'
model_output = f's3://{bucket}/{prefix}/output'

# Push the local training CSV to S3; the returned value is handed to fit()
# below as the 'train' channel.
train_input = sagemaker_session.upload_data(
    path='finance_train.csv',
    bucket=bucket,
    key_prefix=train_prefix,
)

# Estimator settings gathered in one place, then expanded into the
# SKLearn constructor. The training logic itself lives in train.py.
estimator_config = dict(
    entry_point='train.py',
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    framework_version='1.0-1',
    py_version='py3',
    output_path=model_output,
    sagemaker_session=sagemaker_session,
)
script_estimator = SKLearn(**estimator_config)

# Launch the training job on the uploaded data.
script_estimator.fit(inputs={'train': train_input})

## Deploy model and predict

In [None]:
# Deploy the trained estimator behind a named real-time endpoint.
predictor = script_estimator.deploy(
    instance_type='ml.m5.large',
    initial_instance_count=1,
    endpoint_name='finance-script-endpoint'
)

# Take the first held-out row. The CSV was written label-first with no header,
# so position 0 is the label and the remaining positions are the features.
sample = pd.read_csv('finance_test.csv', header=None).iloc[0]

# Send the features as a numeric (1, n_features) array. The SKLearn predictor's
# default serializer is NumPy-based (per the SageMaker SDK docs), so a
# comma-joined string would be transmitted as a single text value rather than
# as numbers.
features_row = sample.iloc[1:].to_numpy(dtype=float).reshape(1, -1)
result = predictor.predict(features_row)
print("Prediction:", result)

## Clean up

In [None]:
# Tear down the endpoint created in the deploy step once predictions are done.
predictor.delete_endpoint()