In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import boto3

# ✅ Step 1: Load cleaned dataset from S3
# The bucket name is defined once and reused for both the download path and
# the upload target, so the two locations cannot drift apart.
bucket = "tesla-stock-sentiment-analysis"
s3_path = f"s3://{bucket}/tesla_balanced_training_data.csv"
df = pd.read_csv(s3_path)

# ✅ Step 2: Add new key feature (close minus open for each row)
df['PriceChange'] = df['Close'] - df['Open']

# ✅ Step 3: Prepare feature and label sets
# NOTE(review): the recorded run of this cell reports 100.00% accuracy. A
# perfect score with 'Open', 'Close' and 'PriceChange' all present as features
# is a typical symptom of target leakage. If 'Label' is derived from the
# same-day Close/Open relationship, the model is only re-deriving the label
# and these columns should be replaced with information available before the
# prediction time. How 'Label' is built is not visible here -- TODO confirm.
X = df[['Open', 'Close', 'Volume', 'SentimentScore', 'PriceChange']]
y = df['Label']

# ✅ Step 4: Split into training and testing (80% / 20%)
# stratify=y keeps the class proportions of 'Label' the same in both splits,
# so the accuracy below is measured on a hold-out set with the same class mix
# as the full dataset instead of whatever a purely random draw produces.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# ✅ Step 5: Train RandomForest model
# random_state is fixed so repeated runs on the same data build the same forest.
model = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)
model.fit(X_train, y_train)

# ✅ Step 6: Evaluate on the held-out 20%
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"✅ Model Accuracy: {accuracy:.2%}")

# ✅ Step 7: Save the model locally
local_file = "tesla_model_fixed.pkl"
joblib.dump(model, local_file)
print(f"✅ Model saved locally as {local_file}")

# ✅ Step 8: Upload to S3
# The object key differs from the local file name: the upload is stored as
# "tesla_model.pkl" in the bucket, replacing any object already at that key.
s3_key = "tesla_model.pkl"

s3 = boto3.client('s3')
s3.upload_file(local_file, bucket, s3_key)
print(f"✅ Model uploaded to s3://{bucket}/{s3_key}")



✅ Model Accuracy: 100.00%
✅ Model saved locally as tesla_model_fixed.pkl
✅ Model uploaded to s3://tesla-stock-sentiment-analysis/tesla_model.pkl
