1. lambda_function.py - To generate the Data

In [None]:
import json
import boto3
import yfinance as yf
import requests
from io import StringIO
from datetime import datetime

def lambda_handler(event, context):
    """Build the combined Tesla stock + news-sentiment CSV and store it in S3.

    Downloads the last 7 daily TSLA bars with yfinance, fetches the 5 most
    recently published English "Tesla" articles from NewsAPI, scores each
    article by counting positive/negative keywords, joins the two data sets
    on calendar date and uploads the result to
    ``combined_data/combined_tesla_data.csv`` in the project bucket.

    Args:
        event: Lambda invocation payload (not used).
        context: Lambda runtime context (not used).

    Returns:
        dict: ``{'statusCode': 200, 'body': ...}`` on success, or
        ``{'statusCode': 500, 'body': ...}`` carrying the error text when any
        step raised.
    """
    import os  # local import: only needed for the optional env-var override

    try:
        # Configuration
        # NOTE(security): prefer supplying NEWS_API_KEY through the Lambda
        # environment. The literal is kept only as a backward-compatible
        # fallback and should be rotated/removed once the variable is set.
        news_api_key = os.environ.get('NEWS_API_KEY', 'fed8d7fc5e6b4a5a9b96fb4c3938b509')
        bucket_name = 'tesla-stock-project-bucket'
        output_file = 'combined_data/combined_tesla_data.csv'

        # Keyword lists for the simple sentiment score (built once, not per article).
        # NOTE: matching is by substring, so e.g. 'win' also matches 'window'.
        positive_words = ('good', 'great', 'positive', 'growth', 'profit', 'successful', 'win')
        negative_words = ('bad', 'poor', 'loss', 'decline', 'negative', 'fail', 'drop')

        # Fetch Tesla Stock Data
        tsla_data = yf.download("TSLA", period="7d", interval="1d", auto_adjust=False)
        if tsla_data.empty:
            # Fail loudly instead of overwriting the S3 object with a header-only file.
            raise RuntimeError("No TSLA price data returned by yfinance")
        # Recent yfinance releases return (field, ticker) MultiIndex columns even
        # for a single ticker; flatten them so row['Open'] is a scalar again.
        if getattr(tsla_data.columns, 'nlevels', 1) > 1:
            tsla_data.columns = tsla_data.columns.get_level_values(0)

        # Map 'YYYY-MM-DD' -> 'Open,Close,Volume' for the date join below.
        stock_by_date = {}
        for index, row in tsla_data.iterrows():
            stock_by_date[index.strftime('%Y-%m-%d')] = (
                f"{row['Open']},{row['Close']},{row['Volume']}"
            )

        # Fetch Tesla News & Sentiment
        # The key travels in a header rather than the query string so it cannot
        # leak through exception messages that echo the request URL.
        response = requests.get(
            "https://newsapi.org/v2/everything",
            params={
                'q': 'Tesla',
                'sortBy': 'publishedAt',
                'language': 'en',
                'pageSize': 5,
            },
            headers={'X-Api-Key': news_api_key},
            timeout=10,
        )
        response.raise_for_status()
        articles = response.json().get('articles') or []

        # Score each article and merge with the stock row of the same date.
        final_lines = ["Date,Open,Close,Volume,SentimentScore"]
        for article in articles:
            # NewsAPI may send explicit nulls, so `.get(key, '')` is not enough.
            published = (article.get('publishedAt') or '')[:10]
            title = article.get('title') or ''
            description = article.get('description') or ''
            content = f"{title} {description}".lower()

            # Simple Sentiment Calculation: positive words vs negative words
            score = (
                sum(word in content for word in positive_words)
                - sum(word in content for word in negative_words)
            )

            # Only dates that also have a trading bar make it into the output.
            if published in stock_by_date:
                final_lines.append(f"{published},{stock_by_date[published]},{score}")

        # Upload to S3
        s3 = boto3.client('s3')
        s3.put_object(
            Bucket=bucket_name,
            Key=output_file,
            Body="\n".join(final_lines).encode('utf-8'),
        )

        return {
            'statusCode': 200,
            'body': json.dumps('✅ Tesla Combined Stock + News Data Saved to S3 Successfully!')
        }

    except Exception as e:
        # Top-level Lambda boundary: report the failure to the caller as a 500.
        return {
            'statusCode': 500,
            'body': json.dumps(f'❌ Error: {str(e)}')
        }


2. Training the ML Model - RandomForest Algorithm

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import boto3

# Train a RandomForest classifier on the Tesla training CSV and publish the
# fitted model to S3.

# Locations used by this script: training CSV (read), model pickle (written
# locally, then uploaded). Note the S3 object key differs from the local name.
s3_path = "s3://tesla-stock-sentiment-analysis/tesla_balanced_training_data.csv"
local_file = "tesla_model_fixed.pkl"
bucket = "tesla-stock-sentiment-analysis"
s3_key = "tesla_model.pkl"

# Load the dataset and derive the intraday move (Close - Open) as an extra feature
df = pd.read_csv(s3_path).assign(
    PriceChange=lambda frame: frame['Close'] - frame['Open']
)

# Feature matrix and target column
feature_columns = ['Open', 'Close', 'Volume', 'SentimentScore', 'PriceChange']
X = df[feature_columns]
y = df['Label']

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the forest
model = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    random_state=42,
)
model.fit(X_train, y_train)

# Report accuracy on the held-out rows
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2%}")

# Persist the fitted model to local disk
joblib.dump(model, local_file)
print(f"Model saved locally as {local_file}")

# Push the pickle to S3
s3 = boto3.client('s3')
s3.upload_file(local_file, bucket, s3_key)
print(f"Model uploaded to s3://{bucket}/{s3_key}")



3. Prediction using the Trained Model

In [None]:
import pandas as pd
import joblib
import boto3

# Interactive prediction: download the trained model from S3, read today's
# figures from the console and print the predicted direction for tomorrow.

# Where the model lives in S3 and where to store the downloaded copy
bucket_name = "tesla-stock-sentiment-analysis"
s3_key = "tesla_model.pkl"
local_model_file = "tesla_model_downloaded.pkl"

# Download and unpickle the model
s3 = boto3.client('s3')
s3.download_file(bucket_name, s3_key, local_model_file)
model = joblib.load(local_model_file)
print("Model loaded from S3 successfully!")

# Read today's market figures; any non-numeric entry ends the script
try:
    open_price = float(input("Enter TODAY's OPEN price: "))
    close_price = float(input("Enter TODAY's CLOSE price: "))
    volume = int(input("Enter TODAY's VOLUME: "))
except ValueError:
    print("⚠️ Please enter valid numbers.")
    raise SystemExit

# Derived features: intraday move, plus a placeholder sentiment that simply
# follows the sign of that move (simple rule for now)
price_change = close_price - open_price
if price_change > 0:
    sentiment_score = 0.4
else:
    sentiment_score = -0.3

# Single-row frame with columns in the same order the model was trained on
feature_order = ['Open', 'Close', 'Volume', 'SentimentScore', 'PriceChange']
feature_values = [open_price, close_price, volume, sentiment_score, price_change]
input_data = pd.DataFrame([dict(zip(feature_order, feature_values))])[feature_order]

# Predict tomorrow's trend (label 1 is reported as UP, anything else as DOWN)
prediction = model.predict(input_data)[0]

# Show prediction
print("\n🔮 Prediction for TOMORROW:")
outcome = (
    "📈 Tesla stock is predicted to go UP 🚀"
    if prediction == 1
    else "📉 Tesla stock is predicted to go DOWN 📉"
)
print(outcome)


4. Deploying Streamlit

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import boto3
import joblib
import os
import requests
import io
from datetime import datetime
from transformers import pipeline

# Set Streamlit page config (browser tab title and a centered layout)
st.set_page_config(page_title="Tesla Stock Predictor", layout="centered")

# Constants and secrets
# The NewsAPI key is read from Streamlit's secrets store, section "api".
NEWS_API_KEY = st.secrets["api"]["NEWS_API_KEY"]

# S3 Details: bucket and object key of the pickled model loaded by load_model()
BUCKET_NAME = "tesla-stock-sentiment-analysis"
# NOTE(review): the training script in this file saves the model locally as
# "tesla_model_fixed.pkl" but uploads it under the key "tesla_model.pkl" —
# confirm that an object with the key below actually exists in the bucket.
S3_KEY = "tesla_model_fixed.pkl"

# Pull the pickled model out of S3 without touching local disk.
# st.cache_resource keeps the loaded object so script reruns reuse it.
@st.cache_resource
def load_model():
    """Return the model unpickled from ``s3://BUCKET_NAME/S3_KEY``.

    The object body is read fully into memory and handed to ``joblib.load``
    through an in-memory byte stream.
    """
    payload = boto3.client("s3").get_object(Bucket=BUCKET_NAME, Key=S3_KEY)["Body"].read()
    return joblib.load(io.BytesIO(payload))

model = load_model()

# Fetch real-time Tesla news sentiment
@st.cache_resource
def _get_sentiment_classifier():
    """Build the sentiment-analysis pipeline once and reuse it across reruns."""
    return pipeline("sentiment-analysis")


def fetch_sentiment_score():
    """Score the latest Tesla headlines and return them with the score.

    Requests recent English "Tesla" articles from NewsAPI, keeps the first
    five, classifies "<title>. <description>" of each one and averages
    +1 for every POSITIVE label and -1 for every other label.

    Returns:
        tuple[float, list[dict]]: the average score rounded to 3 decimals
        (``0.0`` when there is nothing to classify) and the headline dicts
        with keys ``title``, ``description`` and ``publishedAt``.

    Raises:
        requests.RequestException: if the NewsAPI call times out or returns
            an HTTP error status.
    """
    # The key is sent as a header rather than in the query string so it does
    # not appear in error messages that echo the request URL.
    response = requests.get(
        "https://newsapi.org/v2/everything",
        params={"q": "Tesla", "sortBy": "publishedAt", "language": "en"},
        headers={"X-Api-Key": NEWS_API_KEY},
        timeout=10,
    )
    response.raise_for_status()
    articles = response.json().get("articles") or []

    headlines = [
        {
            "title": art.get("title", "No Title"),
            "description": art.get("description", "No Description"),
            # publishedAt can be null in the payload; guard before slicing.
            "publishedAt": (art.get("publishedAt") or "")[:19].replace("T", " "),
        }
        for art in articles[:5]
    ]

    # Title/description may be explicit nulls: skip missing parts so the
    # classifier never sees the literal text "None".
    texts = [
        ". ".join(part for part in (a["title"], a["description"]) if part)
        for a in headlines
    ]
    texts = [text for text in texts if text]
    if not texts:
        # No usable news: report neutral sentiment instead of dividing by zero.
        return 0.0, headlines

    sentiments = _get_sentiment_classifier()(texts)
    avg_score = sum(1 if s["label"] == "POSITIVE" else -1 for s in sentiments) / len(sentiments)

    return round(avg_score, 3), headlines

# Page body: three numeric inputs, a predict button, then the result and the
# headlines that fed the sentiment feature.
st.title("📊 Tesla Stock Movement Predictor")
st.markdown("Enter **TODAY'S** stock data to predict **TOMORROW'S** movement.")

open_price = st.number_input("Open Price (Today)", value=0.0)
close_price = st.number_input("Close Price (Today)", value=0.0)
volume = st.number_input("Volume Traded (Today)", value=0)

if st.button("🔍 Predict Tomorrow's Movement"):
    # Derived feature and live news sentiment
    price_change = close_price - open_price
    sentiment_score, news_articles = fetch_sentiment_score()

    # One-row frame, columns ordered the way the model expects them
    feature_order = ["Open", "Close", "Volume", "SentimentScore", "PriceChange"]
    feature_row = {
        "Open": open_price,
        "Close": close_price,
        "Volume": volume,
        "SentimentScore": sentiment_score,
        "PriceChange": price_change,
    }
    input_data = pd.DataFrame([feature_row])[feature_order]

    # Label 1 is treated as "UP"; every other label as "DOWN"
    prediction = model.predict(input_data)[0]
    direction = "UP" if prediction == 1 else "DOWN"

    if direction == "UP":
        st.success("📈 Tesla stock is predicted to go UP tomorrow 🚀")
    else:
        st.error("📉 Tesla stock is predicted to go DOWN tomorrow 📉")

    # List the headlines returned alongside the sentiment score
    st.markdown("### 📰 Current Tesla News Headlines")
    for i, a in enumerate(news_articles):
        st.markdown(f"**{i+1}. {a['title']}**")
        st.markdown(f"🕒 *{a['publishedAt']}*")
        st.markdown(f"📝 {a['description'] or 'No Description Available'}")
        st.markdown("---")
