In [1]:
# Week 6: Logistic Regression & Random Forest Classification

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Load cleaned dataset
# Tunable settings live together here instead of being buried in the calls below.
DATA_PATH = "TeslaStock_Dataset_Cleaned.csv"
FEATURE_COLS = ['Open', 'High', 'Low', 'Volume']
TEST_FRACTION = 0.2   # share of rows held out for evaluation
RANDOM_STATE = 42     # seeds the random forest so reruns give the same trees

raw_df = pd.read_csv(DATA_PATH)

# Step 2: Create "Price_Up" target column
# Price_Up is 1 when the next row's Close is strictly above this row's Close.
# NOTE(review): shift(-1) and the unshuffled split below both assume the rows
# are ordered oldest -> newest; confirm against the CSV.
#
# Only the columns this cell actually depends on are checked for NaN. A bare
# dropna() would also discard rows because of gaps in unrelated columns.
# 'Close' is part of the check because a NaN there would otherwise be silently
# labelled 0 by the ">" comparison. The final row always goes, since the shift
# leaves it without a next-day close.
required_cols = FEATURE_COLS + ['Close', 'Tomorrow_Close']
df = (
    raw_df
    .assign(Tomorrow_Close=lambda frame: frame['Close'].shift(-1))
    .dropna(subset=required_cols)
    .assign(Price_Up=lambda frame: (frame['Tomorrow_Close'] > frame['Close']).astype(int))
)

# Step 3: Select features (you can add more later like MA, Volume etc.)
X = df[FEATURE_COLS]
y = df['Price_Up']

# Step 4: Train/test split
# shuffle=False keeps the original row order, so the test set is the last
# TEST_FRACTION of the rows rather than a random sample.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, shuffle=False
)

# Step 5: Logistic Regression
# NOTE(review): the features are passed in unscaled. If Volume is orders of
# magnitude larger than the price columns (not verifiable from this cell),
# the solver may converge poorly and the default L2 penalty becomes
# scale-dependent; consider wrapping the model in a StandardScaler pipeline.
log_model = LogisticRegression(max_iter=1000)
log_model.fit(X_train, y_train)
y_pred_log = log_model.predict(X_test)
log_acc = accuracy_score(y_test, y_pred_log)

# Step 6: Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
rf_acc = accuracy_score(y_test, y_pred_rf)

# Step 7: Compare results
# Compare both accuracies against the majority-class rate of y_test before
# reading anything into them; a constant predictor already scores that much.
print("📊 Model Accuracy Comparison:")
print(f"Logistic Regression Accuracy: {log_acc:.4f}")
print(f"Random Forest Accuracy: {rf_acc:.4f}")

print("\nClassification Report (Random Forest):")
print(classification_report(y_test, y_pred_rf))


📊 Model Accuracy Comparison:
Logistic Regression Accuracy: 0.5121
Random Forest Accuracy: 0.5077

Classification Report (Random Forest):
              precision    recall  f1-score   support

           0       0.49      0.28      0.36       222
           1       0.51      0.72      0.60       233

    accuracy                           0.51       455
   macro avg       0.50      0.50      0.48       455
weighted avg       0.50      0.51      0.48       455

