In [1]:
import warnings
warnings.filterwarnings('ignore')
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import statsmodels.api as sm
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Load Apple stock data
data = yf.download('AAPL', start='2024-06-20', end='2024-09-21')

# Create target variable (up or down)
data['Target'] = np.where(data['Close'] > data['Close'].shift(1), 1, 0)

# Drop rows with missing values
data = data.dropna()

# Define features
X = data[['Open', 'High', 'Low', 'Volume']]
y = data['Target']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply Min-Max Scaling
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Apply Z-Score Scaling
z_scaler = StandardScaler()
X_train_z = z_scaler.fit_transform(X_train)
X_test_z = z_scaler.transform(X_test)

# Logistic Regression
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train_scaled, y_train)
y_pred_logreg = logreg.predict(X_test_scaled)
print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_logreg))
print("Logistic Regression Classification Report:\n", classification_report(y_test, y_pred_logreg))

# Decision Tree
dt = DecisionTreeClassifier()
dt.fit(X_train_scaled, y_train)
y_pred_dt = dt.predict(X_test_scaled)
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt))
print("Decision Tree Classification Report:\n", classification_report(y_test, y_pred_dt))

# Random Forest
rf = RandomForestClassifier()
rf.fit(X_train_scaled, y_train)
y_pred_rf = rf.predict(X_test_scaled)
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Random Forest Classification Report:\n", classification_report(y_test, y_pred_rf))

# Gradient Boosting
gb = GradientBoostingClassifier()
gb.fit(X_train_scaled, y_train)
y_pred_gb = gb.predict(X_test_scaled)
print("Gradient Boosting Accuracy:", accuracy_score(y_test, y_pred_gb))
print("Gradient Boosting Classification Report:\n", classification_report(y_test, y_pred_gb))

# Statsmodels Logistic Regression
X_train_sm = sm.add_constant(X_train_scaled)
logreg_sm = sm.OLS(y_train, X_train_sm).fit()
print(logreg_sm.summary())

# Plotting
fig1 = px.scatter_matrix(data)
fig1.update_layout(title='Scatter Matrix')
fig1.show()

fig2 = px.line(data, x=data.index, y='Close')
fig2.update_layout(title='AAPL Stock Price')
fig2.show()

fig3 = px.histogram(data, x='Volume', color='Target')
fig3.update_layout(title='Volume Distribution by Target')
fig3.show()

# Plotting predictions
y_pred = [y_pred_logreg, y_pred_dt, y_pred_rf, y_pred_gb]
models = ['Logistic Regression', 'Decision Tree', 'Random Forest', 'Gradient Boosting']
fig4 = go.Figure(data=[go.Bar(x=models, y=[accuracy_score(y_test, y) for y in y_pred])])
fig4.update_layout(title='Model Accuracy Comparison')
fig4.show()

[*********************100%***********************]  1 of 1 completed


Logistic Regression Accuracy: 0.7692307692307693
Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.25      0.40         4
           1       0.75      1.00      0.86         9

    accuracy                           0.77        13
   macro avg       0.88      0.62      0.63        13
weighted avg       0.83      0.77      0.72        13

Decision Tree Accuracy: 0.6923076923076923
Decision Tree Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.50      0.50         4
           1       0.78      0.78      0.78         9

    accuracy                           0.69        13
   macro avg       0.64      0.64      0.64        13
weighted avg       0.69      0.69      0.69        13

Random Forest Accuracy: 0.6153846153846154
Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.33      0.25   