In [None]:
!pip install yfinance --quiet

import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
class DataLoader:
    """Download price history for a single ticker through ``yfinance``.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.download`` (e.g. ``"AAPL"``).
    start_date : str
        Forwarded as the ``start`` argument of ``yf.download``.
    end_date : str
        Forwarded as the ``end`` argument of ``yf.download``.
    """

    def __init__(self, ticker: str, start_date: str, end_date: str):
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date

    def fetch_data(self) -> pd.DataFrame:
        """Fetch the configured range and return it with the index as a column.

        Returns
        -------
        pd.DataFrame
            The frame produced by ``yf.download`` after ``reset_index()``, so
            the former index becomes an ordinary column.
        """
        df = yf.download(
            self.ticker,
            start=self.start_date,
            end=self.end_date,
            # Pinned explicitly: the library default for this flag changed
            # between yfinance releases, so relying on it makes the prices
            # depend on the installed version.
            auto_adjust=True,
        )
        # Non-mutating reset keeps the downloaded frame untouched.
        return df.reset_index()


In [None]:
# Pull AAPL prices for the 2020-2023 window and preview the first rows.
loader = DataLoader(
    ticker="AAPL",
    start_date="2020-01-01",
    end_date="2023-12-31",
)
data = loader.fetch_data()
data.head()


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


Price,Date,Close,High,Low,Open,Volume
Ticker,Unnamed: 1_level_1,AAPL,AAPL,AAPL,AAPL,AAPL
0,2020-01-02,72.620842,72.681289,71.373218,71.627092,135480400
1,2020-01-03,71.914818,72.676447,71.689957,71.847118,146322800
2,2020-01-06,72.487869,72.526556,70.783271,71.034732,118387200
3,2020-01-07,72.146942,72.753823,71.926915,72.497529,108872000
4,2020-01-08,73.307526,73.60976,71.849548,71.849548,132079200


In [None]:
class Preprocessor:
    """Add return / moving-average features and a next-day direction label.

    Parameters
    ----------
    df : pd.DataFrame
        Price data with a ``"Close"`` column. ``df["Close"]`` may be a Series
        or a single-column DataFrame (as produced by two-level column headers
        for one ticker).
    """

    def __init__(self, df: pd.DataFrame):
        self.df = df

    def add_features(self) -> pd.DataFrame:
        """Return a copy of the input with feature and target columns added.

        Added columns:

        * ``Return`` - percentage change of ``Close`` from the previous row.
        * ``MA_5`` / ``MA_10`` - rolling mean of ``Close`` over 5 / 10 rows.
        * ``Target`` - 1 if the next row's ``Close`` is higher than this
          row's, else 0.

        Rows with any missing value are dropped, and so are rows whose next
        close is unknown (in particular the final row): their label cannot be
        determined and must not be recorded as 0.

        Raises
        ------
        ValueError
            If ``df["Close"]`` resolves to more than one column.
        """
        df = self.df.copy()

        close = df["Close"]
        if isinstance(close, pd.DataFrame):
            # Two-level (field, ticker) headers make df["Close"] a DataFrame;
            # reduce the single-ticker case to a Series.
            if close.shape[1] != 1:
                raise ValueError(
                    "Preprocessor expects exactly one 'Close' column, "
                    f"got {close.shape[1]}"
                )
            close = close.iloc[:, 0]

        next_close = close.shift(-1)

        df["Return"] = close.pct_change()
        df["MA_5"] = close.rolling(window=5).mean()
        df["MA_10"] = close.rolling(window=10).mean()
        df["Target"] = (next_close > close).astype(int)  # 1 if price will rise next day

        # A NaN next close compares as False and would become a bogus 0 label,
        # so those rows are removed before the generic NaN drop.
        df = df.loc[next_close.notna()]
        return df.dropna()


In [None]:
# Build the feature/label table from the downloaded prices and preview it.
prep = Preprocessor(df=data)
processed_data = prep.add_features()
processed_data.head()



Price,Date,Close,High,Low,Open,Volume,Return,MA_5,MA_10,Target
Ticker,Unnamed: 1_level_1,AAPL,AAPL,AAPL,AAPL,AAPL,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
9,2020-01-15,75.278061,76.283897,74.84526,75.401375,121923600,-0.004286,75.483109,73.989354,1
10,2020-01-16,76.221062,76.332289,75.459433,75.822114,108829200,0.012527,75.754398,74.349376,1
11,2020-01-17,77.064888,77.067301,76.163018,76.470086,137816400,0.011071,76.1606,74.864383,0
12,2020-01-21,76.542625,77.135,76.404805,76.692532,110843200,-0.006777,76.14174,75.269859,1
13,2020-01-22,76.815872,77.36956,76.721572,77.028639,101832400,0.00357,76.384502,75.736752,1


In [None]:
# Model inputs are the engineered columns; the label is the next-day direction flag.
features = ["Return", "MA_5", "MA_10"]
target = "Target"
X, y = processed_data[features], processed_data[target]

# Hold out 20% of the rows for testing; shuffle=False keeps the row order intact.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)


In [None]:
# Fixed seed so the randomized estimators produce the same fit on every
# Restart & Run All; without it the reported metrics change between runs.
RANDOM_STATE = 42

# Train Random Forest
rf = RandomForestClassifier(random_state=RANDOM_STATE)
rf.fit(X_train, y_train)

# Train Decision Tree
dt = DecisionTreeClassifier(random_state=RANDOM_STATE)
dt.fit(X_train, y_train)

# Train KNN
# NOTE(review): the inputs are used unscaled here; distance-based models are
# usually sensitive to feature scale - consider standardizing. TODO confirm.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)


In [None]:
def evaluate_model(model, X_test, y_test, name):
    """Print a confusion matrix and classification report for a fitted model.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Feature rows to predict on.
        y_test: True labels matching ``X_test``.
        name: Label printed in the section header.

    Returns:
        None. The results are written to stdout.
    """
    print(f"--- {name} ---")
    predictions = model.predict(X_test)

    # Each section is a heading followed by the metric computed on the
    # same predictions, printed in this fixed order.
    sections = (
        ("Confusion Matrix:\n", confusion_matrix),
        ("\nClassification Report:\n", classification_report),
    )
    for heading, metric in sections:
        print(heading, metric(y_test, predictions))

    print("\n")


In [None]:
# Report every fitted classifier on the same held-out split, in a fixed order.
for fitted_model, model_name in (
    (rf, "Random Forest"),
    (dt, "Decision Tree"),
    (knn, "KNN"),
):
    evaluate_model(fitted_model, X_test, y_test, model_name)


--- Random Forest ---
Confusion Matrix:
 [[78 14]
 [91 17]]

Classification Report:
               precision    recall  f1-score   support

           0       0.46      0.85      0.60        92
           1       0.55      0.16      0.24       108

    accuracy                           0.47       200
   macro avg       0.50      0.50      0.42       200
weighted avg       0.51      0.47      0.41       200



--- Decision Tree ---
Confusion Matrix:
 [[40 52]
 [39 69]]

Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.43      0.47        92
           1       0.57      0.64      0.60       108

    accuracy                           0.55       200
   macro avg       0.54      0.54      0.54       200
weighted avg       0.54      0.55      0.54       200



--- KNN ---
Confusion Matrix:
 [[78 14]
 [99  9]]

Classification Report:
               precision    recall  f1-score   support

           0       0.44      0.85      0.5