In [1]:
import yfinance as yf
ticker = yf.Ticker("AAPL")

In [6]:
import numpy as np

In [2]:
data = ticker.history(period="10y")

In [3]:
data.to_csv("../data/AAPL.csv")

In [4]:
data["returns"] = data["Close"].pct_change()

In [7]:
data["movement"] = (data["returns"] > 0).astype(np.int8)

In [8]:
data["vol"] = data["returns"].rolling(14).std()

In [9]:
data["sma_10"] = data["Close"].rolling(10).mean()
data["sma_20"] = data["Close"].rolling(20).mean()

In [10]:
data["sma_50"] = data["Close"].rolling(50).mean()

In [11]:
data["ema_10"] = data["Close"].ewm(span=10, adjust=False).mean()
data["ema_20"] = data["Close"].ewm(span=20, adjust=False).mean()

In [12]:
data["ema_50"] = data["Close"].ewm(span=50, adjust=False).mean()

In [13]:
data["sma_10_ratio"] = data["sma_10"] / data["Close"]
data["sma_20_ratio"] = data["sma_20"] / data["Close"]
data["sma_50_ratio"] = data["sma_50"] / data["Close"]

data["ema_10_ratio"] = data["ema_10"] / data["Close"]
data["ema_20_ratio"] = data["ema_20"] / data["Close"]
data["ema_50_ratio"] = data["ema_50"] / data["Close"]

In [15]:
delta = data['Close'].diff()
gain = delta.where(delta > 0, 0.0)
loss = -delta.where(delta < 0, 0.0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
data["rsi"] = 100 - (100 / (1 + rs))

In [16]:
ema_12 = data['Close'].ewm(span=12, adjust=False).mean()
ema_26 = data['Close'].ewm(span=26, adjust=False).mean()
data['macd'] = ema_12 - ema_26
data['macd_signal'] = data['macd'].ewm(span=9, adjust=False).mean()

In [17]:
data["rsi"] = data["rsi"] / 100

In [18]:
data["volume_10_sma"] = data['Volume'].rolling(window=10).mean()
data['volume_ratio'] = data['Volume'] / data['volume_10_sma']

In [19]:
data.dropna(axis=0, inplace=True)

In [20]:
train = data.iloc[:1700]
test = data.iloc[1700:]

In [21]:
X_train = train[["vol", "sma_20_ratio", "sma_50_ratio", "ema_20_ratio", "ema_50_ratio", "rsi", "macd", "macd_signal", "volume_ratio"]]
Y_train = train["movement"]

X_test = test[["vol", "sma_20_ratio", "sma_50_ratio", "ema_20_ratio", "ema_50_ratio", "rsi", "macd", "macd_signal", "volume_ratio"]]
Y_test = test["movement"]

In [22]:
import pickle

In [25]:
decision_tree = pickle.load(open("../models/aapl_prediction_decision_tree.pkl", "rb"))
knn = pickle.load(open("../models/aapl_prediction_knn.pkl", "rb"))
nn = pickle.load(open("../models/aapl_prediction_nn.pkl", "rb"))

In [28]:
from sklearn.metrics import precision_score, recall_score, f1_score

In [29]:
train_x = X_train
train_y = Y_train

test_x = X_test
test_y = Y_test

In [30]:
def get_metrics(model):
    train_accuracy = (model.predict(train_x) == train_y).mean()
    test_accuracy = (model.predict(test_x) == test_y).mean()

    train_recall = recall_score(model.predict(train_x), train_y, average="weighted")
    test_recall = recall_score(model.predict(test_x), test_y, average="weighted")

    train_precision = precision_score(model.predict(train_x), train_y, average="weighted")
    test_precision = precision_score(model.predict(test_x), test_y, average="weighted")

    train_f1 = f1_score(model.predict(train_x), train_y, average="weighted")
    test_f1 = f1_score(model.predict(test_x), test_y, average="weighted")


    return {
        "train_accuracy": train_accuracy,
        "test_accuracy": test_accuracy,
        "train_precision": train_precision,
        "test_precision": test_precision,
        "train_recall": train_recall,
        "test_recall": test_recall,
        "train_f1": train_f1,
        "test_f1": test_f1
    }

In [None]:
train_

In [31]:
get_metrics(decision_tree)

{'train_accuracy': np.float64(0.9929411764705882),
 'test_accuracy': np.float64(0.5861618798955613),
 'train_precision': 0.9929544541260683,
 'test_precision': 0.5920065140244725,
 'train_recall': 0.9929411764705882,
 'test_recall': 0.5861618798955613,
 'train_f1': 0.9929422952815211,
 'test_f1': 0.5860455038598642}

In [32]:
get_metrics(knn)

{'train_accuracy': np.float64(0.6976470588235294),
 'test_accuracy': np.float64(0.5469973890339426),
 'train_precision': 0.7007280962630023,
 'test_precision': 0.5480857391070469,
 'train_recall': 0.6976470588235294,
 'test_recall': 0.5469973890339426,
 'train_f1': 0.6985364576328101,
 'test_f1': 0.5474665218792052}

In [33]:
get_metrics(nn)

{'train_accuracy': np.float64(0.5964705882352941),
 'test_accuracy': np.float64(0.5861618798955613),
 'train_precision': 0.6446348419905855,
 'test_precision': 0.5908827896649225,
 'train_recall': 0.5964705882352941,
 'test_recall': 0.5861618798955613,
 'train_f1': 0.6088975789875185,
 'test_f1': 0.5878007197257683}