In [None]:
# Cell 1: Setup & Imports
# Standard-library and third-party modules used by the cells below.
import sys, os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# allow imports from src/
# The relative segment "../src" is made absolute with os.path.abspath, so it is
# resolved against the current working directory of the kernel.
# NOTE(review): presumably the notebook is expected to run from a directory that
# sits next to src/ — confirm, otherwise the project imports below will fail.
sys.path.append(os.path.abspath(os.path.join("..", "src")))

# Project-local helpers, presumably located through the sys.path entry added above.
# NOTE(review): train_xgboost and evaluate_model are not referenced in the cells
# visible here; if strategies.ml_model does not exist yet this import raises.
from data.download_data import download_stock_data
from strategies.ml_model import train_xgboost, evaluate_model  # will create these next

In [None]:
# Cell 2: Load data & features
# Choose ticker & dates
ticker     = "JOBY"
start_date = "2024-01-01"
end_date   = "2024-06-30"

# Download or load from CSV
# NOTE(review): download_stock_data is a project helper; this cell assumes it
# returns a DataFrame with a "Close" column and rows in chronological order — confirm.
df = download_stock_data(ticker, start_date, end_date, save_csv=True)

# Basic feature engineering (window sizes are counted in rows)
close = df["Close"]
df["MA_10"]      = close.rolling(10).mean()
df["MA_50"]      = close.rolling(50).mean()   # first 49 rows are NaN and get dropped below
df["Return1"]    = close.pct_change(1)
df["Return5"]    = close.pct_change(5)
df["Volatility"] = close.rolling(10).std()

# Target: will price close higher tomorrow?
next_close = close.shift(-1)
df["Target"] = (next_close > close).astype(int)

# Drop unusable rows.
# A row with no next-day close has no real label: `NaN > close` evaluates to
# False, so it would silently be labelled 0 and survive a plain dropna().
# Remove those rows explicitly, then drop the rows whose rolling features are
# still NaN. Rebinding (instead of inplace=True) keeps the cell safe to re-run.
df = df.loc[next_close.notna()].dropna().copy()

In [None]:
# Cell 3: Train/test split
# Split by date (time-series split): rows strictly before split_date are used
# for training, rows on or after it are held out for testing.
split_date = "2024-05-01"

# Single source of truth for the model inputs, so the train and test matrices
# can never end up with different columns.
feature_cols = ["MA_10", "MA_50", "Return1", "Return5", "Volatility"]

train = df[df["Date"] < split_date]
test  = df[df["Date"] >= split_date]

# Fail here with a clear message rather than later inside fit()/predict().
if train.empty or test.empty:
    raise ValueError(
        f"Split at {split_date} produced {len(train)} training rows and "
        f"{len(test)} test rows; both sets must be non-empty."
    )

X_train = train[feature_cols]
y_train = train["Target"]
X_test  = test[feature_cols]
y_test  = test["Target"]

In [None]:
# Cell 4: Model training
import xgboost as xgb

# Hyperparameters collected in one mapping so they are easy to read and tweak.
# NOTE(review): use_label_encoder is believed to be deprecated in recent XGBoost
# releases — confirm against the installed version before removing it.
xgb_params = {
    "n_estimators": 100,
    "max_depth": 3,
    "learning_rate": 0.1,
    "use_label_encoder": False,
    "eval_metric": "logloss",
}

# Build the classifier and fit it on the training split from the previous cell.
model = xgb.XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

In [None]:
# Cell 5: Evaluation with Matplotlib-only confusion matrix
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt

# Predictions & metrics
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Confusion matrix data
# Pin the label order so the matrix is always 2x2, even when the test window
# happens to contain (and the model predicts) only one class. Without this the
# matrix can collapse to 1x1 and no longer match the fixed tick labels below.
class_labels = [0, 1]
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Plot confusion matrix
fig, ax = plt.subplots(figsize=(5, 5))
im = ax.imshow(cm, interpolation='nearest', cmap='Blues')
fig.colorbar(im, ax=ax)

# Annotate cells with counts; switch to white text on dark cells for contrast
thresh = cm.max() / 2
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, cm[i, j],
                ha="center", va="center",
                color="white" if cm[i, j] > thresh else "black")

# Axis labels (tick positions are the row/column indices of the matrix)
tick_positions = list(range(len(class_labels)))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(['Pred 0', 'Pred 1'])
ax.set_yticklabels(['True 0', 'True 1'])
ax.set_ylabel('True label')
ax.set_xlabel('Predicted label')
ax.set_title('Confusion Matrix')
fig.tight_layout()
plt.show()

In [None]:
# Cell 6: Feature importance
# Pair each importance score with its feature name, then order from most to
# least important for plotting.
importances = model.feature_importances_
feat_imp = pd.Series(importances, index=X_train.columns)
feat_imp = feat_imp.sort_values(ascending=False)

# Bar chart of the ranked importances on an explicitly created axes
imp_fig, imp_ax = plt.subplots(figsize=(8, 4))
feat_imp.plot.bar(ax=imp_ax)
imp_ax.set_title("Feature Importances")
plt.show()

In [None]:
# Cell 7: Save model
import joblib

# Make sure the output directory exists (relative to the current working
# directory), then serialize the fitted model under a ticker-specific name.
model_dir = "models"
os.makedirs(model_dir, exist_ok=True)

model_path = f"models/{ticker}_xgb_model.pkl"
joblib.dump(model, model_path)
print("Model saved.")
