In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import pickle

# Load and prepare data
df = pd.read_csv("data/AAPL_data.csv")
# compute_technical_indicators is defined outside this cell; presumably it adds
# the MA20/MA50/RSI/MACD/Signal columns selected below — verify against its definition.
df = compute_technical_indicators(df)

# Create target: 1 if next day's Close is higher, else 0
next_close = df['Close'].shift(-1)
df['Target'] = (next_close > df['Close']).astype(int)
# Rows with no known next-day Close (at minimum the final row) would otherwise
# be labelled 0, because any comparison against NaN is False and astype(int)
# turns that into a regular 0 that dropna() cannot detect. Drop them so no
# fabricated "down" label reaches training or evaluation.
df = df[next_close.notna()].copy()
# Remove rows that still have a missing value in any column.
df.dropna(inplace=True)

features = ['Close', 'Volume', 'MA20', 'MA50', 'RSI', 'MACD', 'Signal']
X = df[features]
y = df['Target']

# Split into train/test
# shuffle=False keeps the rows in their existing order, so the final 20% of the
# rows form the test set (assumes the CSV is sorted oldest-to-newest — confirm).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Train Random Forest
# 100 trees with a fixed seed; fit() returns the estimator itself, so the
# construction and training are chained into one statement.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the fitted forest on the held-out split.
y_pred_rf = rf.predict(X_test)
acc_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)
cm_rf = confusion_matrix(y_true=y_test, y_pred=y_pred_rf)

print(f"Random Forest Accuracy: {acc_rf:.2f}")
print("Confusion Matrix:\n", cm_rf)

# (Optional) Train Logistic Regression
# max_iter is raised to 1000 iterations for the solver.
lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)
lr_predictions = lr.predict(X_test)
print("Logistic Regression Accuracy:", accuracy_score(y_test, lr_predictions))

# (Optional) Train XGBoost
# NOTE(review): use_label_encoder appears to be deprecated in recent XGBoost
# releases — confirm against the installed version before removing it.
xgb = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
)
xgb.fit(X_train, y_train)
xgb_predictions = xgb.predict(X_test)
print("XGBoost Accuracy:", accuracy_score(y_test, xgb_predictions))

# Save models
# Each file is opened in a `with` block so its handle is flushed and closed as
# soon as the pickle is written, instead of being left open until garbage
# collection. open() does not create directories, so models/ must already exist.
with open("models/random_forest_model.pkl", 'wb') as model_file:
    pickle.dump(rf, model_file)
with open("models/xgboost_model.pkl", 'wb') as model_file:
    pickle.dump(xgb, model_file)
with open("models/logistic_model.pkl", 'wb') as model_file:
    pickle.dump(lr, model_file)
