In [None]:
import sys
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

sys.path.append('../') # Add root directory to path to import src
from src.feature_engineering import create_features

In [None]:
# Download ten years of daily price history for a single ticker (VOO).
stock_df = yf.download('VOO', start='2014-01-01', end='2024-01-01')

# yfinance reports most failures (unknown ticker, network error, rate limit)
# by returning an empty frame rather than raising, so fail here with a clear
# message instead of letting a later cell break in a confusing way.
if stock_df.empty:
    raise ValueError("yfinance returned no data for 'VOO'; check the ticker and network connection")

# Recent yfinance releases return (field, ticker) MultiIndex columns even for
# a single ticker. Keep only the field level so stock_df['Close'] is a Series.
if isinstance(stock_df.columns, pd.MultiIndex):
    stock_df.columns = stock_df.columns.get_level_values(0)

In [None]:
# Build the model inputs from the raw price history using the project's
# create_features helper (imported from src.feature_engineering).
# NOTE(review): later cells read 'Close', 'ma20', 'ma50', 'volatility' and
# 'rsi' from the result -- confirm create_features provides all of them and
# check how it treats rows where those indicators are not yet defined.
featured_df = create_features(stock_df)

In [None]:
# Target: will the next session's close be higher than today's (1) or not (0)?
# The guard keeps this cell safe to re-run: without it, every extra execution
# would relabel the frame and trim one more row off the end.
if 'Target' not in featured_df.columns:
    next_close = featured_df['Close'].shift(-1)
    featured_df = (
        featured_df
        # .assign returns a new frame, so the object produced by
        # create_features is not modified in place.
        .assign(Target=(next_close > featured_df['Close']).astype(int))
        # The final row has no "tomorrow" to compare against, so drop it.
        .iloc[:-1]
    )

In [None]:
# Indicator columns used as model inputs; 'Target' is the next-day up/down label.
features = ['ma20', 'ma50', 'volatility', 'rsi']
X = featured_df[features]
y = featured_df['Target']

# Chronological hold-out: train on the first 80% of rows and test on the last
# 20%. A shuffled split would mix future days into the training set, which
# leaks information across neighbouring (highly correlated) days and inflates
# the reported accuracy. random_state is omitted because nothing is shuffled.
# NOTE(review): assumes featured_df rows are in ascending date order -- confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [None]:
# Train a 100-tree random forest with a fixed seed so repeated runs build the
# same model from the same training data.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Score the held-out rows and report the share of correct up/down calls.
predictions = model.predict(X_test)
print(f"Model Accuracy: {accuracy_score(y_test, predictions):.2f}")

In [None]:
# Persist the fitted model so it can be loaded outside this notebook.
MODEL_PATH = '../models/stock_predictor.pkl'

# joblib.dump does not create missing directories, so make sure the target
# folder exists first (a fresh checkout may not have ../models yet).
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, MODEL_PATH)
print(f"Model saved to {MODEL_PATH}")