In [310]:
import pandas
import yfinance as yf

In [None]:
# Create the yfinance Ticker handle for the "^GSPC" symbol.
sp500 = yf.Ticker("^GSPC")

In [312]:
# Download the complete price history for the "^GSPC" symbol.
# The ticker is built inline instead of being read from `sp500`, so this cell
# can be re-run after `sp500` has been rebound to the DataFrame (calling
# `.history` on the DataFrame would raise AttributeError).
sp500 = yf.Ticker("^GSPC").history(period="max")

In [313]:
# Display the index of the downloaded price history.
sp500.index

In [314]:

# Line chart of the "Close" column, with the frame's index on the x-axis.
sp500.plot(y="Close", use_index=True, kind="line")

In [315]:
# Remove the two columns that no later cell in this notebook references.
# `drop(..., errors="ignore")` keeps the cell re-runnable: `del` raises
# KeyError once the columns are already gone.
sp500 = sp500.drop(columns=["Dividends", "Stock Splits"], errors="ignore")

In [316]:
# "Tomorrow" holds the next row's "Close"; the final row has no successor and
# therefore gets NaN.
sp500 = sp500.assign(Tomorrow=sp500["Close"].shift(-1))

In [317]:
# Inspect the frame after adding the "Tomorrow" column.
sp500

In [318]:
# Binary label: 1 when the next close is strictly above today's close, else 0.
# A NaN "Tomorrow" compares as False, so such rows are labelled 0.
sp500["Target"] = sp500["Tomorrow"].gt(sp500["Close"]).astype(int)

In [319]:
# Inspect the frame after adding the "Target" label.
sp500

In [320]:
# Keep only rows labelled 1990-01-01 or later, as an independent copy so that
# later column assignments do not write into a view of the full history.
sp500 = sp500.loc[slice("1990-01-01", None)].copy()

In [321]:
# Inspect the frame after restricting it to 1990 onward.
sp500

In [322]:
from sklearn.ensemble import RandomForestClassifier

In [323]:
# Baseline: fit a random forest on the raw price/volume columns.
predictors = ["Close", "Volume", "Open", "High", "Low"]

# Hold out the last 100 rows; everything before them is training data.
train, test = sp500.iloc[:-100], sp500.iloc[-100:]

model = RandomForestClassifier(
    n_estimators=1000,
    min_samples_split=100,
    random_state=1,
)
model.fit(train[predictors], train["Target"])

In [324]:
def predict(train, test, predictors, model):
    """Fit ``model`` on ``train`` and return its class predictions for ``test``.

    Parameters
    ----------
    train, test : DataFrame
        Frames containing the ``predictors`` columns and a "Target" column.
    predictors : list
        Column names passed to the model as features.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is refitted here.

    Returns
    -------
    DataFrame
        Indexed like ``test``, with the true "Target" next to "Predictions".

    Note: a later cell in this notebook defines ``predict`` again, which
    replaces this version once that cell has run.
    """
    model.fit(train[predictors], train["Target"])
    predicted = pd.Series(
        model.predict(test[predictors]),
        index=test.index,
        name="Predictions",
    )
    return pd.concat([test["Target"], predicted], axis=1)

In [325]:
def backtest(data, model, predictors, start=2500, step=250):
    """Walk forward through ``data``, refitting and predicting block by block.

    For every cutoff ``start, start + step, ...`` below the number of rows,
    the model is trained (via ``predict``) on all rows before the cutoff and
    evaluated on the following ``step`` rows (fewer for the last block).

    Parameters
    ----------
    data : DataFrame
        Must hold the ``predictors`` columns and a "Target" column.
    model : estimator
        Forwarded to ``predict``, which refits it on every block.
    predictors : list
        Feature column names.
    start : int
        Number of leading rows used for the first training window.
    step : int
        Number of rows predicted per block.

    Returns
    -------
    DataFrame
        The per-block outputs of ``predict`` stacked in order.
    """
    total_rows = data.shape[0]
    fold_results = []
    for cutoff in range(start, total_rows, step):
        history = data.iloc[:cutoff].copy()
        upcoming = data.iloc[cutoff:cutoff + step].copy()
        fold_results.append(predict(history, upcoming, predictors, model))
    return pd.concat(fold_results)

In [326]:
# Window lengths (in rows) for the rolling features.
horizons = [2, 5, 60, 250, 1000]
new_predictors = []

for horizon in horizons:
    # Roll only the column that is needed. Rolling the whole frame recomputes
    # every column (including the features added by earlier iterations) and
    # then discards all but one of them.
    ratio_column = f"Close_Ratio_{horizon}"
    sp500[ratio_column] = sp500["Close"] / sp500["Close"].rolling(horizon).mean()

    # Sum of the previous `horizon` targets; shift(1) excludes the current
    # row's own target, so the feature never contains the label it predicts.
    trend_column = f"Trend_{horizon}"
    sp500[trend_column] = sp500["Target"].shift(1).rolling(horizon).sum()

    new_predictors += [ratio_column, trend_column]

In [327]:


# Ticker symbols of the companies to download.
tickers = ['AAPL', 'MSFT', 'JNJ', 'AMZN', 'GOOGL','BRK-A']

# Full price history per company, keyed by ticker symbol.
stocks = {
    symbol: yf.Ticker(symbol).history(period="max")
    for symbol in tickers
}

# Per-company copies restricted to rows labelled 1990-01-01 or later.
# The unpacking order follows the order of `tickers`.
(
    apple_data,
    microsoft_data,
    jnj_data,
    amazon_data,
    alphabet_data,
    berkshire_data,
) = (stocks[symbol].loc["1990-01-01":].copy() for symbol in tickers)

In [328]:
import pandas as pd
import numpy as np

# Sign of each company's open-to-close move: +1 up, -1 down, 0 unchanged.
for ticker, data in stocks.items():
    data['Price Change'] = np.sign(data['Close'] - data['Open'])

# Stack every company's (Date, Price Change) rows into one long frame.
all_price_changes = pd.concat([data.reset_index()[['Date', 'Price Change']] for data in stocks.values()])


def _most_common_change(changes):
    """Return the most frequent value in ``changes`` (smallest on ties), or NaN.

    ``Series.mode`` ignores NaN and returns an empty Series when nothing is
    left; indexing that result directly would raise, so NaN is returned instead.
    """
    modes = changes.mode()
    return modes.iloc[0] if not modes.empty else np.nan


# Most common sign per date across the companies.
mode_price_change = all_price_changes.groupby('Date')['Price Change'].apply(_most_common_change)

# Index-aligned assignment gives the same values as a left merge on the index
# (the grouped index is unique), but re-running the cell overwrites the column
# instead of producing suffixed `_x` / `_y` duplicates.
sp500['Mode Price Change'] = mode_price_change.reindex(sp500.index)

In [329]:
# Inspect the frame after adding the "Mode Price Change" column.
sp500

In [330]:
horizons = [2, 5, 60, 250, 1000]
for horizon in horizons:
    trend_key_change = f'Trend_Key_Change_{horizon}'
    # Sum of the previous `horizon` rows of "Mode Price Change"; shift(1)
    # leaves the current row's own value out of the window.
    sp500[trend_key_change] = sp500['Mode Price Change'].shift(1).rolling(horizon).sum()
    # Guarded append: re-running this cell must not list a feature twice.
    if trend_key_change not in new_predictors:
        new_predictors.append(trend_key_change)

In [331]:
# Inspect the frame after adding the Trend_Key_Change_* columns.
sp500

In [332]:
# Show the accumulated list of engineered feature names.
new_predictors

In [333]:
# Drop every row that has a NaN in any column. That removes the leading rows
# whose rolling windows are incomplete and also the final row, whose
# "Tomorrow" is NaN because it comes from shift(-1).
sp500 = sp500.dropna(axis=0, how="any")

In [334]:
# Inspect the frame after rows with missing values were removed.
sp500

In [335]:
# Rebind `model` to a fresh random forest with a smaller min_samples_split;
# random_state is fixed so repeated runs build the same forest.
model = RandomForestClassifier(
    random_state=1,
    n_estimators=1000,
    min_samples_split=50,
)

In [336]:
def predict(train, test, predictors, model, threshold=0.6):
    """Fit ``model`` on ``train`` and label ``test`` rows with a probability cut-off.

    Parameters
    ----------
    train, test : DataFrame
        Frames containing the ``predictors`` columns and a "Target" column.
    predictors : list
        Column names passed to the model as features.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict_proba(X)``; it is refitted
        on every call.
    threshold : float, default 0.6
        A row is labelled 1.0 when its column-1 probability is greater than
        or equal to this value, otherwise 0.0.

    Returns
    -------
    DataFrame
        Indexed like ``test``, with the true "Target" next to "Predictions"
        (floats 0.0 / 1.0).
    """
    model.fit(train[predictors], train["Target"])
    # Column 1 is used as the probability of the positive class; this assumes
    # both classes occur in `train` so that predict_proba has two columns.
    positive_proba = model.predict_proba(test[predictors])[:, 1]
    # One vectorised comparison replaces two in-place masks whose result
    # depended on the order in which they were applied.
    labels = (positive_proba >= threshold).astype(float)
    preds = pd.Series(labels, index=test.index, name="Predictions")
    return pd.concat([test["Target"], preds], axis=1)

In [337]:
# Walk-forward evaluation of the current model on the engineered features.
predictions = backtest(data=sp500, model=model, predictors=new_predictors)

In [338]:
# Inspect the stacked backtest output ("Target" next to "Predictions").
predictions

In [339]:
# Count how many rows received each predicted label.
predictions.loc[:, "Predictions"].value_counts()

In [340]:
from sklearn.metrics import precision_score

In [341]:
# Precision of the backtest: the share of predicted 1s whose target was 1.
precision_score(y_true=predictions["Target"], y_pred=predictions["Predictions"])

In [342]:
# Independent copy of the last 2000 rows of the cleaned frame.
tomorrow_data = sp500.tail(2000).copy()

In [1]:
# Class probabilities for the selected rows. This relies on `model` having
# been fitted by an earlier cell (the backtest refits it on every block).
tomorrow_prediction = model.predict_proba(tomorrow_data.loc[:, new_predictors])

In [344]:
# Inspect the raw probability array, one row per row of `tomorrow_data`.
tomorrow_prediction

In [345]:
# Overwrite column 1 in place with a 0/1 label; column 0 keeps its original
# probability values.
# NOTE(review): the cut-off here is 0.57, while `predict` (used by the
# backtest) thresholds at 0.6 - confirm the difference is intended, since the
# measured precision describes the 0.6 rule.
positive_class = tomorrow_prediction[:, 1]
tomorrow_prediction[:, 1] = (positive_class >= 0.57).astype(int)

In [346]:
# Inspect the array after column 1 was replaced by thresholded 0/1 labels.
tomorrow_prediction

In [347]:
import pickle

# Serialise the model object to disk. Only unpickle this file from a trusted
# location: loading a pickle can execute arbitrary code.
with open("updatedSPmodel.pkl", mode="wb") as model_file:
    pickle.dump(model, file=model_file)