In [3]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
# Global plot styling. The seaborn style is set first and the matplotlib style
# sheet is applied afterwards, so the order of these two calls is significant.
# NOTE(review): presumably the later style sheet wins for any setting both
# touch — confirm this is the intended look.
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline

# Yahoo Finance data access.
# NOTE(review): DataReader and pdr are not referenced in any cell visible here
# (prices are fetched with yf.download directly), so these two imports and the
# override below may be unnecessary — confirm before removing.
from pandas_datareader.data import DataReader
import yfinance as yf
from pandas_datareader import data as pdr

# Presumably redirects pandas_datareader's Yahoo reader to yfinance — TODO confirm.
yf.pdr_override()

# For time stamps
from datetime import datetime


def one_year_before(moment):
    """Return midnight on the same calendar date one year before ``moment``.

    Parameters
    ----------
    moment : datetime.datetime
        Reference point; only its year, month and day are used.

    Returns
    -------
    datetime.datetime
        ``moment``'s date shifted back one year, with the time set to 00:00:00.
        29 February maps to 28 February, because the year before a leap year is
        never a leap year and ``datetime(year - 1, 2, 29)`` would raise
        ``ValueError``.
    """
    day = 28 if (moment.month, moment.day) == (2, 29) else moment.day
    return datetime(moment.year - 1, moment.month, day)


# Download window: from one year ago (at midnight) up to the moment this cell runs.
end = datetime.now()
start = one_year_before(end)

In [4]:
# Restaurant-industry ticker symbols to download, one yf.download call each.
ticker_list = [
    'MCD', 'CMG', 'YUM', 'QSR', 'DRI', 'YUMC', 'DPZ', 'WING', '6862.HK', 'TXRH',
    'CAVA', '2702.T', 'JBFCF', 'SHAK', 'WEN', 'DMP.AX', 'PLAY', 'ARCO', 'BLMN', 'PZZA',
    'DEVYANI.NS', 'SSPG.L', 'EAT', 'CAKE', 'SG', 'FWRG', 'JACK', 'EAT.MC', 'KRUS', 'MTY.TO',
    'BJRI', 'PTLO', '9658.HK', 'DIN', '0341.HK', 'CHUY', 'BH', 'TAST', 'DENN', 'PBPB',
    '2753.TW', 'PZA.TO', 'LOCO', 'NATH', 'BARBEQUE.NS', 'GENK', 'RBD.NZ', 'THCH', 'FAT', '0052.HK',
    'STKS', 'RRGB', 'NDLS', 'RAVE',
]
# 'AMR.AE' and 'RTN.L' were removed from the list above.

# Starts empty; the per-ticker rows are accumulated into this frame further down.
all_data = pd.DataFrame()
# Define a function to calculate RSI
def calculate_rsi(data, window=14):
    """Relative Strength Index from simple rolling means of gains and losses.

    Parameters
    ----------
    data : pandas.Series
        Price series (the notebook passes the 'Close' column).
    window : int, default 14
        Number of consecutive price changes averaged per RSI value.

    Returns
    -------
    pandas.Series
        RSI aligned with ``data``. The first ``window`` entries are NaN because
        ``window`` price changes need ``window + 1`` prices. A window with no
        losses yields 100; a window with neither gains nor losses yields NaN
        (0 / 0).
    """
    delta = data.diff(1)

    # clip() keeps the undefined first difference as NaN. Replacing it with 0
    # (as `where(..., 0)` would) silently counts a fabricated "no change" day
    # in the first rolling window and biases the first reported RSI.
    gain = delta.clip(lower=0).rolling(window=window).mean()
    loss = (-delta).clip(lower=0).rolling(window=window).mean()

    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

# Define a function to calculate SMA
def calculate_sma(data, window=20):
    """Simple moving average: the mean over each trailing span of ``window`` rows.

    Rows that do not yet have ``window`` observations behind them come back as NaN.
    """
    trailing = data.rolling(window=window)
    return trailing.mean()

# Build one frame per ticker and concatenate once at the end. Growing a
# DataFrame with pd.concat inside the loop re-copies every previously
# collected row on each iteration.
ticker_frames = []

for ticker in ticker_list:
    # Daily close and volume for the [start, end] window; the date index
    # becomes an ordinary column.
    daily_data = yf.download(ticker, start=start, end=end)[['Close', 'Volume']].copy()
    daily_data = daily_data.reset_index()

    # Technical indicators derived from the closing price.
    daily_data['RSI'] = calculate_rsi(daily_data['Close'])
    daily_data['SMA_20'] = calculate_sma(daily_data['Close'])

    # Previous-row values of 'Close' and 'Volume' as lagged features.
    daily_data['Close_lag1'] = daily_data['Close'].shift(1)
    daily_data['Volume_lag1'] = daily_data['Volume'].shift(1)

    # Market cap and mean analyst target price; either is None when the key
    # is missing from the info mapping.
    info = yf.Ticker(ticker).info
    market_cap = info.get('marketCap', None)
    target_mean_price = info.get('targetMeanPrice', None)

    # These are single values repeated on every row of the ticker,
    # not a per-day history.
    daily_data['Ticker'] = ticker
    daily_data['Market_Cap'] = market_cap
    daily_data['Target_Mean_Price'] = target_mean_price

    ticker_frames.append(daily_data)

# Each ticker keeps its own 0..n-1 row labels, so labels repeat across tickers.
all_data = pd.concat(ticker_frames) if ticker_frames else pd.DataFrame()

# One-hot encoding of 'Ticker' is left to the modelling cell, which still
# needs the raw column for its per-ticker grouping.

print(all_data.shape)
print(all_data.head())

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

(13363, 10)
        Date       Close   Volume  RSI  SMA_20  Close_lag1  Volume_lag1  \
0 2023-03-06  270.640015  2217600  NaN     NaN         NaN          NaN   
1 2023-03-07  267.130005  2766600  NaN     NaN  270.640015    2217600.0   
2 2023-03-08  265.329987  2314000  NaN     NaN  267.130005    2766600.0   
3 2023-03-09  261.630005  2339300  NaN     NaN  265.329987    2314000.0   
4 2023-03-10  262.029999  3093100  NaN     NaN  261.630005    2339300.0   

  Ticker    Market_Cap  Target_Mean_Price  
0    MCD  210073796608             327.05  
1    MCD  210073796608             327.05  
2    MCD  210073796608             327.05  
3    MCD  210073796608             327.05  
4    MCD  210073796608             327.05  


In [5]:
# Step 1: Feature Preparation
# Keep only fully populated rows.
# NOTE(review): dropna() looks at every column, including Market_Cap and
# Target_Mean_Price, so a ticker whose info lookup returned None for either
# loses all of its rows — confirm that is intended.
clean_data = all_data.dropna()

# Step 2: Label Creation
# Target is 1 when the ticker's next close is above the current close, else 0.
next_close = clean_data.groupby('Ticker')['Close'].shift(-1)
labelled_data = clean_data.assign(Target=(next_close > clean_data['Close']).astype(int))

# The last row of each ticker has no next close and therefore no valid label.
# A plain mask avoids groupby.apply, whose handling of the grouping column is
# deprecated in pandas 2.2. The stable sort reproduces the ordering apply gave:
# tickers in sorted order, rows in original order within each ticker.
has_next_close = next_close.notna().to_numpy()
all_data = (
    labelled_data[has_next_close]
    .sort_values('Ticker', kind='stable')
    .reset_index(drop=True)
)

all_data = pd.get_dummies(all_data, columns=['Ticker'])

# Separate features and target variable
X = all_data[['RSI', 'SMA_20', 'Close_lag1', 'Volume_lag1']]
y = all_data['Target']

# Step 3: Model Training
# Split first, then fit the scaler on the training rows only, so the test
# rows' statistics do not leak into the scaling used for training.
# NOTE(review): this is a random split over time-ordered rows, so training rows
# can be later in time than test rows — consider a chronological split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Every row scaled with the training-set statistics; kept under the name this
# cell has always defined.
X_scaled = scaler.transform(X)

model = LogisticRegression()
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.5122067975107707


In [6]:
feature_columns = ['RSI', 'SMA_20', 'Close_lag1', 'Volume_lag1']
X_raw = all_data[feature_columns].to_numpy()
y = all_data['Target'].values

# Sequential split: the first 95% of rows train the model and the remaining 5%
# are held out for testing (a fraction of rows, not a fixed number of days).
# NOTE(review): rows are taken in whatever order the previous cell left them
# (grouped per ticker), so the held-out tail is not necessarily the most recent
# dates — confirm this is the split you want.
train_size = int(len(X_raw) * 0.95)

# Fit the scaler on the training rows only, so the held-out rows do not
# influence the mean/std used to scale the training data.
scaler = StandardScaler()
scaler.fit(X_raw[:train_size])
X = scaler.transform(X_raw)

X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

In [8]:
# Feed-forward binary classifier: two ReLU hidden layers (64 and 32 units)
# followed by a single sigmoid unit. The input width is taken from X_train.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification

model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=Adam(learning_rate=0.001),
)

# 20% of the training data is set aside by Keras for validation during fitting.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=50,
    verbose=1,
)

# Score on the held-out rows; note this rebinds `accuracy` from the earlier cell.
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 0.6940279603004456, Test Accuracy: 0.4894837439060211
