In [None]:
!pip install yfinance pmdarima statsmodels

In [34]:
from kaggle_secrets import UserSecretsClient
IEX_CLOUD_API_TOKEN = UserSecretsClient().get_secret('iex_api_tokene')
import requests
import numpy as np
import pandas as pd
from scipy import stats 
import yfinance
from pmdarima.arima import auto_arima
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statistics import mean


In [35]:
#text file with names off all top 500 stocks
with open('/kaggle/input/stock-symbols/s_and_p_500_symbols.txt') as f:
    symbols = np.array(f.read().splitlines())

In [36]:
def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]
symbol_groups = list(chunks(symbols, 100))
symbol_strings = []
for i in range(0, len(symbol_groups)):
    symbol_strings.append(','.join(symbol_groups[i]))

In [37]:
stock_columns = [
                'Ticker', 
                'Price', 
                'Number of Shares to Buy', 
                'One-Year Price Return', 
                'One-Year Return Percentile',
                'Six-Month Price Return',
                'Six-Month Return Percentile',
                'Three-Month Price Return',
                'Three-Month Return Percentile',
                'One-Month Price Return',
                'One-Month Return Percentile',
                'Price-to-Earnings Ratio', 
                'HQM Score'
                ]

stock_data = pd.DataFrame(columns = stock_columns)

In [38]:
for symbol_string in symbol_strings:  
    #api request to iex cloud 
    data = requests.get(f'https://cloud.iexapis.com/v1/stock/market/batch/?types=stats,quote&symbols={symbol_string}&token={IEX_CLOUD_API_TOKEN}').json()
    for symbol in symbol_string.split(','):
        #iex can only retreive 100 company info per batch so 100 company batch is created
        if symbol == '':
            continue
        stock_data = stock_data.append(
                                    pd.Series([symbol, 
                                               data[symbol]['quote']['latestPrice'],
                                               'N/A',
                                               data[symbol]['stats']['year1ChangePercent'],
                                               'N/A',
                                               data[symbol]['stats']['month6ChangePercent'],
                                               'N/A',
                                               data[symbol]['stats']['month3ChangePercent'],
                                               'N/A',
                                               data[symbol]['stats']['month1ChangePercent'],
                                               'N/A',
                                               data[symbol]['quote']['peRatio'],
                                               'N/A'
                                               ], 
                                              index = stock_columns), 
                                        ignore_index = True)

In [39]:
time_periods = [
                'One-Year',
                'Six-Month',
                'Three-Month',
                'One-Month'
                ]

for row in stock_data.index:
    for time_period in time_periods:
        stock_data.loc[row, f'{time_period} Return Percentile'] = stats.percentileofscore(stock_data[f'{time_period} Price Return'], stock_data.loc[row, f'{time_period} Price Return'])/100

In [40]:
#getting all the percentile and taking mean of it
for row in stock_data.index:
    momentum_percentiles = []
    for time_period in time_periods:
        momentum_percentiles.append(stock_data.loc[row, f'{time_period} Return Percentile'])
    stock_data.loc[row, 'HQM Score'] = mean(momentum_percentiles)

In [41]:
stock_data = stock_data.sort_values(by='HQM Score', ascending=False)
best_momentum_stock = stock_data[:5]['Ticker']
stock_data = stock_data.sort_values(by='Price-to-Earnings Ratio', ascending=False)
best_value_stock = stock_data[:5]['Ticker']
#top 10 stocks by using both stratergy
ticker_data = np.append(best_momentum_stock.to_numpy(),best_value_stock.to_numpy())

In [44]:
ticker_returns={}
for ticker in ticker_data:
    #use yahoo finance api to get info of a company
    getCompanyInfo = yfinance.Ticker(ticker)
    #1 month interval stock data 
    data_train = getCompanyInfo.history(period="max",interval='1mo')
    data_train.reset_index(inplace=True)
    #removing time from date column
    data_train['Date'] = data_train['Date'].dt.date
    data_train.set_index('Date', inplace=True)
    #consider only closing price
    train = data_train['Close']
    #basic arima model
    model_arima= auto_arima(train,trace=True, error_action='ignore', start_p=1,start_q=1,max_p=3,max_q=3,
                  suppress_warnings=True,stepwise=False,seasonal=False)
    #5 year forecasting
    prediction = model_arima.predict(n_periods=60)
    #calculating profit or loss
    profit_or_loss = ((prediction[-1] * 100)/train.iloc[-1]) - 100
    ticker_returns[ticker]=profit_or_loss

 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=1685.016, Time=0.02 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=1684.481, Time=0.13 sec
 ARIMA(0,1,2)(0,0,0)[0] intercept   : AIC=1685.920, Time=0.18 sec
 ARIMA(0,1,3)(0,0,0)[0] intercept   : AIC=1684.448, Time=0.23 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=1684.891, Time=0.10 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=1686.301, Time=0.22 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=1687.619, Time=0.20 sec
 ARIMA(1,1,3)(0,0,0)[0] intercept   : AIC=1685.918, Time=0.31 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=1684.968, Time=0.17 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=1686.782, Time=0.21 sec
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=1663.872, Time=0.60 sec
 ARIMA(2,1,3)(0,0,0)[0] intercept   : AIC=inf, Time=0.67 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AIC=1685.947, Time=0.21 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=1685.855, Time=0.32 sec
 ARIMA(3,1,2)(0,0,0)[0] intercept   : AIC=1665.630, Time=0.68 sec

Best model:  A

In [45]:
print(ticker_returns)

{'FSLR': 19.44959581771147,
 'WYNN': 23.003338393969017,
 'ACGL': 147.34891835000826,
 'STLD': 203.80451932749492,
 'URI': 344.3600109995355,
 'CRM': -9.935263866024059,
 'STZ': 17.390398860674807,
 'NOW': 43.07101568199613,
 'GPN': 24.987077022125405,
 'MHK': 15.024757270256217}