In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup as bs
import requests
from datetime import datetime
from datetime import timedelta
import yfinance as yf
import yahoofinancials as yhf
import pickle
import time
from random import randint
import dateutil.parser as dparse
from tqdm import tqdm
from IPython.display import clear_output
import scipy.stats
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
np.seterr(divide='ignore', invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [12]:
"""Functions to find best short-long SMA periods for Train dataset"""
#Create a Function to signal when to buy and sell
def buy_sell_fun(data):
    sig_price_buy = []
    sig_price_sell = []
    flag = -1

    for i in range(len(data)):
        if data['SMA_short'][i] > data['SMA_long'][i]:
            if flag != 1:
                sig_price_buy.append(data['stock_adj_close'][i])
                sig_price_sell.append(np.nan)
                flag = 1
            else:
                sig_price_buy.append(np.nan)
                sig_price_sell.append(np.nan)
        elif data['SMA_short'][i] < data['SMA_long'][i]:
            if flag !=0:
                sig_price_buy.append(np.nan)
                sig_price_sell.append(data['stock_adj_close'][i])
                flag = 0
            else:
                sig_price_buy.append(np.nan)
                sig_price_sell.append(np.nan)
        else:
            sig_price_buy.append(np.nan)
            sig_price_sell.append(np.nan)

    return (sig_price_buy, sig_price_sell)

def chart_data(data):
    #store the buy and sell data into a variable
    buy_sell = buy_sell_fun(data)
    data['buy_signal_price'] = buy_sell[0]
    data['sell_signal_price']=buy_sell[1]

    #Looking for the dates of the buy signals
    buy_dates = data.loc[data['buy_signal_price']>0]

    #Looking for the dates of the sell signals
    sell_dates = data.loc[data['sell_signal_price']>0]

    #Dropping uneccessary columns
    buy_dates.drop(columns = ['sell_signal_price', 'SMA_short', 'SMA_long'], inplace = True)
    sell_dates.drop(columns = ['buy_signal_price', 'SMA_short', 'SMA_long'], inplace = True)

    # DF of dates with prices
    dates = pd.concat([buy_dates, sell_dates],sort = True)

    # Now to organize by the dates, drop adj_close price column, and make date the index
    dates.sort_values(by = 'Date', inplace = True)
    dates.drop(columns = ['stock_adj_close'],inplace = True)
    dates.set_index('Date', inplace = True)

def get_profit(data,s_sum): 
    sellList = [x for x in data['sell_signal_price'] if str(x) != 'nan']
    buyList = [x for x in data['buy_signal_price'] if str(x) != 'nan']
    current=0
    if buyList:
        stocks= s_sum/(buyList[0]*1.04)
        if len(buyList)>len(sellList):
            for i in range(0,len(sellList)):
                current = sellList[i]*0.96*stocks
                if current < 0:
                    profit = 0 - s_sum
                    return profit
                else:
                    if i == (len(buyList)-1):
                        stocks= current/(buyList[-1]*1.04)
                    else:
                        stocks= current/(buyList[i+1]*1.04)
            current = (data['stock_adj_close'].iloc[-1])*0.96*stocks
            if current < 0:
                profit = 0 - s_sum
                return profit
        else:
            for i in range(0,len(buyList)):
                current = sellList[i]*0.96*stocks
                if current <0:
                    profit= 0 - s_sum
                    return profit
                else:
                    if i == (len(buyList)-1):
                        stocks= current/(buyList[-1]*1.04)
                    else:
                        stocks= current/(buyList[i+1]*1.04)

        profit = current - s_sum
        return profit
    else:
        profit=0
        return profit

def best_profit(data,s_sum,short_list,long_list):
    best_periods={}
    for ticker in tqdm(data, position = 0, leave = True):
        top_prof=[0,0]
        max_profit=0
        try:
            perf = yf.download(ticker, start='2007-01-01', end='2014-12-31',interval='1d',progress=False)
        except:
            continue
        for short in short_list:
            sma_short = pd.DataFrame()
            sma_short['Adj_close'] = perf['Adj Close'].rolling(window=short).mean()
            for long in long_list:
                ticker_data=[]
                sma_long = pd.DataFrame()
                sma_long['Adj_close'] = perf['Adj Close'].rolling(window=long).mean()
                # Create a dataframe of all adj close prices
                udata = pd.DataFrame(index=perf.index)
                udata['stock_adj_close'] = perf['Adj Close']
                udata['SMA_short'] = sma_short['Adj_close']
                udata['SMA_long']= sma_long['Adj_close']
                udata.reset_index(inplace=True)
                chart_data(udata)
                profit = get_profit(udata,s_sum)
                ticker_data=[ticker,short,long,profit]
                clear_output(wait=True)
                if profit > max_profit:
                    max_profit = profit
                    top_prof=[short,long]
                    
        best_periods[ticker]=top_prof

    fdf= pd.DataFrame.from_dict(
        best_periods,columns=['Short Period','Long Period'],
                               orient='index')
    fdf.drop(fdf.loc[fdf['Short Period']==0].index, inplace=True)
    fdf.reset_index(drop=True)
    fdf.reset_index(inplace=True)
    fdf = fdf.rename(columns={'index': 'Short_Ticker'})
    return fdf

In [13]:
'''Find Best SMA - Based on train_corpus dates- up to 31-12-2014'''
tickers = pd.read_csv('Tickers_Filtered.csv')
s_list=[7,14,21,30]
l_list=[50,100,150,200]
s_sum=1000000
sma_df=best_profit(list(tickers['Short_Ticker']),s_sum,s_list,l_list)
sma_df

100%|██████████████████████████████████████████████████████████████████████████████| 2005/2005 [32:45<00:00,  1.02it/s]


Unnamed: 0,Short_Ticker,Short Period,Long Period
0,VRSK,21,200
1,AA,30,200
2,AEE,14,200
3,PFG,14,100
4,MD,21,150
...,...,...,...
598,BLS,30,200
599,GMED,7,150
600,RMK,21,200
601,SAM,7,200


In [15]:
"""Organizing Sentiment DataFrame"""
df= pd.read_csv('signal_from_bert_70.csv')
#Cleaning Ticker name and Date, filtering by Russell1000 stocks
ticker_list = [x.partition(':')[0] for x in df['Ticker']]
df.insert(1, 'Short_Ticker', ticker_list)
df= df[df['Short_Ticker'].isin(tickers['Short_Ticker'])]
dates_list = [y.partition(' ')[0] for y in df['Date']]
df.insert(3, 'Short_Date', dates_list)
df['Short_Date']=[datetime.strptime(str(x), "%d/%m/%Y").strftime("%Y-%m-%d") for x in df['Short_Date']]
#Creating a Year-Month column for further comparison
year_month = (pd.to_datetime(df['Short_Date'])+ pd.DateOffset(months=1)).dt.to_period('M')
df_year_month = [str(x) for x in year_month]
df.insert(4, 'Year_Month', df_year_month)
df.head()

Unnamed: 0,Ticker,Short_Ticker,Date,Short_Date,Year_Month,Negatives,Positives,Daily SA Score,Labor_Practices_Negatives,Labor_Practices_Positives,...,Business_Model_Resilience_Negatives,Business_Model_Resilience_Positives,Management_Of_Legal_And_Regulatory_Framework_Negatives,Management_Of_Legal_And_Regulatory_Framework_Positives,Selling_Practices_And_Product_Labeling_Negatives,Selling_Practices_And_Product_Labeling_Positives,Customer_Welfare_Negatives,Customer_Welfare_Positives,Physical_Impacts_Of_Climate_Change_Negatives,Physical_Impacts_Of_Climate_Change_Positives
6,AYI:US,AYI,04/01/2007 15:00,2007-01-04,2007-02,1,13,0.8,0,0,...,1,4,0,1,0,0,0,0,0,0
8,RPM:US,RPM,04/01/2007 15:00,2007-01-04,2007-02,3,3,0.0,0,0,...,0,0,2,0,0,0,0,0,0,0
10,STZ:US,STZ,04/01/2007 15:00,2007-01-04,2007-02,0,1,0.5,0,0,...,0,0,0,0,0,0,0,0,0,0
11,MSM:US,MSM,04/01/2007 16:00,2007-01-04,2007-02,0,1,0.5,0,0,...,0,0,0,0,0,0,0,0,0,0
25,FMCC:US,FMCC,05/01/2007 13:30,2007-01-05,2007-02,1,7,0.666667,0,0,...,0,0,1,1,0,0,0,0,0,0


In [19]:
"""Get stock data from yfinance, creating Financial df"""
add_data = []
for ticker in tqdm(list(tickers['Short_Ticker']), position = 0, leave = True):
    try:
        data = yf.download(ticker, start='2007-01-01', end='2021-01-05',interval="1d",progress=False)
    except:
        continue
    clear_output(wait=True)
    data.reset_index(inplace=True)
    period = 20
    multiplier = 2
    data['Date']=data['Date'].astype(str)
    data['Year_Month'] = [x[:7] for x in data['Date']]
    if ticker in list(sma_df['Short_Ticker']):
        index = sma_df[sma_df['Short_Ticker']==ticker].index.values[0]
        data['SMA_Short'] = data['Adj Close'].rolling(
            window=sma_df['Short Period'].iloc[index]).mean()
        data['SMA_Long']= data['Adj Close'].rolling(
            window=sma_df['Long Period'].iloc[index]).mean()
    else:
        data['SMA_Short'] = data['Adj Close'].rolling(
            window= int(sma_df['Short Period'].mean())).mean()
        data['SMA_Long']= data['Adj Close'].rolling(
            window= int(sma_df['Long Period'].mean())).mean()
    data['UpperBand'] = data['Adj Close'].rolling(period).mean() + data['Adj Close'].rolling(period).std() * multiplier
    data['LowerBand'] = data['Adj Close'].rolling(period).mean() - data['Adj Close'].rolling(period).std() * multiplier
    data=data.drop_duplicates('Year_Month', keep='first')
    data['Monthly_Yield'] = pd.DataFrame(data['Adj Close']).pct_change(periods=1, limit=None, freq=None)
    for index, row in data.iterrows():
        add_data.append([
                    ticker,row['Date'][:10],row['Year_Month'],row['SMA_Short'],
                    row['SMA_Long'],row['Adj Close'],row['Monthly_Yield'],row['UpperBand'],
                    row['LowerBand'],row['Volume']])
df1=pd.DataFrame(add_data, columns=[
    'Short_Ticker','Short_Date','Year_Month','SMA_Short','SMA_Long',
    'Adj_Close','Monthly_Yield','UpperBand','LowerBand','Volume'
])  
df1.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
100%|██████████████████████████████████████████████████████████████████████████████| 2005/2005 [21:43<00:00,  1.54it/s]


Unnamed: 0,Short_Ticker,Short_Date,Year_Month,SMA_Short,SMA_Long,Adj_Close,Monthly_Yield,UpperBand,LowerBand,Volume
0,EIX,2007-01-03,2007-01,,,28.98027,,,,2796100.0
1,EIX,2007-02-01,2007-02,,,28.89739,-0.00286,29.040994,27.048512,1407200.0
2,EIX,2007-03-01,2007-03,29.521841,,30.331743,0.049636,30.44034,28.750544,2543700.0
3,EIX,2007-04-02,2007-04,31.338062,,32.656277,0.076637,32.642917,30.262519,3635900.0
4,EIX,2007-05-01,2007-05,33.260176,,34.259285,0.049087,34.537826,32.218721,1199800.0


In [21]:
"""Merging SA and Financial Dataframes"""
mdf = pd.merge(df1, df, how="left", on=['Short_Ticker','Year_Month'])
mdf=mdf.drop([mdf.columns[10],mdf.columns[11]], axis=1)
mdf=mdf.rename(columns={'Short_Date_x': 'Yield_Date','Short_Date_y': 'Signal_Date'})
cols = ['Short_Ticker','Yield_Date','Signal_Date' ,'Year_Month']  + [col for col in mdf if col not in ['Short_Ticker', 'Yield_Date','Signal_Date' ,'Year_Month']]
mdf= mdf[cols]
mdf = mdf.sort_values(by=['Year_Month'])
mdf.reset_index(drop=True,inplace=True)
mdf.head()

Unnamed: 0,Short_Ticker,Yield_Date,Signal_Date,Year_Month,SMA_Short,SMA_Long,Adj_Close,Monthly_Yield,UpperBand,LowerBand,...,Business_Model_Resilience_Negatives,Business_Model_Resilience_Positives,Management_Of_Legal_And_Regulatory_Framework_Negatives,Management_Of_Legal_And_Regulatory_Framework_Positives,Selling_Practices_And_Product_Labeling_Negatives,Selling_Practices_And_Product_Labeling_Positives,Customer_Welfare_Negatives,Customer_Welfare_Positives,Physical_Impacts_Of_Climate_Change_Negatives,Physical_Impacts_Of_Climate_Change_Positives
0,EIX,2007-01-03,,2007-01,,,28.98027,,,,...,,,,,,,,,,
1,KLAC,2007-01-03,,2007-01,,,27.618727,,,,...,,,,,,,,,,
2,WRB,2007-01-03,,2007-01,,,18.180332,,,,...,,,,,,,,,,
3,CKH,2007-01-03,,2007-01,,,58.578136,,,,...,,,,,,,,,,
4,SHAW,2007-01-03,,2007-01,,,32.73,,,,...,,,,,,,,,,


In [23]:
"""Simulation Functions"""
def train_test_split(df):
    train_performance=[]
    test_performance=[]
    month_mean=[]
    month_std=[]
    split_ix = mdf.index[mdf['Year_Month'] == '2015-01'].tolist()[0]
    for month in tqdm(list(mdf['Year_Month'].unique()), position = 0, leave = True):
        start_ix = mdf.index[mdf['Year_Month'] == month].tolist()[0]
        end_ix = mdf.index[mdf['Year_Month'] == month].tolist()[-1]
        month_mean.append(
        mdf['Volume'].iloc[start_ix:end_ix].mean())
        month_std.append(
        mdf['Volume'].iloc[start_ix:end_ix].std())
    for index, row in tqdm(mdf.iterrows(), position = 0, leave = True,total=mdf.shape[0]):
            j=[]
            if np.isnan(row['Monthly_Yield']) or np.isnan(row['UpperBand']):
                upper_i = np.nan
                lower_i = np.nan
            else:    
                upper_gap = float(row['UpperBand']) - float(row['Adj_Close'])
                lower_gap = float(row ['Adj_Close']) - float(row['LowerBand'])
                if upper_gap <= 0.02*float(row['UpperBand']):
                    upper_i = -1
                elif upper_gap <= 0.1*float(row['UpperBand']):
                    upper_i = -0.5
                elif upper_gap <= 0.2*float(row['UpperBand']):
                    upper_i = -0.2                    
                else:
                    upper_i= 0
                if lower_gap <= 0.02*float(row['LowerBand']):
                    lower_i = 1
                elif lower_gap <= 0.1*float(row['LowerBand']):
                    lower_i = 0.5
                elif lower_gap <= 0.2*float(row['LowerBand']):
                    lower_i = 0.2                    
                else:
                    lower_i = 0
                    
            if np.isnan(row['SMA_Short']) or np.isnan(row['SMA_Long']):
                sma_i = np.nan
            else:
                sma_gap = float(row['SMA_Short']) - float(row['SMA_Long'])
                if sma_gap >=0:
                    sma_i=1
                else:
                    sma_i=-1                        
            if index < split_ix:
                month_ix = list(mdf['Year_Month'].unique()).index(row['Year_Month'])
                vol_mean = month_mean[month_ix]
                vol_std = month_std[month_ix]
                if np.isnan(row['Daily SA Score']):
                    rel_sa = np.nan
                else:
                    train_period = mdf.iloc[:split_ix]
                    is_ticker= train_period['Short_Ticker']==row['Short_Ticker']
                    month = row['Year_Month']
                    ticker_df = train_period[is_ticker]
                    ticker_ix = ticker_df.index[ticker_df['Year_Month'] == month][0]
                    rel_sa = float(
                        row['Daily SA Score']/ticker_df['Daily SA Score'].iloc[:ticker_ix].max())
                j.extend([
                row['Short_Ticker'],row['Yield_Date'],row['Signal_Date'],
                row['Year_Month'],row['SMA_Short'],row['SMA_Long'],
                sma_i,row['Adj_Close'],row['Monthly_Yield'],row['Daily SA Score'],
                rel_sa,row['UpperBand'],row['LowerBand'],upper_i,lower_i,
                row['Volume'],(row['Volume']-vol_mean)/vol_std])
                train_performance.append(j)
            else:
                month_ix = list(mdf['Year_Month'].unique()).index(row['Year_Month'])
                vol_mean = month_mean[month_ix]
                vol_std = month_std[month_ix]
                if np.isnan(row['Daily SA Score']):
                    rel_sa = np.nan
                else:
                    test_period = mdf.iloc[split_ix:]
                    is_ticker= test_period['Short_Ticker']==row['Short_Ticker']
                    month = row['Year_Month']
                    ticker_df = test_period[is_ticker]
                    ticker_ix = ticker_df.index[ticker_df['Year_Month'] == month][0]
                    rel_sa = float(
                        row['Daily SA Score']/ticker_df['Daily SA Score'].iloc[:ticker_ix].max())
                j.extend([
                row['Short_Ticker'],row['Yield_Date'],row['Signal_Date'],
                row['Year_Month'],row['SMA_Short'],row['SMA_Long'],
                sma_i,row['Adj_Close'],row['Monthly_Yield'],row['Daily SA Score'],
                rel_sa,row['UpperBand'],row['LowerBand'],upper_i,lower_i,
                row['Volume'],(row['Volume']-vol_mean)/vol_std])
                test_performance.append(j)     
    #Forming Train and Test corpuses
    train_corpus = pd.DataFrame(train_performance,columns=[
        'Short_Ticker','Yield_Date','Signal_Date','Year_Month','SMA_Short',
        'SMA_Long','SMA_Ind','Adj_Close','Monthly_Yield','Daily SA Score','Relative_SA',
        'UpperBand','LowerBand','Upper_Ind','Lower_Ind','Volume','Normalized_Volume'])
    test_corpus = pd.DataFrame(test_performance,columns=[
        'Short_Ticker','Yield_Date','Signal_Date','Year_Month','SMA_Short',
        'SMA_Long','SMA_Ind','Adj_Close','Monthly_Yield','Daily SA Score','Relative_SA',
        'UpperBand','LowerBand','Upper_Ind','Lower_Ind','Volume','Normalized_Volume'])
    return train_corpus,test_corpus

def get_portfolio_value(df,portfolio):
    portfolio_value = 0
    for ticker in list(portfolio.keys()):
        if portfolio.get(ticker)>0:
            try:
                ticker_price = float(df.loc[df['Short_Ticker'] == ticker, 'Adj_Close'].iloc[0])
            except Exception:
                ticker_price = 0
            try:
                ticker_value = float(ticker_price*portfolio.get(ticker))
            except Exception:
                ticker_value = 0
            portfolio_value += ticker_value
    return portfolio_value
        
def simulator(df,n,coefs,s_sum):
    profit = 0
    current = s_sum
    portfolio={}
    profits_dict={}
    first_month = list(df['Year_Month'].unique())[0]
    last_month = list(df['Year_Month'].unique())[-1]
    months = list(df['Year_Month'].unique())
    for month in tqdm(months , position = 0, leave = True):
        is_month = df['Year_Month']==month
        month_df = df[is_month]
        month_df = weighted_score(month_df,coefs)
        top_n = round(n*len(list(month_df['Short_Ticker'])))
        top_df = month_df.nlargest(top_n, 'Weighted_Score')
        top_df['Percent'] = [
            float(
                x/(top_df['Weighted_Score'].notna().sum())) for x in top_df['Weighted_Score']]
        if month == first_month:
            current,portfolio = buy(top_df,portfolio,current)
        elif month == last_month:
            current,portfolio = sell(top_df,month_df,portfolio,current,'YES')
            final_profit = round(100*(current - s_sum)/s_sum,3)
            profits_dict[(int(month[:4])-1)] = final_profit
        else:
            current,portfolio = sell(top_df,month_df,portfolio,current)
            if current <=0:
                print(f'Ran out of cash on {month[:4]}')
                return None
            if month[-2:]=='01':
                pfl_value = get_portfolio_value(month_df,portfolio)
                annual_profit = round(100*(current + pfl_value - s_sum)/s_sum,3)
                profits_dict[(int(month[:4])-1)] = annual_profit
            current,portfolio = buy(top_df,portfolio,current)

    return profits_dict

def buy(top_df,portfolio,current):
    base = current
    for ticker in list(top_df['Short_Ticker']):
        if ticker in list(portfolio.keys()):
            stocks = float(portfolio.get(ticker))
        else:
            stocks = 0
        invest = float(top_df.loc[top_df['Short_Ticker'] == ticker,\
                                   'Percent'].iloc[0])*base
        price = float(top_df.loc[top_df['Short_Ticker'] == ticker, 'Adj_Close'].iloc[0])*1.004
        stocks += float(invest/price)
        portfolio[ticker] = stocks
        current -= invest
    return current,portfolio

def sell(top_df,month_df,portfolio,current,indicator='NO'):
    if indicator == 'NO':
        for ticker in list(portfolio.keys()):
            if ticker not in list(top_df['Short_Ticker']):
                if pd.isnull(portfolio.get(ticker)):
                    continue
                else:
                    try:
                        price = 0.996*float(month_df.loc[
                            month_df['Short_Ticker'] == ticker,'Adj_Close'].iloc[0])
                    except Exception:
                        continue
                    sell = float(portfolio.get(ticker))*price
                    current += sell
                    portfolio[ticker]=0
    else:
        for ticker in list(portfolio.keys()):
            try:
                price = 0.996*float(month_df.loc[
                            month_df['Short_Ticker'] == ticker,'Adj_Close'].iloc[0])
            except Exception:
                continue
            sell = float(portfolio.get(ticker))*price                
            current += sell
            portfolio[ticker]=0       
    return current,portfolio

def train_model(train,s_sum):
    pd.set_option('mode.chained_assignment', None)
    df = train
    profit = 0
    n_list=[]
    current = s_sum
    portfolio={}
    t_coefs=[]
    first_month = list(df['Year_Month'].unique())[0]
    last_month = list(df['Year_Month'].unique())[-1]
    for n in tqdm(np.arange(0.01, 0.31, 0.01), position = 0, leave = True):
        for month in list(df['Year_Month'].unique()):
            is_month = df['Year_Month']==month
            month_df = df[is_month]
            coefs = train_coefs(month_df)
            t_coefs.append(coefs)
            month_df = weighted_score(month_df,coefs)
            top_n = round(n*len(list(month_df['Short_Ticker'])))
            top_df = month_df.nlargest(top_n, 'Weighted_Score')
            top_df['Percent'] = [
                float(x/(top_df['Weighted_Score'].sum())) for x in top_df['Weighted_Score']]
            if month == first_month:
                current,portfolio = buy(top_df,portfolio,current)
            elif month == last_month:
                current,portfolio = sell(top_df,month_df,portfolio,current,'YES')
            else:
                current,portfolio = sell(top_df,month_df,portfolio,current)
                if current <=0:
                    profit = -100
                    del t_coefs[-1]
                    break
                current,portfolio = buy(top_df,portfolio,current)
        profit = 100*(current - s_sum)/s_sum
        if profit >0:
            n_list.append(n)
        
    final_n = np.median(n_list)
    f_coefs = []
    SA1 = np.nanmean([x[0] for x in t_coefs])
    SMA1 =  np.nanmean([x[1] for x in t_coefs])
    U_I1 =  np.nanmean([x[2] for x in t_coefs])
    L_I1 =  np.nanmean([x[3] for x in t_coefs])
    nVol1 =  np.nanmean([x[4] for x in t_coefs])
    f_coefs.extend([SA1,SMA1,U_I1,L_I1,nVol1])
    return final_n, f_coefs

def train_coefs(train):
    coefs=[]
    df= train
    SA = scipy.stats.pearsonr(df['Monthly_Yield'].notna(),df['Relative_SA'].notna())[1]
    SMA = scipy.stats.pearsonr(df['Monthly_Yield'].notna(),df['SMA_Ind'].notna())[1]
    U_I = scipy.stats.pearsonr(df['Monthly_Yield'].notna(),df['Upper_Ind'].notna())[1]
    L_I = scipy.stats.pearsonr(df['Monthly_Yield'].notna(),df['Lower_Ind'].notna())[1]
    nVol = scipy.stats.pearsonr(
        df['Monthly_Yield'].notna(),df['Normalized_Volume'])[1]
    if not isinstance(SA, float):
        SA = np.nanmean([SMA,U_I,L_I,nVol])   
    if not isinstance(SMA, float):
        SMA = np.nanmean([SA,U_I,L_I,nVol])
    if not isinstance(U_I, float):
        U_I = np.nanmean([SA,SMA,L_I,nVol])
    if not isinstance(L_I, float):
        L_I = np.nanmean([SA,U_I,SMA,nVol])
    if not isinstance(nVol, float):
        nVol = np.nanmean([SA,U_I,L_I,SMA])
    if [SA,SMA,U_I,L_I,nVol] == [np.nan,np.nan,np.nan,np.nan,np.nan]:
        coefs.extend([0.2,0.2,0.2,0.2,0.2])
    else:
        coefs.extend([SA,SMA,U_I,L_I,nVol])
    return coefs

def weighted_score(df,coefs):
    SA = coefs[0]
    SMA = coefs[1]
    U_I = coefs[2]
    L_I = coefs[3]
    nVol = coefs[4]
    df['Weighted_Score'] = [
        nVol*z if np.isnan(x) and np.isnan(y) and np.isnan(w)  
        else (nVol*z + x*SA if np.isnan(y) and np.isnan(w)
        else (nVol*z + w*SMA if np.isnan(x) and np.isnan(y)
        else (nVol*z + y*U_I + v*L_I if np.isnan(x) and np.isnan(w)
        else (nVol*z + y*U_I + v*L_I + w*SMA if np.isnan(x)
        else (nVol*z + x*SA + w*SMA if np.isnan(y)
        else (nVol*z + x*SA + y*U_I + v*L_I if np.isnan(w)
        else nVol*z + x*SA + y*U_I + v*L_I + w*SMA))))))
        for v,w,x,y,z
        in zip(
            df['Lower_Ind'],df['SMA_Ind'],df['Relative_SA'],
            df['Upper_Ind'],df['Normalized_Volume'])]
    return df

In [24]:
train,test = train_test_split(mdf)

100%|████████████████████████████████████████████████████████████████████████████████| 169/169 [00:08<00:00, 20.12it/s]
100%|████████████████████████████████████████████████████████████████████████| 206477/206477 [2:27:39<00:00, 23.31it/s]


In [25]:
# sma_df.to_csv('SMA.csv')
# df1.to_csv('Monthly_Data.csv')
# mdf.to_csv('merged.csv')
# test.to_csv('test.csv')
# train.to_csv('train.csv')

In [26]:
s_sum=1000000
n,coefs = train_model(train,s_sum)
coefs

100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [10:29<00:00, 20.97s/it]


[0.3900565559161011,
 0.07427839882514041,
 0.03392599711677078,
 0.03392599711677078,
 0.43635819201105164]

In [27]:
n

0.105

In [28]:
my_profit = simulator(test,n,coefs,s_sum)
my_profit

100%|██████████████████████████████████████████████████████████████████████████████████| 73/73 [00:17<00:00,  4.25it/s]


{2015: -4.481,
 2016: 14.94,
 2017: 49.799,
 2018: 37.173,
 2019: 123.199,
 2020: 264.996}

In [29]:
compare = {'^GSPC':'','^RUT':'','^IXIC':'','^DJI':'','^RUI':''}
years=range(2015,2021)
for index in compare:
    index_dict={}
    for year in years:
        data = yf.download(
            index, start='2015-01-01', end=f'{year+1}-01-02',interval="1d",progress=False)
        j = data.iloc[[0, -1]]
        l = j['Adj Close'].pct_change(periods=1, limit=None, freq=None)        
        index_dict[year]= round(l.iloc[-1]*100,3)
    compare[index]=index_dict
compare['My Program']= my_profit
pd.DataFrame(compare)

Unnamed: 0,^GSPC,^RUT,^IXIC,^DJI,^RUI,My Program
2015,-0.727,-5.712,5.73,-2.233,-1.091,-4.481
2016,8.739,12.653,13.663,10.882,8.502,14.94
2017,29.856,27.46,45.763,38.692,29.487,49.799
2018,21.757,11.942,40.102,30.884,20.963,37.173
2019,56.918,38.497,89.453,60.121,55.912,123.199
2020,82.431,63.93,172.131,71.724,85.331,264.996
