#### Dashboard for investments

Idea: We want to invest when there is significant deviation away from the long-term mean

Steps:
1. Calculate long term trend of SNP500 - Loop through each time period in the dataframe and then conduct a ols regression and store the outputs. The bins are 15 year periods (12 * 15)
2. Determine at which points was there significant deviation away from trend (Jacksobian co-integration test)
3. Create threshold at which point there was significant deviation away that led you to an investment decision 
4. Create algorithm that invests when this happens

Data: 
1. Lets limit it to > 1900
2. Currently we just have a randomly CSV of the data, our goal is to find a constant datasource.

In [1]:
import os
import pandas as pd 
import numpy as np 
import matplotlib as plt 
from datetime import datetime

In [2]:
path = os.getcwd()
path

'/Users/dylanjohnson/Documents/bos_investments/bos_investments'

In [62]:
# env = os.chdir("./Documents/bos_investments")

In [204]:
df = pd.read_csv("s&p500.csv", infer_datetime_format = True)
df.head(6)

Unnamed: 0,Date,SP500,Dividend,Earnings,Consumer Price Index,Long Interest Rate,Real Price,Real Dividend,Real Earnings,PE10
0,1871-01-01,4.44,0.26,0.4,12.46,5.32,89.0,5.21,8.02,
1,1871-02-01,4.5,0.26,0.4,12.84,5.32,87.53,5.06,7.78,
2,1871-03-01,4.61,0.26,0.4,13.03,5.33,88.36,4.98,7.67,
3,1871-04-01,4.74,0.26,0.4,12.56,5.33,94.29,5.17,7.96,
4,1871-05-01,4.86,0.26,0.4,12.27,5.33,98.93,5.29,8.14,
5,1871-06-01,4.82,0.26,0.4,12.08,5.34,99.66,5.38,8.27,


### Step 1: Create the bins

What do we want to do:
1. Create a bin for each time_stamp

In [205]:
df['Date'] = pd.to_datetime(df['Date'])

In [208]:
df = df[df['Date'] >= "1900-01-01"]

In [210]:
df

Unnamed: 0,Date,SP500,Dividend,Earnings,Consumer Price Index,Long Interest Rate,Real Price,Real Dividend,Real Earnings,PE10
348,1900-01-01,6.10,0.22,0.48,7.90,3.15,192.98,6.88,15.19,18.67
349,1900-01-02,6.21,0.23,0.48,7.99,3.15,194.12,7.03,15.00,18.70
350,1900-01-03,6.26,0.23,0.48,7.99,3.14,195.69,7.27,15.00,18.78
351,1900-01-04,6.34,0.24,0.48,7.99,3.14,198.19,7.50,15.00,18.94
352,1900-01-05,6.04,0.25,0.48,7.80,3.13,193.41,7.93,15.37,18.40
...,...,...,...,...,...,...,...,...,...,...
1763,2017-01-12,2664.34,48.93,109.88,246.52,2.40,2700.13,49.59,111.36,32.09
1764,2018-01-01,2789.80,49.29,,247.87,2.58,2811.96,49.68,,33.31
1765,2018-01-02,2705.16,49.64,,248.99,2.86,2714.34,49.81,,32.12
1766,2018-01-03,2702.77,50.00,,249.55,2.84,2705.82,50.06,,31.99


In [211]:
a = pd.to_datetime(df['Date'])

In [212]:
a = np.array(a)

In [213]:
a

array(['1900-01-01T00:00:00.000000000', '1900-01-02T00:00:00.000000000',
       '1900-01-03T00:00:00.000000000', ...,
       '2018-01-02T00:00:00.000000000', '2018-01-03T00:00:00.000000000',
       '2018-01-04T00:00:00.000000000'], dtype='datetime64[ns]')

In [214]:
sp_500 = df["SP500"]

In [215]:
sp_500

348        6.10
349        6.21
350        6.26
351        6.34
352        6.04
         ...   
1763    2664.34
1764    2789.80
1765    2705.16
1766    2702.77
1767    2642.19
Name: SP500, Length: 1420, dtype: float64

Make a new column that tells the bins that it has

In [217]:
print(len(a))
rows = len(a)
print(rows)
bin_size = 15*12
print(bin_size)

1420
1420
180


In [218]:
col_names = []
for i in range(len(a)):
    col_names.append("bin_"+str(i)) 

In [219]:
bins = pd.DataFrame(columns = col_names)
bins = bins.reindex(list(range(0, rows))).reset_index(drop=True)
bins

Unnamed: 0,bin_0,bin_1,bin_2,bin_3,bin_4,bin_5,bin_6,bin_7,bin_8,bin_9,...,bin_1410,bin_1411,bin_1412,bin_1413,bin_1414,bin_1415,bin_1416,bin_1417,bin_1418,bin_1419
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1415,,,,,,,,,,,...,,,,,,,,,,
1416,,,,,,,,,,,...,,,,,,,,,,
1417,,,,,,,,,,,...,,,,,,,,,,
1418,,,,,,,,,,,...,,,,,,,,,,


In [220]:
n = rows
lists = [[] for _ in range(n)]
lists
bins.columns

Index(['bin_0', 'bin_1', 'bin_2', 'bin_3', 'bin_4', 'bin_5', 'bin_6', 'bin_7',
       'bin_8', 'bin_9',
       ...
       'bin_1410', 'bin_1411', 'bin_1412', 'bin_1413', 'bin_1414', 'bin_1415',
       'bin_1416', 'bin_1417', 'bin_1418', 'bin_1419'],
      dtype='object', length=1420)

In [221]:
columns = len(bins.columns)

In [222]:
print(len(bins))
print(len(bins.columns))
print(bin_size)

1420
1420
180


In [224]:
def create_empty_lists(rows):    
    n = rows
    lists = [[] for i in range(n)]
    return lists
lists = create_empty_lists(rows)

In [225]:
lists

[[],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],


In [226]:
## Note for some reason the number of rows = 1770 and the number of columns = 1688
def bin_defined(columns, rows, bin_size, empty_list, sp_500):
    for i in range(len(columns)): ## For each bin (column)
        for y in range(rows): #for each row
            if i <= y <= i+bin_size: ##for each index within our bin our length assign the index to the correct bin 
                lists[i].append(1)
                
            else:
                lists[i].append(0)       
    
    return lists
l_of_l = bin_defined(columns = bins.columns, rows = len(bins), bin_size = 180, empty_list = lists, sp_500 = sp_500)

In [None]:
#multiply 

In [232]:
len(l_of_l)

1420

In [237]:
def convert_dataframe(l_of_l, sp_500,a):
    
    m = np.array(l_of_l)
    c = np.array(sp_500)
    np_bin = m * c[:, np.newaxis]
    
    col_names = []
    
    df = pd.DataFrame(np_bin)
    df = df.transpose()

    
    for i in range(len(l_of_l)):
        col_names.append("bin_" + str(i))
    df.columns = [col_names]
    
    df['date']= pd.Series(a)
    return df 
df = convert_dataframe(l_of_l, sp_500,a)

In [238]:
df

Unnamed: 0,bin_0,bin_1,bin_2,bin_3,bin_4,bin_5,bin_6,bin_7,bin_8,bin_9,...,bin_1411,bin_1412,bin_1413,bin_1414,bin_1415,bin_1416,bin_1417,bin_1418,bin_1419,date
0,6.1,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.0,0.00,0.00,0.0,0.00,0.00,0.00,1900-01-01
1,6.1,6.21,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.0,0.00,0.00,0.0,0.00,0.00,0.00,1900-01-02
2,6.1,6.21,6.26,0.00,0.00,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.0,0.00,0.00,0.0,0.00,0.00,0.00,1900-01-03
3,6.1,6.21,6.26,6.34,0.00,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.0,0.00,0.00,0.0,0.00,0.00,0.00,1900-01-04
4,6.1,6.21,6.26,6.34,6.04,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.0,0.00,0.00,0.0,0.00,0.00,0.00,1900-01-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1415,0.0,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.0,...,2456.22,2492.84,2557.0,2593.61,2664.34,0.0,0.00,0.00,0.00,2017-01-12
1416,0.0,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.0,...,2456.22,2492.84,2557.0,2593.61,2664.34,2789.8,0.00,0.00,0.00,2018-01-01
1417,0.0,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.0,...,2456.22,2492.84,2557.0,2593.61,2664.34,2789.8,2705.16,0.00,0.00,2018-01-02
1418,0.0,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.0,...,2456.22,2492.84,2557.0,2593.61,2664.34,2789.8,2705.16,2702.77,0.00,2018-01-03


### Step 2: Do analysis