In [1]:
#from pandas import read_csv
from pandas import datetime
import pandas as pd
import numpy as np
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn import metrics
%matplotlib inline

  from pandas import datetime


This notebook was heavily influenced by the notebook below, in fact, it is mostly just an application of the same technique in a different context: 

Source: https://github.com/aniketnmishra/PEAD

# EPS and STD predictions
- we will first have to predict the earnings of the companies using linear regression since 
1. Analyst estimates data for the stocks in the study period was not available from a reliable source.
2. Using a time series model like ARIMA: ARIMA model can be used to predict earnings based on previous earnings, only when the time series formed from previous lagged earnings is stationary(without trend and seasonality). In our case, the series was not stationary, hence ARIMA could not be used.

## Data preprocessing

In [2]:
fundamentals = pd.read_csv('../dataset/fundamentals_filtered.csv')

In [3]:
fundamentals

Unnamed: 0,datekey,ticker,dimension,calendardate,reportperiod,lastupdated,accoci,assets,assetsavg,assetsc,...,sharesbas,shareswa,shareswadil,sps,tangibles,taxassets,taxexp,taxliabilities,tbvps,workingcapital
0,2020-11-13,ZSAN,ARQ,2020-09-30,2020-09-30,2020-11-13,0.0,80452000.0,,44169000.0,...,102066218.0,77883158.0,77883158.0,0.000,80452000.0,0.0,0.0,0.0,1.033,30995000.0
1,2020-08-06,ZSAN,ARQ,2020-06-30,2020-06-30,2020-11-13,0.0,47712000.0,,11246000.0,...,68583356.0,54927408.0,54927408.0,0.000,47712000.0,0.0,0.0,0.0,0.869,-5300000.0
2,2020-05-14,ZSAN,ARQ,2020-03-31,2020-03-31,2020-11-13,0.0,55335000.0,,19180000.0,...,54361635.0,36266018.0,36266018.0,0.000,55335000.0,0.0,0.0,0.0,1.526,-1428000.0
3,2020-03-13,ZSAN,ARQ,2019-12-31,2019-12-31,2020-11-13,0.0,37670000.0,,6813000.0,...,54338912.0,18796759.0,18796759.0,0.000,37670000.0,0.0,0.0,0.0,2.004,-9424000.0
4,2019-11-14,ZSAN,ARQ,2019-09-30,2019-09-30,2020-11-13,0.0,36668000.0,,7271000.0,...,18230803.0,17832092.0,17832092.0,0.000,36668000.0,0.0,0.0,0.0,2.056,-6855000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19384,2016-05-12,AAME,ARQ,2016-03-31,2016-03-31,2020-11-11,5561000.0,300133000.0,,,...,20419486.0,20419486.0,,2.044,297589000.0,0.0,478000.0,175000.0,14.574,
19385,2016-03-29,AAME,ARQ,2015-12-31,2015-12-31,2020-11-11,4584000.0,314603000.0,,,...,20394007.0,20394007.0,,1.954,312059000.0,829000.0,30000.0,0.0,15.302,
19386,2015-11-10,AAME,ARQ,2015-09-30,2015-09-30,2020-11-11,4807000.0,317203000.0,,,...,20547430.0,20547430.0,,1.964,314659000.0,310000.0,127000.0,0.0,15.314,
19387,2015-08-12,AAME,ARQ,2015-06-30,2015-06-30,2020-11-11,7235000.0,327225000.0,,,...,20592690.0,20592690.0,,2.174,324681000.0,0.0,844000.0,338000.0,15.767,


In [4]:
fundamentals['ticker'].nunique()

843

In [5]:
# we want only the epsusd, calendar and Ticker columns left
data = pd.DataFrame(columns=['ticker', 'date', 'eps'])


In [6]:
# read them into the data dataframe
data['ticker'] = fundamentals['ticker']
data['date'] = fundamentals['calendardate']
data['eps'] = fundamentals['epsusd']

del fundamentals 

In [7]:
data

Unnamed: 0,ticker,date,eps
0,ZSAN,2020-09-30,-0.11
1,ZSAN,2020-06-30,-0.14
2,ZSAN,2020-03-31,-0.24
3,ZSAN,2019-12-31,-0.45
4,ZSAN,2019-09-30,-0.55
...,...,...,...
19384,AAME,2016-03-31,0.04
19385,AAME,2015-12-31,0.00
19386,AAME,2015-09-30,0.01
19387,AAME,2015-06-30,0.16


In [8]:
data['date'].unique()
#the date represents all the quarters in the data from 2015 q1 onwards till 2020 q3

array(['2020-09-30', '2020-06-30', '2020-03-31', '2019-12-31',
       '2019-09-30', '2019-06-30', '2019-03-31', '2018-12-31',
       '2018-09-30', '2018-06-30', '2018-03-31', '2017-12-31',
       '2017-09-30', '2017-06-30', '2017-03-31', '2016-12-31',
       '2016-09-30', '2016-06-30', '2016-03-31', '2015-12-31',
       '2015-09-30', '2015-06-30', '2015-03-31'], dtype=object)

In [9]:
column_name = data['date'].unique().tolist()
column_name

['2020-09-30',
 '2020-06-30',
 '2020-03-31',
 '2019-12-31',
 '2019-09-30',
 '2019-06-30',
 '2019-03-31',
 '2018-12-31',
 '2018-09-30',
 '2018-06-30',
 '2018-03-31',
 '2017-12-31',
 '2017-09-30',
 '2017-06-30',
 '2017-03-31',
 '2016-12-31',
 '2016-09-30',
 '2016-06-30',
 '2016-03-31',
 '2015-12-31',
 '2015-09-30',
 '2015-06-30',
 '2015-03-31']

In [10]:
a = pd.DataFrame(columns=column_name)
a

Unnamed: 0,2020-09-30,2020-06-30,2020-03-31,2019-12-31,2019-09-30,2019-06-30,2019-03-31,2018-12-31,2018-09-30,2018-06-30,...,2017-06-30,2017-03-31,2016-12-31,2016-09-30,2016-06-30,2016-03-31,2015-12-31,2015-09-30,2015-06-30,2015-03-31


In [11]:
Ticker = data['ticker'].unique()
Ticker

array(['ZSAN', 'ZNOG', 'ZIVO', 'ZGNX', 'ZEUS', 'ZAGG', 'YEWB', 'XXII',
       'XTNT', 'XSPA', 'XONE', 'XENE', 'XELB', 'WYY', 'WWR', 'WVVI',
       'WVFC', 'WTT', 'WSTL', 'WSTG', 'WNEB', 'WLFC', 'WLDN', 'WHLR',
       'WHLM', 'WHF', 'WDDD', 'WATT', 'VXRT', 'VUZI', 'VTNR', 'VPG',
       'VOXX', 'VNRX', 'VIVE', 'VIRC', 'VIDE', 'VGZ', 'VERU', 'VEC',
       'VCYT', 'VCRA', 'VCEL', 'VBTX', 'VBFC', 'VASO', 'VALU', 'VABK',
       'UWHR', 'UUU', 'UTI', 'USNU', 'USIO', 'USAP', 'USAK', 'URG',
       'UPLD', 'UONE', 'UNB', 'UNAM', 'UMH', 'ULBI', 'UG', 'UFPT', 'UEC',
       'UCTT', 'UBOH', 'UBFO', 'UBCP', 'UAMY', 'TZOO', 'TWIN', 'TSRI',
       'TSQ', 'TSBK', 'TRXC', 'TRVN', 'TRUP', 'TRNS', 'TPVG', 'TPCS',
       'TOMZ', 'TOFB', 'TNXP', 'TNLX', 'TMQ', 'TMBR', 'TLYS', 'TKOI',
       'TIPT', 'TIKK', 'THMO', 'THM', 'TGLS', 'TGHI', 'TGEN', 'TGC',
       'TESS', 'TENX', 'TELL', 'TCON', 'TCI', 'TCFC', 'TBK', 'TAYD',
       'TAT', 'TAIT', 'TACO', 'SYNL', 'SYNC', 'SYN', 'SWKH', 'SVT',
       'SVBI', 'SUWN',

In [12]:
#insert ticker column into the dataframe
# a['Ticker'] = Ticker
# a['Ticker']

In [13]:
data.set_index('date')

Unnamed: 0_level_0,ticker,eps
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-09-30,ZSAN,-0.11
2020-06-30,ZSAN,-0.14
2020-03-31,ZSAN,-0.24
2019-12-31,ZSAN,-0.45
2019-09-30,ZSAN,-0.55
...,...,...
2016-03-31,AAME,0.04
2015-12-31,AAME,0.00
2015-09-30,AAME,0.01
2015-06-30,AAME,0.16


In [14]:
#export eps data 
data_ticker = data['ticker'].unique()
data_ticker = data_ticker.tolist()
for i,value in enumerate(data_ticker):
    data[data['ticker'] == value].to_csv(f"../dataset/eps/{value}.csv",index = True, na_rep = 'N/A')
# we export it in a way that each ticker has its own csv file with the datekey as index

In [15]:
tickers = data_ticker

In [16]:
#dataframe for stocks eps
stocks_eps = (
    (pd.concat(
        [pd.read_csv(f"../dataset/eps/{ticker}.csv")[
            'eps'
        ].rename(ticker)
        for ticker in tickers],
        axis=1,
        sort=True)
    )
)

stocks_eps  = stocks_eps.loc[:,~stocks_eps.columns.duplicated()]

In [17]:
stocks_eps['date'] = pd.DataFrame(column_name)

In [18]:
data

Unnamed: 0,ticker,date,eps
0,ZSAN,2020-09-30,-0.11
1,ZSAN,2020-06-30,-0.14
2,ZSAN,2020-03-31,-0.24
3,ZSAN,2019-12-31,-0.45
4,ZSAN,2019-09-30,-0.55
...,...,...,...
19384,AAME,2016-03-31,0.04
19385,AAME,2015-12-31,0.00
19386,AAME,2015-09-30,0.01
19387,AAME,2015-06-30,0.16


In [19]:
#set date as the index
stocks_eps.set_index('date')

Unnamed: 0_level_0,ZSAN,ZNOG,ZIVO,ZGNX,ZEUS,ZAGG,YEWB,XXII,XTNT,XSPA,...,ACRX,ACNB,ACHV,ACFN,ACER,ABMC,ABIO,ABEO,ABCP,AAME
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-09-30,-0.11,-0.01,-0.01,-1.08,-0.13,0.21,0.02,-0.03,-0.1,-0.1,...,-0.1,0.79,-1.14,0.0,-0.51,-0.01,-0.33,-0.08,-0.04,0.09
2020-06-30,-0.14,-0.01,0.0,-0.96,-0.56,-0.11,0.01,-0.04,-0.19,-1.51,...,-0.08,0.67,-1.68,0.0,-0.56,0.0,-0.73,-0.14,-0.02,0.31
2020-03-31,-0.24,-0.01,-0.01,-0.54,0.05,-2.54,0.0,-0.03,-0.19,-1.74,...,-0.2,-0.14,-2.2,-0.01,-0.49,-0.01,-0.83,-0.52,-0.03,-0.4
2019-12-31,-0.45,-0.01,-0.01,-1.2,-0.07,0.86,-0.04,-0.05,-0.13,6.0,...,-0.18,0.72,-3.8,0.0,-0.51,-0.01,-0.69,-0.29,-0.02,0.06
2019-09-30,-0.55,-0.02,-0.01,-6.75,0.05,0.3,0.02,-0.08,-0.14,-5.04,...,-0.16,0.89,-9.0,0.0,-0.52,0.0,-0.76,-0.35,-0.03,-0.07
2019-06-30,-0.55,-0.02,-0.01,-0.89,0.18,-0.18,0.03,-0.06,-0.15,-9.66,...,-0.16,0.92,-10.0,-0.01,-1.09,0.0,-1.14,-0.49,-0.05,-0.22
2019-03-31,-0.79,-0.03,-0.02,-0.83,0.18,-0.5,0.01,-0.02,-0.21,-4.83,...,-0.17,0.83,-17.6,-0.01,-0.79,-0.01,-1.86,-0.39,-0.02,0.2
2018-12-31,-0.81,-0.52,-0.01,-0.49,-0.12,0.52,-0.06,-0.07,-4.78,-9.6,...,-0.17,0.75,1.8,-0.01,-0.9,-0.01,-1.8,-0.42,0.02,0.01
2018-09-30,-0.68,0.02,-0.03,-1.08,1.01,0.52,-0.01,0.05,-0.24,-6.6,...,-0.21,0.86,-14.2,0.0,-0.43,-0.01,-1.98,-0.34,-0.02,0.04
2018-06-30,-0.75,-0.02,-0.02,-0.83,1.39,0.11,0.03,-0.05,-0.38,-7.8,...,-0.2,0.78,-36.4,-0.02,-0.64,0.0,-2.7,-0.25,-0.03,0.15


In [20]:
stocks_eps.isnull().values.any()

True

In [21]:
#find how many nulls
stocks_eps.isnull().sum().sum()

52

In [22]:
# find which tickers have null
null = pd.DataFrame(stocks_eps.isnull().sum())
null

Unnamed: 0,0
ZSAN,0
ZNOG,0
ZIVO,0
ZGNX,0
ZEUS,0
...,...
ABIO,0
ABEO,0
ABCP,0
AAME,0


In [23]:
null.loc[null[0]==1]

Unnamed: 0,0
UAMY,1
TENX,1
MATN,1
JCAP,1
EMGCQ,1


In [24]:
null.loc[null[0]==2]

Unnamed: 0,0
CRK,2
CKX,2


In [31]:
#backfill the null with ffill because we are assuming that we do not know the next data, so we can't use bfill
stocks_eps = stocks_eps.fillna(method='ffill')

In [32]:
stocks_eps.isnull().sum().sum()

7

In [34]:
nan_cols = [i for i in stocks_eps.columns if stocks_eps[i].isnull().any()]
nan_cols

['SRGZ', 'MATN', 'ARTW']

In [35]:
stocks_eps['SRGZ']

0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
5     0.00
6     0.00
7     0.00
8     0.00
9     0.00
10   -0.01
11    0.00
12   -0.01
13   -0.01
14    0.00
15    0.00
16   -0.01
17   -0.01
18    0.00
19    0.00
20   -0.01
21    0.00
22    0.00
Name: SRGZ, dtype: float64

In [36]:
stocks_eps[nan_cols]

Unnamed: 0,SRGZ,MATN,ARTW
0,,,
1,,0.01,-0.18
2,,-0.05,-0.1
3,,-0.05,-0.1
4,,-0.01,-0.1
5,0.0,-0.02,-0.08
6,0.0,-0.02,-0.14
7,0.0,-0.01,-0.34
8,0.0,-0.02,-0.18
9,0.0,-0.02,-0.16


In [37]:
#the remaining Na we will do a backfill since it doesn't really affect the result much
stocks_eps = stocks_eps.fillna(method='bfill')

In [38]:
stocks_eps.isnull().sum().sum()

0

## Transforming the dataframe


In [39]:
df = stocks_eps

In [44]:
# we want to make the column into the index and index into the column
df_transposed = df.transpose()
df_transposed

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
ZSAN,-0.11,-0.14,-0.24,-0.45,-0.55,-0.55,-0.79,-0.81,-0.68,-0.75,...,-3.4,-6.8,-8.8,-10.4,-10.8,-13.6,-12.8,-14.4,-12.6,-9.4
ZNOG,-0.01,-0.01,-0.01,-0.01,-0.02,-0.02,-0.03,-0.52,0.02,-0.02,...,-0.1,-0.07,-0.04,-0.04,-0.08,-0.04,-0.02,-0.05,-0.06,-0.07
ZIVO,-0.01,0,-0.01,-0.01,-0.01,-0.01,-0.02,-0.01,-0.03,-0.02,...,-0.01,-0.01,-0.01,-0.01,-0.02,-0.01,0,-0.01,-0.02,-0.01
ZGNX,-1.08,-0.96,-0.54,-1.2,-6.75,-0.89,-0.83,-0.49,-1.08,-0.83,...,-0.93,-0.86,-0.94,-0.69,-0.76,-0.42,-0.5,-0.65,3.78,-1.2
ZEUS,-0.13,-0.56,0.05,-0.07,0.05,0.18,0.18,-0.12,1.01,1.39,...,0.42,0.68,-0.19,-0.16,0.32,-0.07,-0.44,-0.05,-1.99,0.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ABIO,-0.33,-0.73,-0.83,-0.69,-0.76,-1.14,-1.86,-1.8,-1.98,-2.7,...,-10.619,-8.639,-8.279,-9.179,-7.739,-7.199,-5.04,-5.58,-12.599,-16.378
ABEO,-0.08,-0.14,-0.52,-0.29,-0.35,-0.49,-0.39,-0.42,-0.34,-0.25,...,-0.21,-0.13,-0.19,-0.08,-0.2,-0.17,-0.06,-0.19,-0.16,-0.1
ABCP,-0.04,-0.02,-0.03,-0.02,-0.03,-0.05,-0.02,0.02,-0.02,-0.03,...,-0.03,-0.03,-0.01,-0.02,-0.02,-0.03,-0.02,-0.04,-0.03,-0.03
AAME,0.09,0.31,-0.4,0.06,-0.07,-0.22,0.2,0.01,0.04,0.15,...,0.07,-0.02,0.06,0,0.01,0.04,0,0.01,0.16,0.03


In [52]:
# set date as column name 
df_transposed = df_transposed.rename(columns=df_transposed.iloc[-1]).drop(['date'])


KeyError: "['date'] not found in axis"

In [54]:
#remove ticker as index
df_transposed.reset_index(inplace=True)

In [61]:
#rename index as Ticker
df_transposed.rename(columns ={'index': 'Ticker'}, inplace=True)

In [62]:
df_transposed

Unnamed: 0,Ticker,2020-09-30,2020-06-30,2020-03-31,2019-12-31,2019-09-30,2019-06-30,2019-03-31,2018-12-31,2018-09-30,...,2017-06-30,2017-03-31,2016-12-31,2016-09-30,2016-06-30,2016-03-31,2015-12-31,2015-09-30,2015-06-30,2015-03-31
0,ZSAN,-0.11,-0.14,-0.24,-0.45,-0.55,-0.55,-0.79,-0.81,-0.68,...,-3.4,-6.8,-8.8,-10.4,-10.8,-13.6,-12.8,-14.4,-12.6,-9.4
1,ZNOG,-0.01,-0.01,-0.01,-0.01,-0.02,-0.02,-0.03,-0.52,0.02,...,-0.1,-0.07,-0.04,-0.04,-0.08,-0.04,-0.02,-0.05,-0.06,-0.07
2,ZIVO,-0.01,0,-0.01,-0.01,-0.01,-0.01,-0.02,-0.01,-0.03,...,-0.01,-0.01,-0.01,-0.01,-0.02,-0.01,0,-0.01,-0.02,-0.01
3,ZGNX,-1.08,-0.96,-0.54,-1.2,-6.75,-0.89,-0.83,-0.49,-1.08,...,-0.93,-0.86,-0.94,-0.69,-0.76,-0.42,-0.5,-0.65,3.78,-1.2
4,ZEUS,-0.13,-0.56,0.05,-0.07,0.05,0.18,0.18,-0.12,1.01,...,0.42,0.68,-0.19,-0.16,0.32,-0.07,-0.44,-0.05,-1.99,0.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
838,ABMC,-0.01,0,-0.01,-0.01,0,0,-0.01,-0.01,-0.01,...,-0.01,0,0,-0.01,0,0,-0.01,0,-0.01,0
839,ABIO,-0.33,-0.73,-0.83,-0.69,-0.76,-1.14,-1.86,-1.8,-1.98,...,-10.619,-8.639,-8.279,-9.179,-7.739,-7.199,-5.04,-5.58,-12.599,-16.378
840,ABEO,-0.08,-0.14,-0.52,-0.29,-0.35,-0.49,-0.39,-0.42,-0.34,...,-0.21,-0.13,-0.19,-0.08,-0.2,-0.17,-0.06,-0.19,-0.16,-0.1
841,ABCP,-0.04,-0.02,-0.03,-0.02,-0.03,-0.05,-0.02,0.02,-0.02,...,-0.03,-0.03,-0.01,-0.02,-0.02,-0.03,-0.02,-0.04,-0.03,-0.03


# Predictions

In [69]:
column_name = df_transposed.columns[1:25].tolist()
# we save the column_name to a variable

In [70]:
column_name

['2020-09-30',
 '2020-06-30',
 '2020-03-31',
 '2019-12-31',
 '2019-09-30',
 '2019-06-30',
 '2019-03-31',
 '2018-12-31',
 '2018-09-30',
 '2018-06-30',
 '2018-03-31',
 '2017-12-31',
 '2017-09-30',
 '2017-06-30',
 '2017-03-31',
 '2016-12-31',
 '2016-09-30',
 '2016-06-30',
 '2016-03-31',
 '2015-12-31',
 '2015-09-30',
 '2015-06-30',
 '2015-03-31']

In [79]:
len(column_name)

23

In [None]:
check the shape of each stock

In [92]:
data = df_transposed

In [94]:
for i in range(len(data)):
    X = np.asarray([i for i in range(16)]).reshape(-1,1)


In [95]:
X

array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12],
       [13],
       [14],
       [15]])

In [112]:
# for i in range(len(data)):    
#     print((data.iloc[[i]]))

In [116]:
#dennis noteboook
data.head()

Unnamed: 0,Ticker,2020-09-30,2020-06-30,2020-03-31,2019-12-31,2019-09-30,2019-06-30,2019-03-31,2018-12-31,2018-09-30,...,2017-06-30,2017-03-31,2016-12-31,2016-09-30,2016-06-30,2016-03-31,2015-12-31,2015-09-30,2015-06-30,2015-03-31
0,ZSAN,-0.11,-0.14,-0.24,-0.45,-0.55,-0.55,-0.79,-0.81,-0.68,...,-3.4,-6.8,-8.8,-10.4,-10.8,-13.6,-12.8,-14.4,-12.6,-9.4
1,ZNOG,-0.01,-0.01,-0.01,-0.01,-0.02,-0.02,-0.03,-0.52,0.02,...,-0.1,-0.07,-0.04,-0.04,-0.08,-0.04,-0.02,-0.05,-0.06,-0.07
2,ZIVO,-0.01,0.0,-0.01,-0.01,-0.01,-0.01,-0.02,-0.01,-0.03,...,-0.01,-0.01,-0.01,-0.01,-0.02,-0.01,0.0,-0.01,-0.02,-0.01
3,ZGNX,-1.08,-0.96,-0.54,-1.2,-6.75,-0.89,-0.83,-0.49,-1.08,...,-0.93,-0.86,-0.94,-0.69,-0.76,-0.42,-0.5,-0.65,3.78,-1.2
4,ZEUS,-0.13,-0.56,0.05,-0.07,0.05,0.18,0.18,-0.12,1.01,...,0.42,0.68,-0.19,-0.16,0.32,-0.07,-0.44,-0.05,-1.99,0.1


In [115]:
# dennis' edition

pred_dict = {}
std_dict = {}
for i in range(len(data)):
#     try:
    prediction=[]
    stddev=[]
    for j in range(1,24):
#             print(f"j is {j}")
            series = data.iloc[i][(j+1):(j+17)]
#             print(f"s:{series}, p:{prediction}, std:{stddev}")
            X = np.asarray([i for i in range(16)]).reshape(-1,1)
            y = series.copy()
            print(y)
            X_pred =np.asarray([16]).reshape(-1,1)
            regressor = LinearRegression()  
            regressor.fit(X, y)
            y_pred = regressor.predict(X_pred)
            std = np.std(series)
            prediction.append(y_pred[0])
            stddev.append(std)
    pred_dict[data.loc[i][0]] = prediction
    std_dict[data.loc[i][0]] = stddev
#     except ValueError:
#             print(j)
pred = pd.DataFrame.from_dict(pred_dict, orient='index',columns=column_name)
stdev = pd.DataFrame.from_dict(std_dict, orient='index',columns=column_name)
#pred.head()

2020-06-30   -0.14
2020-03-31   -0.24
2019-12-31   -0.45
2019-09-30   -0.55
2019-06-30   -0.55
2019-03-31   -0.79
2018-12-31   -0.81
2018-09-30   -0.68
2018-06-30   -0.75
2018-03-31   -4.16
2017-12-31   -3.62
2017-09-30      -4
2017-06-30    -3.4
2017-03-31    -6.8
2016-12-31    -8.8
2016-09-30   -10.4
Name: 0, dtype: object
2020-03-31   -0.24
2019-12-31   -0.45
2019-09-30   -0.55
2019-06-30   -0.55
2019-03-31   -0.79
2018-12-31   -0.81
2018-09-30   -0.68
2018-06-30   -0.75
2018-03-31   -4.16
2017-12-31   -3.62
2017-09-30      -4
2017-06-30    -3.4
2017-03-31    -6.8
2016-12-31    -8.8
2016-09-30   -10.4
2016-06-30   -10.8
Name: 0, dtype: object
2019-12-31   -0.45
2019-09-30   -0.55
2019-06-30   -0.55
2019-03-31   -0.79
2018-12-31   -0.81
2018-09-30   -0.68
2018-06-30   -0.75
2018-03-31   -4.16
2017-12-31   -3.62
2017-09-30      -4
2017-06-30    -3.4
2017-03-31    -6.8
2016-12-31    -8.8
2016-09-30   -10.4
2016-06-30   -10.8
2016-03-31   -13.6
Name: 0, dtype: object
2019-09-30   -0.55


ValueError: Found input variables with inconsistent numbers of samples: [16, 15]

In [None]:
pred

In [29]:
stop

NameError: name 'stop' is not defined

In [None]:
stocks_eps.isnull().sum().sum()

In [None]:
stocks_eps.dropna()

In [None]:
don't set index until you put the data in

In [None]:
a.set_index('Ticker')
#set Ticker as the index

In [None]:
#insert each quarter's earnings for each ticker into the dataframe 


In [None]:
a.insert(0, {'ticker': 45})

In [None]:
a