In [137]:
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

# ignore warnings because they are not relevant
import warnings
warnings.filterwarnings('ignore')

# suppress the scientific notation when printing numpy arrays
np.set_printoptions(suppress=True)

# Loading the data

In [138]:
stock_index = 'SSMI'

In [139]:
stocks = pd.read_csv("./data/StockIndices.csv",
                 decimal=',')

# stocks date format: 29/10/2018
stocks['Date'] = pd.to_datetime(stocks['Date'], format='%d/%m/%Y')
stocks.set_index('Date', inplace=True)
stocks.drop(columns='Unnamed: 0', inplace=True)
stocks.head()

# converting prices to floats
stocks['Price Close'] = [float(price) for price in stocks['Price Close']]

# data cleansing
stocks['Index'] = [name.replace(".", "") for name in stocks['Index'].values]

stock_index = stocks[stocks['Index'] == stock_index]
stock_index.head()

Unnamed: 0_level_0,Index,Price Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2008-10-30,SSMI,5119.919376
2008-10-31,SSMI,5315.488943
2008-11-03,SSMI,5311.306126
2008-11-04,SSMI,5520.079351
2008-11-05,SSMI,5330.643769


In [140]:
weather = pd.read_csv("./data/Weather_ALL.csv",
                 sep=';',
                 decimal=',')

# weather date format: 29/10/2018
weather['Date'] = pd.to_datetime(weather['Date'], format='%d/%m/%Y')
weather.set_index('Date', inplace=True)
weather.head()

# drop NaNs
weather.dropna(inplace=True)

weather.head()

Unnamed: 0_level_0,City,Mean Temperature Actual,Low Temperature Actual,High Temperature Actual,Precipitation Actual,Wind Speed Actual,Relative Humidity Actual
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-10-25,Boston,6.64,3.28,10.0,0.1,16.44,69.57
2018-10-24,Boston,7.19,5.0,9.39,0.1,18.07,76.07
2018-10-23,Boston,9.19,5.0,13.3,1.04,13.83,77.78
2018-10-22,Boston,6.65,1.1,12.2,0.0,14.37,49.61
2018-10-21,Boston,7.19,2.8,11.7,0.08,24.0,50.89


# Hypothesis: Index depends on global weather conditions

In [141]:
# GENERAGE SEASON BINARIES
stock_index['weekday'] = stock_index.index.dayofweek
stock_index['month'] = stock_index.index.month

# Mondays are bad ^^
stock_index['Monday'] = np.where(stock_index['weekday'] == 0, 1, 0)

# Winter is November, December, October
stock_index['Winter'] = np.where((stock_index['month'] == 11) | 
                       (stock_index['month'] == 12) | 
                       (stock_index['month'] == 1), 1, 0)

In [142]:
stock_index.head()

Unnamed: 0_level_0,Index,Price Close,weekday,month,Monday,Winter
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2008-10-30,SSMI,5119.919376,3,10,0,0
2008-10-31,SSMI,5315.488943,4,10,0,0
2008-11-03,SSMI,5311.306126,0,11,1,1
2008-11-04,SSMI,5520.079351,1,11,0,1
2008-11-05,SSMI,5330.643769,2,11,0,1


In [174]:
# Creating weather binaries for each city

cities = dict() 
for city in weather['City'].unique():
    
    city_name = city
    
    city = weather[weather['City'] == city].copy()
    
    city['VeryCold'] = np.where(city['Mean Temperature Actual'] <= np.percentile(city['Mean Temperature Actual'], q=25), 1, 0)
    city['HeavyRain'] = np.where(city['Precipitation Actual'] >= np.percentile(city['Precipitation Actual'], q=75), 1, 0)
    city['ColdRain'] = city['HeavyRain']*city['VeryCold']
    
    cities[city_name] = city

In [175]:
# creating global weather binaries that are only true if it's in every location
stock_index['global_cold'] = np.ones(len(stock_index))
stock_index['global_rain'] = np.ones(len(stock_index))
stock_index['global_coldrain'] = np.ones(len(stock_index))

for city_name, city in cities.items():
    stock_index['global_cold'] = (stock_index['global_cold'] == 1) & (city['VeryCold'] == 1)
    stock_index['global_rain'] = (stock_index['global_rain'] == 1) & (city['HeavyRain'] == 1)
    stock_index['global_coldrain'] = (stock_index['global_coldrain'] == 1) & (city['ColdRain'] == 1)

In [176]:
# convert to integers
stock_index['global_cold'] = stock_index['global_cold'] * 1
stock_index['global_rain'] = stock_index['global_rain'] * 1 
stock_index['global_coldrain'] = stock_index['global_coldrain'] * 1

In [177]:
stock_index['global_cold'].sum() / len(stock_index)

0.04906094288999617

In [178]:
stock_index['global_rain'].sum() / len(stock_index)

0.0011498658489842851

In [179]:
stock_index['global_coldrain'].sum() / len(stock_index)

0.0

# Linear Regression

In [180]:
def estimate_linear(df, dependent, regressors):
    y = df[dependent]
    X = sm.add_constant(df[regressors])
    model = sm.OLS(y, X).fit(cov_type='HAC',cov_kwds={'maxlags':20})
    predictions = model.predict(X)
    return model, predictions

def plot_and_save_stats(model, name):
    plt.close()
    plt.rc('figure', figsize=(12, 7))
    plt.text(0.01, 0.05, model.summary(), {'fontsize': 10}, fontproperties = 'monospace') # approach improved by OP -> monospace!
    plt.axis('off')
    plt.tight_layout()
    #plt.savefig('/{}_statistics.png'.format(name))

In [181]:
# calculate log returns
stock_index['LogReturns'] = np.log(stock_index['Price Close'] / stock_index['Price Close'].shift(1))

In [182]:
regressors = ['global_cold', 'global_rain', 'global_coldrain', 'weekday', 'month', 'Monday', 'Winter']

model, predictions = estimate_linear(stock_index.dropna(), 'LogReturns', regressors)

In [183]:
model.summary()

0,1,2,3
Dep. Variable:,LogReturns,R-squared:,0.001
Model:,OLS,Adj. R-squared:,-0.001
Method:,Least Squares,F-statistic:,0.5709
Date:,"Thu, 01 Nov 2018",Prob (F-statistic):,0.754
Time:,15:50:52,Log-Likelihood:,7969.9
No. Observations:,2608,AIC:,-15930.0
Df Residuals:,2601,BIC:,-15880.0
Df Model:,6,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0007,0.001,0.890,0.374,-0.001,0.002
global_cold,-3.078e-05,0.001,-0.031,0.975,-0.002,0.002
global_rain,0.0057,0.005,1.071,0.284,-0.005,0.016
global_coldrain,-6.162e-19,4.56e-19,-1.352,0.176,-1.51e-18,2.77e-19
weekday,-0.0003,0.000,-1.157,0.247,-0.001,0.000
month,4.743e-05,6.91e-05,0.686,0.492,-8.8e-05,0.000
Monday,-0.0009,0.001,-1.150,0.250,-0.002,0.001
Winter,-0.0003,0.001,-0.543,0.587,-0.001,0.001

0,1,2,3
Omnibus:,288.275,Durbin-Watson:,2.07
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2593.156
Skew:,-0.017,Prob(JB):,0.0
Kurtosis:,7.885,Cond. No.,4e+18


# Merging prices and weather

In [153]:
#data = pd.merge(stock_index, weather, on='Date')