# Summary Statistics, EDA, and Regression Analysis

In [1]:
#Import Python Packages/Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import statsmodels.formula.api as sm
from stargazer.stargazer import Stargazer
from IPython.core.display import HTML
from pandas.plotting import table
from tabulate import tabulate

In [2]:
# Read the analysis dataset from CSV and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='Data/stata_data.csv')
data.head(n=5)

Unnamed: 0,CI,ISF,NASDAQ,NYSE,Other,one_m_diff_volume,three_m_diff_volume,six_m_diff_volume,one_y_diff_volume,two_y_diff_volume,...,three_y_diff_sp500,Date_End(t=1),Date_End(t=12),Date_End(t=24),Date_End(t=3),Date_End(t=36),Date_End(t=6),Sharing Firm Introduced,Relatedness with Sharing Firm,Exchange
0,1,0,1,0,0,-27.602686,-28.601259,-12.906301,63.017803,75.875492,...,5.121565,2008-08-31,2008-08-31,2008-08-31,2008-08-31,2008-08-31,2008-08-31,No,Closely Related,NASDAQ
1,1,1,1,0,0,219.559358,-45.717575,150.587447,12.8733,42.148715,...,-4.984293,2008-09-30,2009-08-31,2010-08-31,2008-11-30,2011-08-31,2009-02-28,Yes,Closely Related,NASDAQ
2,0,0,0,0,1,113.633717,-14.8396,-82.271666,-49.24844,38.959852,...,5.121565,2008-08-31,2008-08-31,2008-08-31,2008-08-31,2008-08-31,2008-08-31,No,Less Closely Related,Other
3,0,1,0,0,1,28.894713,-14.088155,72.496809,-45.839472,727.145834,...,-4.984293,2008-09-30,2009-08-31,2010-08-31,2008-11-30,2011-08-31,2009-02-28,Yes,Less Closely Related,Other
4,1,0,0,0,1,751.861702,10.71552,605.506608,4827.692308,463.908451,...,27.737807,2017-09-30,2017-09-30,2017-09-30,2017-09-30,2017-09-30,2017-09-30,No,Closely Related,Other


In [3]:
# Missing-value check: np.where returns the (row, column) positions of every
# null cell, so a pair of empty arrays means the dataset has no missing data.
np.where(data.isna())

(array([], dtype=int64), array([], dtype=int64))

In [4]:
# Build one dataframe per time period. Every frame carries the same indicator
# columns and categorical label columns; only the three period-specific
# columns (volume, share price, S&P 500 change) differ.
indicator_cols = ['CI', 'ISF', 'NASDAQ', 'NYSE', 'Other']
category_cols = ['Sharing Firm Introduced', 'Relatedness with Sharing Firm', 'Exchange']

onemonthdata = data[indicator_cols + ['one_m_diff_volume', 'one_m_diff_shareprice', 'one_m_diff_sp500'] + category_cols]
threemonthdata = data[indicator_cols + ['three_m_diff_volume', 'three_m_diff_shareprice', 'three_m_diff_sp500'] + category_cols]
sixmonthdata = data[indicator_cols + ['six_m_diff_volume', 'six_m_diff_shareprice', 'six_m_diff_sp500'] + category_cols]
oneyeardata = data[indicator_cols + ['one_y_diff_volume', 'one_y_diff_shareprice', 'one_y_diff_sp500'] + category_cols]
twoyeardata = data[indicator_cols + ['two_y_diff_volume', 'two_y_diff_shareprice', 'two_y_diff_sp500'] + category_cols]
threeyeardata = data[indicator_cols + ['three_y_diff_volume', 'three_y_diff_shareprice', 'three_y_diff_sp500'] + category_cols]

## Descriptive Statistics Tables

In [5]:
# Crosstab of categorical variable counts: exchange in the rows, sharing-firm
# introduction x relatedness in the columns, with "Total" margins on both axes.
crosstab_columns = [data['Sharing Firm Introduced'], data['Relatedness with Sharing Firm']]
catsummstats = pd.crosstab(
    index=data['Exchange'],
    columns=crosstab_columns,
    margins=True,
    margins_name="Total",
)
catsummstats

Sharing Firm Introduced,No,No,Yes,Yes,Total
Relatedness with Sharing Firm,Closely Related,Less Closely Related,Closely Related,Less Closely Related,Unnamed: 5_level_1
Exchange,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
NASDAQ,8,9,8,9,34
NYSE,21,21,21,21,84
Other,3,2,3,2,10
Total,32,32,32,32,128


In [6]:
# Descriptive statistics for the % change in share price, volume and the
# S&P 500 at every time period, t = (1, 3, 6, 12, 24, 36).
shareprice_cols = ['one_m_diff_shareprice', 'three_m_diff_shareprice', 'six_m_diff_shareprice',
                   'one_y_diff_shareprice', 'two_y_diff_shareprice', 'three_y_diff_shareprice']
volume_cols = ['one_m_diff_volume', 'three_m_diff_volume', 'six_m_diff_volume',
               'one_y_diff_volume', 'two_y_diff_volume', 'three_y_diff_volume']
sp500_cols = ['one_m_diff_sp500', 'three_m_diff_sp500', 'six_m_diff_sp500',
              'one_y_diff_sp500', 'two_y_diff_sp500', 'three_y_diff_sp500']

# One row per variable, one column per statistic.
summstats = data[shareprice_cols + volume_cols + sp500_cols].describe().T
summstats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
one_m_diff_shareprice,128.0,0.221725,9.346026,-24.570728,-5.273614,0.656898,5.966184,27.44158
three_m_diff_shareprice,128.0,2.768676,19.415373,-45.226774,-8.293984,2.575356,12.531634,53.28529
six_m_diff_shareprice,128.0,6.718731,28.315888,-56.081946,-8.565248,2.842428,16.459546,161.231884
one_y_diff_shareprice,128.0,18.985659,48.901747,-77.362205,-11.885776,13.628225,37.527171,213.499513
two_y_diff_shareprice,128.0,41.808234,169.229427,-71.560611,-9.822332,20.875239,49.511847,1814.107725
three_y_diff_shareprice,128.0,45.811341,117.177361,-83.73444,-20.925556,17.926998,79.69066,915.035714
one_m_diff_volume,128.0,105.029528,386.830189,-94.4,-40.204664,0.539849,114.439265,3894.0
three_m_diff_volume,128.0,61.356665,278.59103,-95.289471,-46.212218,1.390051,64.152579,2698.589801
six_m_diff_volume,128.0,96.425443,334.153877,-93.034199,-42.362757,5.783472,91.222924,2580.0
one_y_diff_volume,128.0,86.81598,474.86776,-95.1331,-38.260425,10.220187,61.490653,4827.692308


## EDA Plots

In [None]:
# Global plot styling: seaborn "paper" context, "ticks" style and the tab10
# palette, plus a handful of matplotlib rc overrides.
plot_rc = {
    'font.family': 'sans-serif',
    'font.sans-serif': 'Tahoma',
    # Hide the right and top axes spines.
    'axes.spines.right': False,
    'axes.spines.top': False,
    'axes.labelpad': 12,
    'scatter.edgecolors': None,
}
sns.set(context='paper', style='ticks', palette='tab10', rc=plot_rc)

In [None]:
#Boxplots of the Categorical Data for Each Time Period
def _share_price_boxplots(frame, outcome, ylabel, png_path):
    """Draw and save box plots of one share-price outcome by category.

    One panel per exchange, "Sharing Firm Introduced" on the x axis and
    boxes coloured by "Relatedness with Sharing Firm".

    Parameters
    ----------
    frame : pandas.DataFrame
        Per-period frame holding ``outcome`` and the three categorical columns.
    outcome : str
        Column plotted on the y axis.
    ylabel : str
        Y-axis label applied to the grid.
    png_path : str
        Destination of the saved figure.

    Returns
    -------
    seaborn.FacetGrid
        The grid returned by ``sns.catplot``.
    """
    grid = sns.catplot(
        x="Sharing Firm Introduced",
        y=outcome,
        hue='Relatedness with Sharing Firm',
        col='Exchange',
        kind='box',
        data=frame,
    )
    grid.set_ylabels(ylabel)
    grid.savefig(png_path, bbox_inches='tight')
    return grid


CATsvsOneM = _share_price_boxplots(
    onemonthdata, "one_m_diff_shareprice", '% Change in Share Price (t=1)',
    "Regression Output:Visualizations/EDA Plots/BoxplotsofSharePricevsCats/onemonthcats.png")

CATsvsThreeM = _share_price_boxplots(
    threemonthdata, "three_m_diff_shareprice", '% Change in Share Price (t=3)',
    "Regression Output:Visualizations/EDA Plots/BoxplotsofSharePricevsCats/threemonthcats.png")

CATsvsSixM = _share_price_boxplots(
    sixmonthdata, "six_m_diff_shareprice", '% Change in Share Price (t=6)',
    "Regression Output:Visualizations/EDA Plots/BoxplotsofSharePricevsCats/sixmonthcats.png")

CATsvsOneY = _share_price_boxplots(
    oneyeardata, "one_y_diff_shareprice", '% Change in Share Price (t=12)',
    "Regression Output:Visualizations/EDA Plots/BoxplotsofSharePricevsCats/oneyearcats.png")

CATsvsTwoY = _share_price_boxplots(
    twoyeardata, "two_y_diff_shareprice", '% Change in Share Price (t=24)',
    "Regression Output:Visualizations/EDA Plots/BoxplotsofSharePricevsCats/twoyearcats.png")

CATsvsThreeY = _share_price_boxplots(
    threeyeardata, "three_y_diff_shareprice", '% Change in Share Price (t=36)',
    "Regression Output:Visualizations/EDA Plots/BoxplotsofSharePricevsCats/threeyearcats.png")

In [None]:
#Scatterplots of Share Price vs Volume
#Potentially plot change in share price overtime. Will need to extract dates of observation during data cleanup
def _scatter_price_vs_volume(frame, x, y, xlabel, ylabel, png_path):
    """Scatter one share-price change against one volume change and save it.

    Each call draws on its own explicitly created figure, so the plots never
    depend on ``plt.show()`` closing the previous figure, and the PNG is
    written before the figure is shown.

    Parameters
    ----------
    frame : pandas.DataFrame
        Per-period frame containing columns ``x`` and ``y``.
    x, y : str
        Column names for the horizontal and vertical axes.
    xlabel, ylabel : str
        Axis labels.
    png_path : str
        Destination of the saved figure.

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the scatter plot.
    """
    fig, ax = plt.subplots(figsize=(5.5, 4))
    sns.scatterplot(x=x, y=y, data=frame, linewidth=0, ax=ax)
    ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)
    fig.tight_layout()
    # Save first: on some backends show() closes or consumes the figure.
    fig.savefig(png_path, bbox_inches='tight')
    plt.show()
    return ax


VolumevsOneM = _scatter_price_vs_volume(
    onemonthdata, "one_m_diff_volume", "one_m_diff_shareprice",
    '% Change in Volume (t=1)', '% Change in Share Price (t=1)',
    "Regression Output:Visualizations/EDA Plots/ScatterplotsofSharePricevsVolume/onemonthvolume.png")

VolumevsThreeM = _scatter_price_vs_volume(
    threemonthdata, "three_m_diff_volume", "three_m_diff_shareprice",
    '% Change in Volume (t=3)', '% Change in Share Price (t=3)',
    "Regression Output:Visualizations/EDA Plots/ScatterplotsofSharePricevsVolume/threemonthvolume.png")

VolumevsSixM = _scatter_price_vs_volume(
    sixmonthdata, "six_m_diff_volume", "six_m_diff_shareprice",
    '% Change in Volume (t=6)', '% Change in Share Price (t=6)',
    "Regression Output:Visualizations/EDA Plots/ScatterplotsofSharePricevsVolume/sixmonthvolume.png")

VolumevsOneY = _scatter_price_vs_volume(
    oneyeardata, "one_y_diff_volume", "one_y_diff_shareprice",
    '% Change in Volume (t=12)', '% Change in Share Price (t=12)',
    "Regression Output:Visualizations/EDA Plots/ScatterplotsofSharePricevsVolume/oneyearvolume.png")

VolumevsTwoY = _scatter_price_vs_volume(
    twoyeardata, "two_y_diff_volume", "two_y_diff_shareprice",
    '% Change in Volume (t=24)', '% Change in Share Price (t=24)',
    "Regression Output:Visualizations/EDA Plots/ScatterplotsofSharePricevsVolume/twoyearvolume.png")

VolumevsThreeY = _scatter_price_vs_volume(
    threeyeardata, "three_y_diff_volume", "three_y_diff_shareprice",
    '% Change in Volume (t=36)', '% Change in Share Price (t=36)',
    "Regression Output:Visualizations/EDA Plots/ScatterplotsofSharePricevsVolume/threeyearvolume.png")

In [None]:
#Scatterplots of Share Price vs S&P500
def _scatter_price_vs_sp500(frame, x, y, xlabel, ylabel, png_path):
    """Scatter one share-price change against one S&P 500 change and save it.

    Each call draws on its own explicitly created figure, so the plots never
    depend on ``plt.show()`` closing the previous figure, and the PNG is
    written before the figure is shown.

    Parameters
    ----------
    frame : pandas.DataFrame
        Per-period frame containing columns ``x`` and ``y``.
    x, y : str
        Column names for the horizontal and vertical axes.
    xlabel, ylabel : str
        Axis labels.
    png_path : str
        Destination of the saved figure.

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the scatter plot.
    """
    fig, ax = plt.subplots(figsize=(5.5, 4))
    sns.scatterplot(x=x, y=y, data=frame, linewidth=0, ax=ax)
    ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)
    fig.tight_layout()
    # Save first: on some backends show() closes or consumes the figure.
    fig.savefig(png_path)
    plt.show()
    return ax


SP500vsOneM = _scatter_price_vs_sp500(
    onemonthdata, "one_m_diff_sp500", "one_m_diff_shareprice",
    '% Change in S&P500 (t=1)', '% Change in Share Price (t=1)',
    "Regression Output:Visualizations/EDA Plots/ScatterplotsofSharePricevsS&P500/onemonthsp500.png")

SP500vsThreeM = _scatter_price_vs_sp500(
    threemonthdata, "three_m_diff_sp500", "three_m_diff_shareprice",
    '% Change in S&P500 (t=3)', '% Change in Share Price (t=3)',
    "Regression Output:Visualizations/EDA Plots/ScatterplotsofSharePricevsS&P500/threemonthsp500.png")

SP500vsSixM = _scatter_price_vs_sp500(
    sixmonthdata, "six_m_diff_sp500", "six_m_diff_shareprice",
    '% Change in S&P500 (t=6)', '% Change in Share Price (t=6)',
    "Regression Output:Visualizations/EDA Plots/ScatterplotsofSharePricevsS&P500/sixmonthsp500.png")

SP500vsOneY = _scatter_price_vs_sp500(
    oneyeardata, "one_y_diff_sp500", "one_y_diff_shareprice",
    '% Change in S&P500 (t=12)', '% Change in Share Price (t=12)',
    "Regression Output:Visualizations/EDA Plots/ScatterplotsofSharePricevsS&P500/oneyearsp500.png")

SP500vsTwoY = _scatter_price_vs_sp500(
    twoyeardata, "two_y_diff_sp500", "two_y_diff_shareprice",
    '% Change in S&P500 (t=24)', '% Change in Share Price (t=24)',
    "Regression Output:Visualizations/EDA Plots/ScatterplotsofSharePricevsS&P500/twoyearsp500.png")

SP500vsThreeY = _scatter_price_vs_sp500(
    threeyeardata, "three_y_diff_sp500", "three_y_diff_shareprice",
    '% Change in S&P500 (t=36)', '% Change in Share Price (t=36)',
    "Regression Output:Visualizations/EDA Plots/ScatterplotsofSharePricevsS&P500/threeyearsp500.png")

In [None]:
#Heatmap of Data for t=(6,12,24,36)
# Each entry pairs a source column with the label shown on both heatmap axes,
# so the column selection and the tick labels cannot drift apart.
long_term_columns = [
    ('CI', 'CI'),
    ('ISF', 'ISF'),
    ('NASDAQ', 'NASDAQ'),
    ('NYSE', 'NYSE'),
    ('Other', 'Other'),
    ('six_m_diff_volume', '% Change in Volume (t=6)'),
    ('one_y_diff_volume', '% Change in Volume (t=12)'),
    ('two_y_diff_volume', '% Change in Volume (t=24)'),
    ('three_y_diff_volume', '% Change in Volume (t=36)'),
    ('six_m_diff_shareprice', '% Change in Share Price (t=6)'),
    ('one_y_diff_shareprice', '% Change in Share Price (t=12)'),
    ('two_y_diff_shareprice', '% Change in Share Price (t=24)'),
    ('three_y_diff_shareprice', '% Change in Share Price (t=36)'),
    ('six_m_diff_sp500', '% Change in S&P500 (t=6)'),
    ('one_y_diff_sp500', '% Change in S&P500 (t=12)'),
    ('two_y_diff_sp500', '% Change in S&P500 (t=24)'),
    ('three_y_diff_sp500', '% Change in S&P500 (t=36)'),
]
long_term_data = data[[column for column, _ in long_term_columns]]
long_term_labels = [label for _, label in long_term_columns]

# Hand the labels to seaborn instead of calling set_xticks(range(...)):
# heatmap cells are centred on i + 0.5, so ticks at integer positions sit on
# the cell edges and shift every label by half a cell.
heatmap_fig, heatmap_ax = plt.subplots()
HeatMap = sns.heatmap(
    long_term_data.corr(),
    annot=False,
    cmap='coolwarm',
    xticklabels=long_term_labels,
    yticklabels=long_term_labels,
    ax=heatmap_ax,
)
HeatMap.figure.savefig("Regression Output:Visualizations/EDA Plots/Heatmap/heatmap.png",bbox_inches='tight')
plt.show()

In [None]:
#Heatmap of All the Data
# Map every "<horizon>_diff_<measure>" column to its display label. Labels are
# looked up by column name, so they stay correct whatever order the columns
# have in `data`; the indicator columns (CI, ISF, ...) keep their own names.
horizon_months = [('one_m', 1), ('three_m', 3), ('six_m', 6),
                  ('one_y', 12), ('two_y', 24), ('three_y', 36)]
measure_titles = [('volume', 'Volume'), ('shareprice', 'Share Price'), ('sp500', 'S&P500')]
axis_label_for = {
    '{}_diff_{}'.format(prefix, measure): '% Change in {} (t={})'.format(title, months)
    for measure, title in measure_titles
    for prefix, months in horizon_months
}

full_corr = data.loc[:,'CI':'three_y_diff_sp500'].corr()
full_labels = [axis_label_for.get(column, column) for column in full_corr.columns]

# Hand the labels to seaborn instead of calling set_xticks(range(...)):
# heatmap cells are centred on i + 0.5, so ticks at integer positions sit on
# the cell edges and shift every label by half a cell.
full_heatmap_fig, full_heatmap_ax = plt.subplots()
HeatMap = sns.heatmap(
    full_corr,
    annot=False,
    cmap='coolwarm',
    xticklabels=full_labels,
    yticklabels=full_labels,
    ax=full_heatmap_ax,
)
HeatMap.figure.savefig("Regression Output:Visualizations/EDA Plots/Heatmap/fullheatmap.png",bbox_inches='tight')
plt.show()

In [None]:
#Distributions of % Change in Share Price
def _share_price_distribution(values, xlabel, png_path):
    """Plot the distribution of one share-price change and save it.

    Each call draws on its own explicitly created figure, so the plots never
    depend on ``plt.show()`` closing the previous figure, and the PNG is
    written before the figure is shown.

    NOTE(review): ``sns.distplot`` is deprecated in recent seaborn releases;
    it is kept here so the figures do not change. Consider migrating to
    ``sns.histplot(..., kde=True)`` once the seaborn version is confirmed.

    Parameters
    ----------
    values : pandas.Series
        The % change in share price for one time period.
    xlabel : str
        X-axis label.
    png_path : str
        Destination of the saved figure.

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the distribution plot.
    """
    fig, ax = plt.subplots(figsize=(5.5, 4))
    sns.distplot(values, ax=ax)
    ax.set_xlabel(xlabel)
    fig.tight_layout()
    # Save first: on some backends show() closes or consumes the figure.
    fig.savefig(png_path)
    plt.show()
    return ax


OneMDistofSharePriceDiff = _share_price_distribution(
    onemonthdata['one_m_diff_shareprice'], '% Change in Share Price (t=1)',
    "Regression Output:Visualizations/EDA Plots/SharePriceKDEs/onemonthkde.png")

ThreeMDistofSharePriceDiff = _share_price_distribution(
    threemonthdata['three_m_diff_shareprice'], '% Change in Share Price (t=3)',
    "Regression Output:Visualizations/EDA Plots/SharePriceKDEs/threemonthkde.png")

SixMDistofSharePriceDiff = _share_price_distribution(
    sixmonthdata['six_m_diff_shareprice'], '% Change in Share Price (t=6)',
    "Regression Output:Visualizations/EDA Plots/SharePriceKDEs/sixmonthkde.png")

OneYDistofSharePriceDiff = _share_price_distribution(
    oneyeardata['one_y_diff_shareprice'], '% Change in Share Price (t=12)',
    "Regression Output:Visualizations/EDA Plots/SharePriceKDEs/oneyearkde.png")

TwoYDistofSharePriceDiff = _share_price_distribution(
    twoyeardata['two_y_diff_shareprice'], '% Change in Share Price (t=24)',
    "Regression Output:Visualizations/EDA Plots/SharePriceKDEs/twoyearkde.png")

ThreeYDistofSharePriceDiff = _share_price_distribution(
    threeyeardata['three_y_diff_shareprice'], '% Change in Share Price (t=36)',
    "Regression Output:Visualizations/EDA Plots/SharePriceKDEs/threeyearkde.png")

## Regression Analysis for t = One Month
Note: The residual and Q-Q plots for all of the regressions were produced in Stata.

In [None]:
# Five nested OLS specifications for the one-month % change in share price,
# all fitted with heteroscedasticity-robust (HC1) standard errors:
#   (1) ISF and CI, (2) + exchange dummies, (3) + % change in volume,
#   (4) + % change in the S&P 500, (5) + interactions of ISF with the rest.
onem_formulas = [
    "one_m_diff_shareprice ~ ISF + CI",
    "one_m_diff_shareprice ~ ISF + CI + NASDAQ + NYSE",
    "one_m_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + one_m_diff_volume ",
    "one_m_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + one_m_diff_volume + one_m_diff_sp500 ",
    "one_m_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + one_m_diff_volume + one_m_diff_sp500 + ISF:CI + ISF:NASDAQ + ISF:NYSE + ISF:one_m_diff_volume + ISF:one_m_diff_sp500 ",
]
onemresult1, onemresult2, onemresult3, onemresult4, onemresult5 = [
    sm.ols(formula=spec, data=onemonthdata).fit(cov_type='HC1')
    for spec in onem_formulas
]

# Side-by-side regression table with a fixed covariate order.
onemonthsummary = Stargazer([onemresult1, onemresult2, onemresult3, onemresult4, onemresult5])
onemonthsummary.covariate_order(['Intercept', 'ISF', 'CI','NASDAQ','NYSE', 'one_m_diff_volume', 'one_m_diff_sp500','ISF:CI','ISF:NASDAQ','ISF:NYSE','ISF:one_m_diff_volume','ISF:one_m_diff_sp500'])
onemonthsummary.add_custom_notes(['[1] Standard Errors are heteroscedasticity robust (HC1)'])
onemonthsummary.title("Table 5. Regression Output for t = One Month")

# Write the HTML table inside a context manager so the file handle is closed
# even if rendering or writing raises.
with open("Regression Output:Visualizations/Regression Output/Python/onemonthoutput.html","w") as html_file:
    html_file.write(onemonthsummary.render_html())
onemonthsummary

## Regression Analysis for t = Three Months

In [None]:
# Five nested OLS specifications for the three-month % change in share price,
# all fitted with heteroscedasticity-robust (HC1) standard errors:
#   (1) ISF and CI, (2) + exchange dummies, (3) + % change in volume,
#   (4) + % change in the S&P 500, (5) + interactions of ISF with the rest.
threem_formulas = [
    "three_m_diff_shareprice ~ ISF + CI",
    "three_m_diff_shareprice ~ ISF + CI + NASDAQ + NYSE",
    "three_m_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + three_m_diff_volume",
    "three_m_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + three_m_diff_volume + three_m_diff_sp500 ",
    "three_m_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + three_m_diff_volume + three_m_diff_sp500 + ISF:CI + ISF:NASDAQ + ISF:NYSE + ISF:three_m_diff_volume + ISF:three_m_diff_sp500 ",
]
threemresult1, threemresult2, threemresult3, threemresult4, threemresult5 = [
    sm.ols(formula=spec, data=threemonthdata).fit(cov_type='HC1')
    for spec in threem_formulas
]

# Side-by-side regression table with a fixed covariate order.
threemonthsummary = Stargazer([threemresult1, threemresult2, threemresult3, threemresult4, threemresult5])
threemonthsummary.covariate_order(['Intercept', 'ISF', 'CI','NASDAQ','NYSE', 'three_m_diff_volume', 'three_m_diff_sp500','ISF:CI','ISF:NASDAQ','ISF:NYSE','ISF:three_m_diff_volume','ISF:three_m_diff_sp500'])
threemonthsummary.add_custom_notes(['[1] Standard Errors are heteroscedasticity robust (HC1)'])
threemonthsummary.title("Table 6. Regression Output for t = Three Months")

# Write the HTML table inside a context manager so the file handle is closed
# even if rendering or writing raises.
with open("Regression Output:Visualizations/Regression Output/Python/threemonthoutput.html","w") as html_file:
    html_file.write(threemonthsummary.render_html())
threemonthsummary

## Regression Analysis for t = Six Months

In [None]:
# Five nested OLS specifications for the six-month % change in share price,
# all fitted with heteroscedasticity-robust (HC1) standard errors:
#   (1) ISF and CI, (2) + exchange dummies, (3) + % change in volume,
#   (4) + % change in the S&P 500, (5) + interactions of ISF with the rest.
sixm_formulas = [
    "six_m_diff_shareprice ~ ISF + CI",
    "six_m_diff_shareprice ~ ISF + CI + NASDAQ + NYSE",
    "six_m_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + six_m_diff_volume",
    "six_m_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + six_m_diff_volume + six_m_diff_sp500 ",
    "six_m_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + six_m_diff_volume + six_m_diff_sp500 + ISF:CI + ISF:NASDAQ + ISF:NYSE + ISF:six_m_diff_volume + ISF:six_m_diff_sp500 ",
]
sixmresult1, sixmresult2, sixmresult3, sixmresult4, sixmresult5 = [
    sm.ols(formula=spec, data=sixmonthdata).fit(cov_type='HC1')
    for spec in sixm_formulas
]

# Side-by-side regression table with a fixed covariate order.
sixmonthsummary = Stargazer([sixmresult1, sixmresult2, sixmresult3, sixmresult4, sixmresult5])
sixmonthsummary.covariate_order(['Intercept', 'ISF', 'CI','NASDAQ','NYSE', 'six_m_diff_volume', 'six_m_diff_sp500','ISF:CI','ISF:NASDAQ','ISF:NYSE','ISF:six_m_diff_volume','ISF:six_m_diff_sp500'])
sixmonthsummary.add_custom_notes(['[1] Standard Errors are heteroscedasticity robust (HC1)'])
sixmonthsummary.title("Table 7. Regression Output for t = Six Months")

# Write the HTML table inside a context manager so the file handle is closed
# even if rendering or writing raises.
with open("Regression Output:Visualizations/Regression Output/Python/sixmonthoutput.html","w") as html_file:
    html_file.write(sixmonthsummary.render_html())
sixmonthsummary

## Regression Analysis for t = One Year

In [None]:
# Five nested OLS specifications for the one-year % change in share price,
# all fitted with heteroscedasticity-robust (HC1) standard errors:
#   (1) ISF and CI, (2) + exchange dummies, (3) + % change in volume,
#   (4) + % change in the S&P 500, (5) + interactions of ISF with the rest.
oney_formulas = [
    "one_y_diff_shareprice ~ ISF + CI",
    "one_y_diff_shareprice ~ ISF + CI + NASDAQ + NYSE",
    "one_y_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + one_y_diff_volume",
    "one_y_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + one_y_diff_volume + one_y_diff_sp500 ",
    "one_y_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + one_y_diff_volume + one_y_diff_sp500 + ISF:CI + ISF:NASDAQ + ISF:NYSE + ISF:one_y_diff_volume + ISF:one_y_diff_sp500 ",
]
oneyresult1, oneyresult2, oneyresult3, oneyresult4, oneyresult5 = [
    sm.ols(formula=spec, data=oneyeardata).fit(cov_type='HC1')
    for spec in oney_formulas
]

# Side-by-side regression table with a fixed covariate order.
oneyearsummary = Stargazer([oneyresult1, oneyresult2, oneyresult3, oneyresult4, oneyresult5])
oneyearsummary.covariate_order(['Intercept', 'ISF', 'CI','NASDAQ','NYSE', 'one_y_diff_volume', 'one_y_diff_sp500','ISF:CI','ISF:NASDAQ','ISF:NYSE','ISF:one_y_diff_volume','ISF:one_y_diff_sp500'])
oneyearsummary.add_custom_notes(['[1] Standard Errors are heteroscedasticity robust (HC1)'])
oneyearsummary.title("Table 8. Regression Output for t = One Year")

# Write the HTML table inside a context manager so the file handle is closed
# even if rendering or writing raises.
with open("Regression Output:Visualizations/Regression Output/Python/oneyearoutput.html","w") as html_file:
    html_file.write(oneyearsummary.render_html())
oneyearsummary

## Regression Analysis for t = Two Years

In [None]:
# Five nested OLS specifications for the two-year % change in share price,
# all fitted with heteroscedasticity-robust (HC1) standard errors:
#   (1) ISF and CI, (2) + exchange dummies, (3) + % change in volume,
#   (4) + % change in the S&P 500, (5) + interactions of ISF with the rest.
twoy_formulas = [
    "two_y_diff_shareprice ~ ISF + CI",
    "two_y_diff_shareprice ~ ISF + CI + NASDAQ + NYSE",
    "two_y_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + two_y_diff_volume",
    "two_y_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + two_y_diff_volume + two_y_diff_sp500 ",
    "two_y_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + two_y_diff_volume + two_y_diff_sp500 + ISF:CI + ISF:NASDAQ + ISF:NYSE + ISF:two_y_diff_volume + ISF:two_y_diff_sp500 ",
]
twoyresult1, twoyresult2, twoyresult3, twoyresult4, twoyresult5 = [
    sm.ols(formula=spec, data=twoyeardata).fit(cov_type='HC1')
    for spec in twoy_formulas
]

# Side-by-side regression table with a fixed covariate order.
twoyearsummary = Stargazer([twoyresult1, twoyresult2, twoyresult3, twoyresult4, twoyresult5])
twoyearsummary.covariate_order(['Intercept', 'ISF', 'CI','NASDAQ','NYSE', 'two_y_diff_volume', 'two_y_diff_sp500','ISF:CI','ISF:NASDAQ','ISF:NYSE','ISF:two_y_diff_volume','ISF:two_y_diff_sp500'])
twoyearsummary.add_custom_notes(['[1] Standard Errors are heteroscedasticity robust (HC1)'])
# The title previously reused "Table 8" from the one-year table; the tables
# run 5, 6, 7, 8 for the earlier periods, so this one is Table 9.
twoyearsummary.title("Table 9. Regression Output for t = Two Years")

# Write the HTML table inside a context manager so the file handle is closed
# even if rendering or writing raises.
with open("Regression Output:Visualizations/Regression Output/Python/twoyearoutput.html","w") as html_file:
    html_file.write(twoyearsummary.render_html())
twoyearsummary

## Regression Analysis for t = Three Years

In [None]:
# Five nested OLS specifications for the three-year % change in share price,
# all fitted with heteroscedasticity-robust (HC1) standard errors:
#   (1) ISF and CI, (2) + exchange dummies, (3) + % change in volume,
#   (4) + % change in the S&P 500, (5) + interactions of ISF with the rest.
threey_formulas = [
    "three_y_diff_shareprice ~ ISF + CI",
    "three_y_diff_shareprice ~ ISF + CI + NASDAQ + NYSE",
    "three_y_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + three_y_diff_volume",
    "three_y_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + three_y_diff_volume + three_y_diff_sp500 ",
    "three_y_diff_shareprice ~ ISF + CI + NASDAQ + NYSE + three_y_diff_volume + three_y_diff_sp500 + ISF:CI + ISF:NASDAQ + ISF:NYSE + ISF:three_y_diff_volume + ISF:three_y_diff_sp500 ",
]
threeyresult1, threeyresult2, threeyresult3, threeyresult4, threeyresult5 = [
    sm.ols(formula=spec, data=threeyeardata).fit(cov_type='HC1')
    for spec in threey_formulas
]

# Side-by-side regression table with a fixed covariate order.
threeyearsummary = Stargazer([threeyresult1, threeyresult2, threeyresult3, threeyresult4, threeyresult5])
threeyearsummary.covariate_order(['Intercept', 'ISF', 'CI','NASDAQ','NYSE', 'three_y_diff_volume', 'three_y_diff_sp500','ISF:CI','ISF:NASDAQ','ISF:NYSE','ISF:three_y_diff_volume','ISF:three_y_diff_sp500'])
threeyearsummary.add_custom_notes(['[1] Standard Errors are heteroscedasticity robust (HC1)'])
# The title previously reused "Table 8" from the one-year table; the tables
# run 5, 6, 7, 8 for the earlier periods, so the three-year table is Table 10.
threeyearsummary.title("Table 10. Regression Output for t = Three Years")

# Write the HTML table inside a context manager so the file handle is closed
# even if rendering or writing raises.
with open("Regression Output:Visualizations/Regression Output/Python/threeyearoutput.html","w") as html_file:
    html_file.write(threeyearsummary.render_html())
threeyearsummary