In [None]:
#installing the text extraction module
!pip install pdfminer.six

Collecting pdfminer.six
  Downloading pdfminer.six-20201018-py3-none-any.whl (5.6 MB)
[K     |████████████████████████████████| 5.6 MB 3.9 MB/s 
Collecting cryptography
  Downloading cryptography-3.4.7-cp36-abi3-manylinux2014_x86_64.whl (3.2 MB)
[K     |████████████████████████████████| 3.2 MB 36.2 MB/s 
Installing collected packages: cryptography, pdfminer.six
Successfully installed cryptography-3.4.7 pdfminer.six-20201018


In [None]:

#loading libraries
import numpy as np
import os
import re
import pandas as pd
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from nltk.stem import PorterStemmer 
from pdfminer.high_level import extract_text
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
#downloading extra data for the deployed NLP library (nltk)
nltk.download('stopwords')
nltk.download('punkt')
stopword = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
ps = PorterStemmer()  #for stemming

In [None]:
from google.colab import drive
drive.mount('/content/drive/')


Mounted at /content/drive/


In [None]:
#function for extracting mda text files
def mda_text_extractor(file_name,start_page, end_page):
  text = extract_text(file_name,page_numbers=list(np.arange(start_page,end_page+1)))
  return text

#function for writing mda text files into a text file
def write_to_text(textf, name):
  os.makedirs(os.path.dirname(f'MD&A_text_files/{name}'), exist_ok=True)
  with open(f'MD&A_text_files/{name}', 'w',encoding='utf-8') as ofile:
    ofile.write(textf)

#function for preprocessing text 
def prepare(name):  
  mylines = []                            
  with open (f'MD&A_text_files/{name}', 'rt',encoding='utf-8', errors='ignore') as myfile:
      for line in myfile:
          line1 = re.sub('[^a-zA-Z]',' ', line) #removing all except words
          line2 = line1.rstrip().lstrip().lower() #lower casing and removing extra spaces
          mylines.append(line2) 
  return mylines

#function for tokenizing text, removing stopword and stemming
def tokenizing(lines,stopword):
  corpus = " ".join(lines)
  corpus_tokenized = nltk.word_tokenize(corpus)
  corpus_tokenized = [word for word in corpus_tokenized if not word in stopword]   # removes stopwords
  corpus_tokenized = [ps.stem(word) for word in corpus_tokenized]  # keep the stem
  corpus_cleaned = " ".join(corpus_tokenized)
  corpus_cleaned =np.array(corpus_cleaned).reshape(-1)

  return corpus_cleaned

def count_length(file_name, start_page, end_page, out_name):
  text = mda_text_extractor(file_name,start_page, end_page)
  write_to_text(text,out_name)
  lines = prepare(out_name)
  corpus = " ".join(lines)
  corpus_tokenized = nltk.word_tokenize(corpus)
  n_words = len(corpus_tokenized)
  return n_words  
  


#function for applying tfidf vectorization
def tfidf_vsm(corpus_cleaned):
  vectorizer = TfidfVectorizer()
  x =  vectorizer.fit_transform(corpus_cleaned).toarray()
  x_df = pd.DataFrame(x)

  return x, x_df

#function for converting text into VSM model
def convert_to_vsm(file_name, start_page, end_page, out_name):
  text = mda_text_extractor(file_name,start_page, end_page)
  write_to_text(text,out_name)
  lines = prepare(out_name)
  corpus = " ".join(lines)
  corpus_tokenized = nltk.word_tokenize(corpus)
  n_words = len(corpus_tokenized)

  corpus_cleaned = tokenizing(lines,stopword)
  x , x_df = tfidf_vsm(corpus_cleaned)

  return x, x_df, n_words

#functions to calculate the final scores
def truncateData(data):
    truncLength = min([len(sym) for sym in data])
    return [x[len(x) - truncLength:] for x in data]

def calculate_score(x,y):
  lists = [x[0],y[0]]
  tr_list = truncateData(lists)
  score = sum([i*j for i,j in zip(x[0],y[0])])
  return score

def calculate_modification_score(doc1, start_page1, end_page1, out_name1,doc2, start_page2, end_page2, out_name2):
  x, x_df, n_words = convert_to_vsm(doc1, start_page1, end_page1, out_name1)
  y, y_df, n_words1 = convert_to_vsm(doc2, start_page2, end_page2, out_name2)

  ss = calculate_score(x,y)
  return  ss,n_words, n_words1


In [None]:

dir1 = "/content/MD&A_Directory/MD&A Directory/"
all_files = os.listdir(dir1)



In [None]:
len(all_files)

2

In [None]:
key_names0 = ["".join(i.split(",")[0].split("_")[:-1]) for i in all_files] 
tmp = [x for x in key_names0 if str(x) != 'nan']
print(len(tmp))

2


In [None]:
key_names1 = list(set(key_names0))

In [None]:
len(key_names1)

1

In [None]:
#putting the pdf files into seperate folders
for b in key_names1:
  try:
    pattern = b 
    matching_files = [f for f in all_files if pattern in "".join(f.split(",")[0].split("_")[:-1])]
    if len(matching_files) ==1:
      continue
    
    destinn = "/content/MD&A_Directory/MD&A Directory/"+ pattern + "/"
    os.makedirs(destinn, exist_ok= True)
    
    for fl in matching_files:
      os.rename("/content/MD&A_Directory/MD&A Directory/"+  fl , destinn + fl)
  except:
    print(b)
    
  

In [None]:
import math

In [None]:
import json

In [None]:
pagesn = json.load(open('/content/drive/MyDrive/page_numbers_1.json')) #loads in the file containing the page numbers that the texts needs to be extracted from

In [None]:
pagesn['Gran_Tierra_annual_report_Dec 31, 2015_Item 7_ONLY_p.58-82.pdf'] = [58,82]
pagesn['Gran_Tierra_annual_report_Dec 31, 2016_Item 7_ONLY_p.56-82.pdf'] = [56,82]
pagesn['Gran_Tierra_annual_report_Dec 31, 2017_Item 7_ONLY_p.34-57.pdf'] = [34,57]
pagesn['Power_Financial_Corporation_of_Canada_MD&A_Dec 31, 2015_p.4-44.pdf'] = [4,44]
pagesn['Power_Financial_Corporation_of_Canada_MD&A_Dec 31, 2016_p.4-47.pdf'] = [4,47]
pagesn['Power_Corporation_of_Canada_MD&A_Dec 31, 2015_p.4-50.pdf'] = [4,50]
pagesn['Power_Corporation_of_Canada_MD&A_Dec 31, 2016_p.4-52.pdf'] = [4,52]
pagesn['Power_Corporation_of_Canada_MD&A_Dec 31, 2017_p.4-62.pdf'] = [4,62]
pagesn['Klondex_Mines_Ltd_MD&A_Dec 31, 2016_Item_7_p.33-53.pdf'] = [33,53]
pagesn['Klondex_Mines_Ltd_MD&A_Dec 31, 2017_Item_7_p.34-58.pdf'] = [34,58]
pagesn['Cascade_MD&A_Dec 31, 2016_p.3-end.pdf'] = [3,1000]
pagesn['Cascade_MD&A_Dec 31, 2017_p.3-end.pdf'] = [3,1000]
pagesn['Primero_Mining_Dec 31, 2016_p20 -73 .pdf'] = [20,73]


In [None]:
scores = {}

In [None]:
#loop that iterates in the folders and calculate the score between two years
doc_length1 ={}
doc_length2 ={}
scores = {}
error = []

for fn in key_names1:
  try:
    
    dir_file ="/content/MD&A_Directory/MD&A Directory/" + fn + "/"     # Replace with path to your directory: absolute or relative
    pdf_names = os.listdir(dir_file)
    pdf_names = [g for g in pdf_names if g!='.ipynb_checkpoints']
    helper_dict = {}
    years = [2016,2017]
    for y, pds in enumerate(pdf_names):
      if y<2:
        name = os.path.splitext(pds)[0]
        ind = years[y]
        helper_dict[int(ind)] = pds
      else:
        continue
    for year in years[:-1]:
      try:
        ic = " ".join(name.split("_")[:3])
        id = "_".join([ic,str(year),str(year+1)])
        doc1 = dir_file +  helper_dict[year]
        try:
          start_page1 = pagesn[helper_dict[year]][0]
        except:
          start_page1 = 0
        try:
          end_page1 = pagesn[helper_dict[year]][1]
        except:
          end_page1 = 1000
        doc2 = dir_file + helper_dict[year+1]
        try:
          start_page2 = pagesn[helper_dict[year+1]][0]
        except:
          start_page2 = 0
        try:
          end_page2 = pagesn[helper_dict[year+1]][1]
        except:
          end_page2 =1000   
        out_name1 = helper_dict[year] + '.txt'
        out_name2 = helper_dict[year+1] + '.txt'
        id = "_".join([fn,str(year),str(year+1)])

        score,n1,n2=calculate_modification_score(doc1, start_page1, end_page1,out_name1,doc2, start_page2 , end_page2 , out_name2)
        print(id)
        print(1-score)
        scores[id]  = 1-score
        #doc_length1[id] =n1
        doc_length2[id] =n2
      except Exception as e:
        error.append(id)
        print(e)
        continue 
  except Exception as e:
    print(e)
    


PrimeroMiningMD&A_2016_2017
0.8372004960467611


'\nscores = {x:y for x,y in scores.items()}\nscores = {x:y for x,y in scores.items() if y!=0}\ndf = pd.DataFrame.from_dict(scores, orient=\'index\', columns = [\'Score\'])\n#df1 = pd.DataFrame.from_dict(doc_length1, orient=\'index\', columns = [\'n_words\'])\ndf2 = pd.DataFrame.from_dict(doc_length2, orient=\'index\', columns = [\'n_words\'])\ndf.rename_axis("Name", axis = \'columns\',inplace = True)\n\ndf.to_csv(\'/content/drive/MyDrive/MDA_SCORE_2018_2019_ext.csv\')\n#df1.to_csv(\'/content/drive/MyDrive/doc_length1.csv\')\ndf2.to_csv(\'/content/drive/MyDrive/doc_length_2018_2019_ext.csv\')\n'

In [None]:

df0 = pd.read_csv('/content/drive/MyDrive/MDA_SCORE_2015_2016_ext.csv')
df0 = df0.rename(columns={'Unnamed: 0':'Company_Names','Score':'Raw_Score'})
df0 = df0[['Company_Names','Raw_Score']].reset_index(drop=True)
df0l = pd.read_csv('/content/drive/MyDrive/doc_length_2015_2016_ext.csv')
df0l = df0l.rename(columns={'Unnamed: 0':'Company_Names'})
df0l = df0l[['Company_Names','n_words']].reset_index(drop=True)
df0 = df0.merge(df0l,on='Company_Names')

df1 = pd.read_csv('/content/drive/MyDrive/MDA_SCORE_2016_2017_ext.csv')
df1 = df1.rename(columns={'Unnamed: 0':'Company_Names','Score':'Raw_Score'})
df1 = df1[['Company_Names','Raw_Score']].reset_index(drop=True)
df1l = pd.read_csv('/content/drive/MyDrive/doc_length_2016_2017_ext.csv')
df1l = df1l.rename(columns={'Unnamed: 0':'Company_Names'})
df1l = df1l[['Company_Names','n_words']].reset_index(drop=True)
df1 = df1.merge(df1l,on='Company_Names')

df2 = pd.read_csv('/content/drive/MyDrive/MDA_SCORE_2017_2018_ext.csv')
df2 = df2.rename(columns={'Unnamed: 0':'Company_Names','Score':'Raw_Score'})
df2 = df2[['Company_Names','Raw_Score']].reset_index(drop=True)
df2l = pd.read_csv('/content/drive/MyDrive/doc_length_2017_2018_ext.csv')
df2l = df2l.rename(columns={'Unnamed: 0':'Company_Names'})
df2l = df2l[['Company_Names','n_words']].reset_index(drop=True)
df2 = df2.merge(df2l,on='Company_Names')

df3 = pd.read_csv('/content/drive/MyDrive/MDA_SCORE_2018_2019_ext.csv')
df3 = df3.rename(columns={'Unnamed: 0':'Company_Names','Score':'Raw_Score'})
df3 = df3[['Company_Names','Raw_Score']].reset_index(drop=True)
df3l = pd.read_csv('/content/drive/MyDrive/doc_length_2018_2019_ext.csv')
df3l = df3l.rename(columns={'Unnamed: 0':'Company_Names'})
df3l = df3l[['Company_Names','n_words']].reset_index(drop=True)
df3 = df3.merge(df3l,on='Company_Names')
df_final = pd.concat([df0,df1,df2,df3],axis=0).reset_index(drop=True)

In [None]:
df_final

Unnamed: 0,Company_Names,Raw_Score,n_words
0,AltagasMD&A_2015_2016,0.841735,33219
1,CrewEnergyMD&A_2016_2017,0.746125,10194
2,AltagasMD&A_2016_2017,0.760981,32144
3,CrewEnergyMD&A_2017_2018,0.717666,10194
4,AltagasMD&A_2017_2018,0.814733,32144
5,AltagasMD&A_2018_2019,0.834793,30326


In [None]:
df_final.to_csv('/content/drive/MyDrive/MDA_SCORE_Final_ext.csv')

In [None]:
df = pd.read_csv('/content/MDA_SCORE_FINAL2_UPDATED (CL).csv')

#reimporting the final file to calculate fitted score (the last update was dated on July 30, 2021). Second udpate Sept 4.

In [None]:
df1 = df[['Company_Names','Raw_Score','n_words']].reset_index(drop=True)

In [None]:
df.head()

Unnamed: 0,Match Key,index,Company_Names,Raw_Score,n_words,fitted_score,MD&A_SCORE,Unnamed: 7,Unnamed: 8
0,ADVANTAGE OIL & GAS LTD2016,259.0,AdvantageOilMD&A_2015_2016,0.782025,7595,0.78,0.002025,,
1,ADVANTAGE OIL & GAS LTD2017,537.0,AdvantageOilMD&A_2016_2017,0.740817,9122,0.78,-0.039183,Pass,
2,ADVANTAGE OIL & GAS LTD2018,813.0,AdvantageOilMD&A_2017_2018,0.791845,9122,0.78,0.011845,Pass,
3,ADVANTAGE OIL & GAS LTD2019,1093.0,AdvantageOilMD&A_2018_2019,0.78274,9846,0.78,0.00274,Pass,
4,ADVANZ PHARMA CORP LTD2016,36.0,AdvanzPharmaMD&A_2015_2016,0.775292,19291,0.81,-0.034708,Pass,


In [None]:
from scipy.interpolate import approximate_taylor_polynomial
import numpy as np
from sklearn.linear_model import LinearRegression # sklearn linear model
from sklearn.metrics import mean_squared_error
#df = pd.read_csv('score_data.csv')
Y = df1['Raw_Score'].values
X = df1['n_words'].values
X_test = X
y = Y

In [None]:

temp = X.copy()
unit = X_test.copy()
a = temp
b = unit

             # Iteration starts from the power of 2
for j in range(2, 6):

                 # Calculate the new column
      new_a = a ** j
      new_b = b ** j

                 # Stack new column
      temp = pd.concat((pd.DataFrame(temp), pd.DataFrame(new_a)), axis=1)
      unit = pd.concat((pd.DataFrame(unit), pd.DataFrame(new_b)), axis=1)

      temp = np.array(temp)
      unit = np.array(unit)

     # Import linear model
lr = LinearRegression()
lr.fit(temp, y)

     # Return the predicted value of the test set
fitted_score=lr.predict(unit)
fitted_score = fitted_score.round(2)
mda_score = Y - fitted_score

df1['fitted_score'] = fitted_score
df1['MD&A_SCORE'] = mda_score
#df.to_csv('/final_score_data.csv')

In [None]:
df1.to_csv('/content/drive/MyDrive/MDA_SCORE_Final2.csv')

In [None]:
# The coefficients
lr.coef_

array([ 2.53562491e-06, -7.93089448e-12,  6.56092870e-18,  2.12717088e-22,
        2.28629813e-23])

In [None]:
lr.intercept_

0.761140207629408

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
pip install linearmodels

Collecting linearmodels
  Downloading linearmodels-4.24-cp37-cp37m-manylinux1_x86_64.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 3.8 MB/s 
[?25hCollecting property-cached>=1.6.3
  Downloading property_cached-1.6.4-py2.py3-none-any.whl (7.8 kB)
Collecting mypy-extensions>=0.4
  Downloading mypy_extensions-0.4.3-py2.py3-none-any.whl (4.5 kB)
Collecting statsmodels>=0.11
  Downloading statsmodels-0.12.2-cp37-cp37m-manylinux1_x86_64.whl (9.5 MB)
[K     |████████████████████████████████| 9.5 MB 38.9 MB/s 
Collecting pyhdfe>=0.1
  Downloading pyhdfe-0.1.0-py3-none-any.whl (18 kB)
Installing collected packages: statsmodels, pyhdfe, property-cached, mypy-extensions, linearmodels
  Attempting uninstall: statsmodels
    Found existing installation: statsmodels 0.10.2
    Uninstalling statsmodels-0.10.2:
      Successfully uninstalled statsmodels-0.10.2
Successfully installed linearmodels-4.24 mypy-extensions-0.4.3 property-cached-1.6.4 pyhdfe-0.1.0 statsmodels-0.12.2


In [None]:
# -*- coding: utf-8 -*-
"""
Created on Sat Jul  3 20:26:19 2021

@author: Amin.Masoudi

Updated August 13, 2021
"""

import pandas as pd

import statsmodels.formula.api as sm
#import statsmodels.api as sm1

import numpy as np

df = pd.read_excel('/Condensed Master Data Aug 7, 2021 (For Amin).xlsx')



dfd = df[['Revision','cusip','fyear','Car0','Car1','Car2','Car3','Scar0','Scar1','Scar2','Scar3','Car_e0','Car_e1','Car_e2','Car_e3','Scar_e0', 'Scar_e1', 'Scar_e2','Scar_e3','Other information MD&A Recoded','Score','Raw Score','Size','Filelate','NewItems (excluding 0)','POST']]
#CL: I added all of the relevant data columns
dfd.rename(columns={'Other information MD&A Recoded':'Auditor_OI','NewItems (excluding 0)':'NewItems'},inplace=True)
dfd['year'] = pd.to_datetime(dfd['fyear'],format='%Y')


dfd['AuScore'] = dfd['Auditor_OI'] * dfd['Score']

#Investors:
#Without CARea for Car0
#Score
cross_section_ols = sm.ols(formula='Car0 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Car0 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())





A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



First model result

robust
                            OLS Regression Results                            
Dep. Variable:                   Car0   R-squared:                       0.097
Model:                            OLS   Adj. R-squared:                  0.089
Method:                 Least Squares   F-statistic:                     92.20
Date:                Tue, 17 Aug 2021   Prob (F-statistic):          6.80e-139
Time:                        18:40:01   Log-Likelihood:                 1515.1
No. Observations:                1141   AIC:                            -3010.
Df Residuals:                    1131   BIC:                            -2960.
Df Model:                           9                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Sat Jul  3 20:26:19 2021

@author: Amin.Masoudi

Updated August 13, 2021
"""

import pandas as pd

import statsmodels.formula.api as sm
#import statsmodels.api as sm1

import numpy as np

#df = pd.read_excel('/content/Condensed Master Data Aug 7, 2021 (For Amin).xlsx')
df = pd.read_excel('/Condensed Master Data Aug 7, 2021 (For Amin).xlsx')


dfd = df[['Revision','cusip','fyear','Car0','Car1','Car2','Car3','Scar0','Scar1','Scar2','Scar3','Car_e0','Car_e1','Car_e2','Car_e3','Scar_e0', 'Scar_e1', 'Scar_e2','Scar_e3','Other information MD&A Recoded','Score','Raw Score','Size','Filelate','NewItems (excluding 0)','POST']]
#CL: I added all of the relevant data columns
dfd.rename(columns={'Other information MD&A Recoded':'Auditor_OI','NewItems (excluding 0)':'NewItems'},inplace=True)
dfd['year'] = pd.to_datetime(dfd['fyear'],format='%Y')


dfd['AuScore'] = dfd['Auditor_OI'] * dfd['Score']

#Investors:
#Without CARea for Car0
#Score
cross_section_ols = sm.ols(formula='Car0 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Car0 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())





  import pandas.util.testing as tm
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



First model result

robust
                            OLS Regression Results                            
Dep. Variable:                   Car0   R-squared:                       0.097
Model:                            OLS   Adj. R-squared:                  0.089
Method:                 Least Squares   F-statistic:                     92.20
Date:                Tue, 17 Aug 2021   Prob (F-statistic):          6.80e-139
Time:                        18:26:20   Log-Likelihood:                 1515.1
No. Observations:                1141   AIC:                            -3010.
Df Residuals:                    1131   BIC:                            -2960.
Df Model:                           9                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------

In [None]:
#Investors:
#Without CARea for Car1
#Score

cross_section_ols = sm.ols(formula='Car1 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Car1 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',data=dfdd,groups=dfdd['cusip']).fit()
print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())




First model result

robust
                            OLS Regression Results                            
Dep. Variable:                   Car1   R-squared:                       0.128
Model:                            OLS   Adj. R-squared:                  0.121
Method:                 Least Squares   F-statistic:                     100.3
Date:                Fri, 13 Aug 2021   Prob (F-statistic):          2.60e-148
Time:                        20:43:34   Log-Likelihood:                 1591.7
No. Observations:                1141   AIC:                            -3163.
Df Residuals:                    1131   BIC:                            -3113.
Df Model:                           9                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------

In [None]:
#Investors:
#Without CARea for Car2
#Score

cross_section_ols = sm.ols(formula='Car2 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Car2 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',data=dfdd,groups=dfdd['cusip']).fit()
print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())




First model result

robust
                            OLS Regression Results                            
Dep. Variable:                   Car2   R-squared:                       0.096
Model:                            OLS   Adj. R-squared:                  0.089
Method:                 Least Squares   F-statistic:                     93.08
Date:                Fri, 13 Aug 2021   Prob (F-statistic):          6.15e-140
Time:                        20:44:18   Log-Likelihood:                 1514.8
No. Observations:                1141   AIC:                            -3010.
Df Residuals:                    1131   BIC:                            -2959.
Df Model:                           9                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------

In [None]:
#Investors:
#Without CARea for Car3
#Score
cross_section_ols = sm.ols(formula='Car3 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Car3 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',data=dfdd,groups=dfdd['cusip']).fit()
print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:                   Car3   R-squared:                       0.132
Model:                            OLS   Adj. R-squared:                  0.126
Method:                 Least Squares   F-statistic:                     101.1
Date:                Fri, 13 Aug 2021   Prob (F-statistic):          2.97e-149
Time:                        20:45:51   Log-Likelihood:                 1591.5
No. Observations:                1141   AIC:                            -3163.
Df Residuals:                    1131   BIC:                            -3113.
Df Model:                           9                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Investors:
#Without CARea for Scar0
#Score

cross_section_ols = sm.ols(formula='Scar0 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Scar0 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',data=dfdd,groups=dfdd['cusip']).fit()
print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:                  Scar0   R-squared:                       0.041
Model:                            OLS   Adj. R-squared:                  0.033
Method:                 Least Squares   F-statistic:                     113.1
Date:                Fri, 13 Aug 2021   Prob (F-statistic):          1.64e-162
Time:                        20:46:32   Log-Likelihood:                -2208.2
No. Observations:                1141   AIC:                             4436.
Df Residuals:                    1131   BIC:                             4487.
Df Model:                           9                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Investors:
#Without CARea for Scar1
#Score

cross_section_ols = sm.ols(formula='Scar1 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Scar1 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',data=dfdd,groups=dfdd['cusip']).fit()
print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:                  Scar1   R-squared:                       0.054
Model:                            OLS   Adj. R-squared:                  0.046
Method:                 Least Squares   F-statistic:                     118.4
Date:                Fri, 13 Aug 2021   Prob (F-statistic):          5.03e-168
Time:                        20:48:51   Log-Likelihood:                -2212.0
No. Observations:                1138   AIC:                             4444.
Df Residuals:                    1128   BIC:                             4494.
Df Model:                           9                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Investors:
#Without CARea for Scar2
#Score

cross_section_ols = sm.ols(formula='Scar2 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Scar2 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',data=dfdd,groups=dfdd['cusip']).fit()
print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:                  Scar2   R-squared:                       0.040
Model:                            OLS   Adj. R-squared:                  0.032
Method:                 Least Squares   F-statistic:                     112.4
Date:                Fri, 13 Aug 2021   Prob (F-statistic):          9.94e-162
Time:                        20:49:36   Log-Likelihood:                -2217.9
No. Observations:                1141   AIC:                             4456.
Df Residuals:                    1131   BIC:                             4506.
Df Model:                           9                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Investors:
#Without CARea for Scar3
#Score

cross_section_ols = sm.ols(formula='Scar3 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Scar3 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',data=dfdd,groups=dfdd['cusip']).fit()
print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:                  Scar3   R-squared:                       0.054
Model:                            OLS   Adj. R-squared:                  0.046
Method:                 Least Squares   F-statistic:                     117.9
Date:                Fri, 13 Aug 2021   Prob (F-statistic):          1.20e-167
Time:                        20:50:04   Log-Likelihood:                -2225.9
No. Observations:                1141   AIC:                             4472.
Df Residuals:                    1131   BIC:                             4522.
Df Model:                           9                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Investors:
#With CARea Car0 - Car_e0
#Score

cross_section_ols = sm.ols(formula='Car0 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Car_e0',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Car0 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore+Car_e0',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())





First model result

robust
                            OLS Regression Results                            
Dep. Variable:                   Car0   R-squared:                       0.834
Model:                            OLS   Adj. R-squared:                  0.833
Method:                 Least Squares   F-statistic:                     825.1
Date:                Fri, 13 Aug 2021   Prob (F-statistic):               0.00
Time:                        20:53:42   Log-Likelihood:                 2482.6
No. Observations:                1141   AIC:                            -4943.
Df Residuals:                    1130   BIC:                            -4888.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------

In [None]:
#Investors:
#With CARea Car1 - Car_e1
#Score

cross_section_ols = sm.ols(formula='Car1 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Car_e1',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Car1 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore+Car_e1',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())




First model result

robust
                            OLS Regression Results                            
Dep. Variable:                   Car1   R-squared:                       0.822
Model:                            OLS   Adj. R-squared:                  0.820
Method:                 Least Squares   F-statistic:                     813.4
Date:                Fri, 13 Aug 2021   Prob (F-statistic):               0.00
Time:                        20:55:11   Log-Likelihood:                 2497.1
No. Observations:                1141   AIC:                            -4972.
Df Residuals:                    1130   BIC:                            -4917.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------

In [None]:
#Investors:
#With CARea Car2 - Car_e2
#Score

cross_section_ols = sm.ols(formula='Car2 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Car_e2',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Car2 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore+Car_e2',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())




First model result

robust
                            OLS Regression Results                            
Dep. Variable:                   Car2   R-squared:                       0.834
Model:                            OLS   Adj. R-squared:                  0.832
Method:                 Least Squares   F-statistic:                     839.2
Date:                Fri, 13 Aug 2021   Prob (F-statistic):               0.00
Time:                        20:55:48   Log-Likelihood:                 2480.5
No. Observations:                1141   AIC:                            -4939.
Df Residuals:                    1130   BIC:                            -4884.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------

In [None]:
#Investors:
#With CARea Car3 - Car_e3
#Score

cross_section_ols = sm.ols(formula='Car3 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Car_e3',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Car3 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore+Car_e3',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())




First model result

robust
                            OLS Regression Results                            
Dep. Variable:                   Car3   R-squared:                       0.818
Model:                            OLS   Adj. R-squared:                  0.816
Method:                 Least Squares   F-statistic:                     774.4
Date:                Fri, 13 Aug 2021   Prob (F-statistic):               0.00
Time:                        20:57:01   Log-Likelihood:                 2481.7
No. Observations:                1141   AIC:                            -4941.
Df Residuals:                    1130   BIC:                            -4886.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------

In [None]:
#Investors:
#With CARea Scar0 - Scar_e0
#Score

cross_section_ols = sm.ols(formula='Scar0 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Scar_e0',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Scar0 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore+Scar_e0',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:                  Scar0   R-squared:                       0.807
Model:                            OLS   Adj. R-squared:                  0.805
Method:                 Least Squares   F-statistic:                     998.1
Date:                Fri, 13 Aug 2021   Prob (F-statistic):               0.00
Time:                        20:59:03   Log-Likelihood:                -1294.5
No. Observations:                1141   AIC:                             2611.
Df Residuals:                    1130   BIC:                             2666.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Investors:
#With CARea Scar1 - Scar_e1
#Score

cross_section_ols = sm.ols(formula='Scar1 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Scar_e1',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Scar1 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore+Scar_e1',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:                  Scar1   R-squared:                       0.778
Model:                            OLS   Adj. R-squared:                  0.776
Method:                 Least Squares   F-statistic:                     743.3
Date:                Fri, 13 Aug 2021   Prob (F-statistic):               0.00
Time:                        21:00:08   Log-Likelihood:                -1387.2
No. Observations:                1138   AIC:                             2796.
Df Residuals:                    1127   BIC:                             2852.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Investors:
#With CARea Scar2 - Scar_e2
#Score

cross_section_ols = sm.ols(formula='Scar2 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Scar_e2',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Scar2 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore+Scar_e2',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:                  Scar2   R-squared:                       0.805
Model:                            OLS   Adj. R-squared:                  0.803
Method:                 Least Squares   F-statistic:                     965.8
Date:                Fri, 13 Aug 2021   Prob (F-statistic):               0.00
Time:                        21:01:23   Log-Likelihood:                -1308.6
No. Observations:                1141   AIC:                             2639.
Df Residuals:                    1130   BIC:                             2695.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Investors:
#With CARea Scar3 - Scar_e3
#Score

cross_section_ols = sm.ols(formula='Scar3 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Scar_e3',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Scar3 ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore+Scar_e3',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:                  Scar3   R-squared:                       0.773
Model:                            OLS   Adj. R-squared:                  0.771
Method:                 Least Squares   F-statistic:                     716.2
Date:                Fri, 13 Aug 2021   Prob (F-statistic):               0.00
Time:                        21:02:03   Log-Likelihood:                -1410.6
No. Observations:                1141   AIC:                             2843.
Df Residuals:                    1130   BIC:                             2899.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Analysts:

#Without CARea
#Score

cross_section_ols = sm.ols(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                  0.012
Method:                 Least Squares   F-statistic:                     13.95
Date:                Fri, 13 Aug 2021   Prob (F-statistic):           5.89e-23
Time:                        21:04:05   Log-Likelihood:                 696.63
No. Observations:                 834   AIC:                            -1373.
Df Residuals:                     824   BIC:                            -1326.
Df Model:                           9                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Analysts:

#With CARea - Car_e0
#Score

cross_section_ols = sm.ols(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Car_e0',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Car_e0',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())




First model result

robust
                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.037
Model:                            OLS   Adj. R-squared:                  0.025
Method:                 Least Squares   F-statistic:                     13.17
Date:                Fri, 13 Aug 2021   Prob (F-statistic):           2.46e-23
Time:                        21:06:25   Log-Likelihood:                 701.14
No. Observations:                 833   AIC:                            -1380.
Df Residuals:                     822   BIC:                            -1328.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------

In [None]:
#Analysts:

#With CARea - Car_e1
#Score

cross_section_ols = sm.ols(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Car_e1',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Car_e1',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.033
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     12.76
Date:                Fri, 13 Aug 2021   Prob (F-statistic):           1.55e-22
Time:                        21:06:57   Log-Likelihood:                 699.50
No. Observations:                 833   AIC:                            -1377.
Df Residuals:                     822   BIC:                            -1325.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Analysts:

#With CARea - Car_e2
#Score

cross_section_ols = sm.ols(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Car_e2',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Car_e2',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.037
Model:                            OLS   Adj. R-squared:                  0.025
Method:                 Least Squares   F-statistic:                     13.20
Date:                Fri, 13 Aug 2021   Prob (F-statistic):           2.24e-23
Time:                        21:07:22   Log-Likelihood:                 701.28
No. Observations:                 833   AIC:                            -1381.
Df Residuals:                     822   BIC:                            -1329.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Analysts:

#With CARea - Car_e3
#Score

cross_section_ols = sm.ols(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Car_e3',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Car_e3',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.033
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     12.75
Date:                Fri, 13 Aug 2021   Prob (F-statistic):           1.56e-22
Time:                        21:07:50   Log-Likelihood:                 699.50
No. Observations:                 833   AIC:                            -1377.
Df Residuals:                     822   BIC:                            -1325.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Analysts:

#With CARea - Scar_e0
#Score

cross_section_ols = sm.ols(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Scar_e0',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Scar_e0',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                  0.012
Method:                 Least Squares   F-statistic:                     13.48
Date:                Fri, 13 Aug 2021   Prob (F-statistic):           6.32e-24
Time:                        21:08:35   Log-Likelihood:                 695.45
No. Observations:                 833   AIC:                            -1369.
Df Residuals:                     822   BIC:                            -1317.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Analysts:

#With CARea - Scar_e1
#Score

cross_section_ols = sm.ols(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Scar_e1',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Scar_e1',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.025
Model:                            OLS   Adj. R-squared:                  0.013
Method:                 Least Squares   F-statistic:                     14.08
Date:                Fri, 13 Aug 2021   Prob (F-statistic):           4.85e-25
Time:                        21:09:05   Log-Likelihood:                 692.04
No. Observations:                 830   AIC:                            -1362.
Df Residuals:                     819   BIC:                            -1310.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Analysts:

#With CARea - Scar_e2
#Score

cross_section_ols = sm.ols(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Scar_e2',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Scar_e2',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                  0.012
Method:                 Least Squares   F-statistic:                     13.49
Date:                Fri, 13 Aug 2021   Prob (F-statistic):           6.24e-24
Time:                        21:09:39   Log-Likelihood:                 695.44
No. Observations:                 833   AIC:                            -1369.
Df Residuals:                     822   BIC:                            -1317.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
#Analysts:

#With CARea - Scar_e3
#Score

cross_section_ols = sm.ols(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Scar_e3',
                          data=dfd).fit()

dfdd = dfd.dropna()
 
cross_section_mixedlm = sm.mixedlm(formula='Revision ~ Score+ Size+ C(year)+ Filelate+ NewItems+ Auditor_OI+ AuScore + Scar_e3',data=dfdd,groups=dfdd['cusip']).fit()

print('\nFirst model result\n')   
print('robust')                      
print(cross_section_ols.get_robustcov_results(cov_type='HC1').summary())
print('\nnon-robust\n')
print(cross_section_ols.summary())
print('\nmixed LM\n')
print(cross_section_mixedlm.summary())


First model result

robust
                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.025
Model:                            OLS   Adj. R-squared:                  0.013
Method:                 Least Squares   F-statistic:                     14.22
Date:                Fri, 13 Aug 2021   Prob (F-statistic):           2.58e-25
Time:                        21:10:17   Log-Likelihood:                 695.93
No. Observations:                 833   AIC:                            -1370.
Df Residuals:                     822   BIC:                            -1318.
Df Model:                          10                                         
Covariance Type:                  HC1                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------



In [None]:
chek1 = dfd.isnull()
#chek1[chek1=='True']
chek1

Unnamed: 0,Revision,cusip,fyear,Car0,Car1,Car2,Car3,Scar0,Scar1,Scar2,Scar3,Car_e0,Car_e1,Car_e2,Car_e3,Scar_e0,Scar_e1,Scar_e2,Scar_e3,Auditor_OI,Score,Raw Score,Size,Filelate,NewItems,POST,year,AuScore
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,True,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,True,True,False,True,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1359,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1360,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1361,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1362,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [None]:
#Time-series fixed effects(second model)

import pandas as pd

import statsmodels.formula.api as sm
#import statsmodels.api as sm1

import numpy as np

df = pd.read_excel('/content/Treatment Group August 7, 2021.xlsx') 

dfd = df[['Revision','cusip','fyear','Car0','Car1','Car2','Car3','Scar0','Scar1','Scar2','Scar3','Car_e0','Car_e1','Car_e2','Car_e3','Scar_e0', 'Scar_e1', 'Scar_e2','Scar_e3','Other information MD&A Recoded','Score','Raw Score','Size','Filelate','NewItems (excluding 0)','POST']]
#CL: I added all of the relevant data columns
dfd.rename(columns={'Other information MD&A Recoded':'Auditor_OI','NewItems (excluding 0)':'NewItems'},inplace=True)
dfd['year'] = pd.to_datetime(dfd['fyear'],format='%Y')


dfd['POScore'] = dfd['POST'] * dfd['Score']

#Without CARea
#Score

for i in ['Car0','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car0 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/use


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car0   R-squared:                       0.346
Model:                            OLS   Adj. R-squared:                  0.093
Method:                 Least Squares   F-statistic:                     1.369
Date:                Sun, 15 Aug 2021   Prob (F-statistic):            0.00295
Time:                        01:36:30   Log-Likelihood:                 1203.6
No. Observations:                 725   AIC:                            -2001.
Df Residuals:                     522   BIC:                            -1070.
Df Model:                         202                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

In [None]:
#Investors
#Treatment Group
#Without CARea
#Score
#Car1

for i in ['Car1','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car1 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car1   R-squared:                       0.358
Model:                            OLS   Adj. R-squared:                  0.109
Method:                 Least Squares   F-statistic:                     1.438
Date:                Fri, 13 Aug 2021   Prob (F-statistic):           0.000698
Time:                        21:15:32   Log-Likelihood:                 1233.9
No. Observations:                 725   AIC:                            -2062.
Df Residuals:                     522   BIC:                            -1131.
Df Model:                         202                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [None]:
#Investors
#Treatment Group
#Without CARea
#Score
#Car2

for i in ['Car2','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car2 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())



Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car2   R-squared:                       0.345
Model:                            OLS   Adj. R-squared:                  0.092
Method:                 Least Squares   F-statistic:                     1.363
Date:                Sun, 15 Aug 2021   Prob (F-statistic):            0.00330
Time:                        01:40:24   Log-Likelihood:                 1202.3
No. Observations:                 725   AIC:                            -1999.
Df Residuals:                     522   BIC:                            -1068.
Df Model:                         202                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investors
#Treatment Group
#Without CARea
#Score
#Car3

for i in ['Car3','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car3 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())



Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car3   R-squared:                       0.359
Model:                            OLS   Adj. R-squared:                  0.111
Method:                 Least Squares   F-statistic:                     1.450
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           0.000549
Time:                        01:41:08   Log-Likelihood:                 1235.7
No. Observations:                 725   AIC:                            -2065.
Df Residuals:                     522   BIC:                            -1134.
Df Model:                         202                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investors
#Treatment Group
#Without CARea
#Score
#Scar0

for i in ['Scar0','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar0 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())



Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar0   R-squared:                       0.308
Model:                            OLS   Adj. R-squared:                  0.041
Method:                 Least Squares   F-statistic:                     1.151
Date:                Sun, 15 Aug 2021   Prob (F-statistic):              0.109
Time:                        01:44:06   Log-Likelihood:                -1282.1
No. Observations:                 725   AIC:                             2970.
Df Residuals:                     522   BIC:                             3901.
Df Model:                         202                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investors
#Treatment Group
#Without CARea
#Score
#Scar1

for i in ['Scar1','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar1 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar1   R-squared:                       0.309
Model:                            OLS   Adj. R-squared:                  0.041
Method:                 Least Squares   F-statistic:                     1.153
Date:                Sun, 15 Aug 2021   Prob (F-statistic):              0.106
Time:                        01:44:31   Log-Likelihood:                -1302.0
No. Observations:                 725   AIC:                             3010.
Df Residuals:                     522   BIC:                             3941.
Df Model:                         202                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investors
#Treatment Group
#Without CARea
#Score
#Scar2

for i in ['Scar2','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar2 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar2   R-squared:                       0.311
Model:                            OLS   Adj. R-squared:                  0.044
Method:                 Least Squares   F-statistic:                     1.166
Date:                Sun, 15 Aug 2021   Prob (F-statistic):             0.0899
Time:                        01:45:06   Log-Likelihood:                -1287.4
No. Observations:                 725   AIC:                             2981.
Df Residuals:                     522   BIC:                             3912.
Df Model:                         202                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investors
#Treatment Group
#Without CARea
#Score
#Scar3

for i in ['Scar3','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar3 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar3   R-squared:                       0.312
Model:                            OLS   Adj. R-squared:                  0.046
Method:                 Least Squares   F-statistic:                     1.173
Date:                Sun, 15 Aug 2021   Prob (F-statistic):             0.0820
Time:                        01:45:43   Log-Likelihood:                -1305.0
No. Observations:                 725   AIC:                             3016.
Df Residuals:                     522   BIC:                             3947.
Df Model:                         202                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investors
#Treatment Group
#With CARea
#Score
#Car0 + Car_e0

for i in ['Car0','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e0']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car0 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Car_e0',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car0   R-squared:                       0.926
Model:                            OLS   Adj. R-squared:                  0.896
Method:                 Least Squares   F-statistic:                     31.88
Date:                Sun, 15 Aug 2021   Prob (F-statistic):          1.66e-206
Time:                        01:46:57   Log-Likelihood:                 1990.9
No. Observations:                 725   AIC:                            -3574.
Df Residuals:                     521   BIC:                            -2638.
Df Model:                         203                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investors
#Treatment Group
#With CARea
#Score
#Car1 + Car_e1

for i in ['Car1','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e1']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car1 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Car_e1',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car1   R-squared:                       0.905
Model:                            OLS   Adj. R-squared:                  0.869
Method:                 Least Squares   F-statistic:                     24.57
Date:                Sun, 15 Aug 2021   Prob (F-statistic):          1.91e-180
Time:                        01:47:42   Log-Likelihood:                 1928.3
No. Observations:                 725   AIC:                            -3449.
Df Residuals:                     521   BIC:                            -2513.
Df Model:                         203                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investors
#Treatment Group
#With CARea
#Score
#Car2 + Car_e2

for i in ['Car2','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e2']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car2 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Car_e2',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car2   R-squared:                       0.926
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     31.98
Date:                Sun, 15 Aug 2021   Prob (F-statistic):          8.01e-207
Time:                        01:48:18   Log-Likelihood:                 1991.2
No. Observations:                 725   AIC:                            -3574.
Df Residuals:                     521   BIC:                            -2639.
Df Model:                         203                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investors
#Treatment Group
#With CARea
#Score
#Car3 + Car_e3

for i in ['Car3','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e3']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car3 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Car_e3',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car3   R-squared:                       0.904
Model:                            OLS   Adj. R-squared:                  0.866
Method:                 Least Squares   F-statistic:                     24.07
Date:                Sun, 15 Aug 2021   Prob (F-statistic):          1.97e-178
Time:                        01:48:42   Log-Likelihood:                 1922.4
No. Observations:                 725   AIC:                            -3437.
Df Residuals:                     521   BIC:                            -2501.
Df Model:                         203                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investors
#Treatment Group
#With CARea
#Score
#Scar0 + Scar_e0

for i in ['Scar0','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e0']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar0 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Scar_e0',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar0   R-squared:                       0.925
Model:                            OLS   Adj. R-squared:                  0.895
Method:                 Least Squares   F-statistic:                     31.46
Date:                Sun, 15 Aug 2021   Prob (F-statistic):          3.74e-205
Time:                        01:49:22   Log-Likelihood:                -478.73
No. Observations:                 725   AIC:                             1365.
Df Residuals:                     521   BIC:                             2301.
Df Model:                         203                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investors
#Treatment Group
#With CARea
#Score
#Scar1 + Scar_e1

for i in ['Scar1','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e1']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar1 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Scar_e1',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar1   R-squared:                       0.905
Model:                            OLS   Adj. R-squared:                  0.867
Method:                 Least Squares   F-statistic:                     24.34
Date:                Sun, 15 Aug 2021   Prob (F-statistic):          1.50e-179
Time:                        01:51:57   Log-Likelihood:                -583.90
No. Observations:                 725   AIC:                             1576.
Df Residuals:                     521   BIC:                             2511.
Df Model:                         203                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investors
#Treatment Group
#With CARea
#Score
#Scar2 + Scar_e2

for i in ['Scar2','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e2']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar2 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Scar_e2',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar2   R-squared:                       0.925
Model:                            OLS   Adj. R-squared:                  0.896
Method:                 Least Squares   F-statistic:                     31.62
Date:                Sun, 15 Aug 2021   Prob (F-statistic):          1.19e-205
Time:                        01:52:18   Log-Likelihood:                -483.78
No. Observations:                 725   AIC:                             1376.
Df Residuals:                     521   BIC:                             2311.
Df Model:                         203                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investors
#Treatment Group
#With CARea
#Score
#Scar3 + Scar_e3

for i in ['Scar3','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e3']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar3 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Scar_e3',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar3   R-squared:                       0.903
Model:                            OLS   Adj. R-squared:                  0.866
Method:                 Least Squares   F-statistic:                     23.97
Date:                Sun, 15 Aug 2021   Prob (F-statistic):          5.05e-178
Time:                        01:53:07   Log-Likelihood:                -593.89
No. Observations:                 725   AIC:                             1596.
Df Residuals:                     521   BIC:                             2531.
Df Model:                         203                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Treatment Group
#Without CARea
#Score

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.409
Model:                            OLS   Adj. R-squared:                  0.117
Method:                 Least Squares   F-statistic:                     1.402
Date:                Sun, 15 Aug 2021   Prob (F-statistic):            0.00421
Time:                        01:53:54   Log-Likelihood:                 618.69
No. Observations:                 527   AIC:                            -887.4
Df Residuals:                     352   BIC:                            -140.6
Df Model:                         174                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [None]:
#Analyst
#Treatment Group
#With CARea
#Score
#Car_e0

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e0']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Car_e0',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.409
Model:                            OLS   Adj. R-squared:                  0.115
Method:                 Least Squares   F-statistic:                     1.390
Date:                Sun, 15 Aug 2021   Prob (F-statistic):            0.00508
Time:                        01:55:36   Log-Likelihood:                 618.69
No. Observations:                 527   AIC:                            -885.4
Df Residuals:                     351   BIC:                            -134.4
Df Model:                         175                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Treatment Group
#With CARea
#Score
#Car_e1

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e1']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Car_e1',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.409
Model:                            OLS   Adj. R-squared:                  0.115
Method:                 Least Squares   F-statistic:                     1.391
Date:                Sun, 15 Aug 2021   Prob (F-statistic):            0.00502
Time:                        01:56:11   Log-Likelihood:                 618.75
No. Observations:                 527   AIC:                            -885.5
Df Residuals:                     351   BIC:                            -134.5
Df Model:                         175                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Treatment Group
#With CARea
#Score
#Car_e2

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e2']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Car_e2',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.409
Model:                            OLS   Adj. R-squared:                  0.115
Method:                 Least Squares   F-statistic:                     1.390
Date:                Sun, 15 Aug 2021   Prob (F-statistic):            0.00508
Time:                        01:56:47   Log-Likelihood:                 618.69
No. Observations:                 527   AIC:                            -885.4
Df Residuals:                     351   BIC:                            -134.4
Df Model:                         175                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Treatment Group
#With CARea
#Score
#Car_e3

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e3']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Car_e3',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':



Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.409
Model:                            OLS   Adj. R-squared:                  0.115
Method:                 Least Squares   F-statistic:                     1.391
Date:                Sun, 15 Aug 2021   Prob (F-statistic):            0.00503
Time:                        01:57:19   Log-Likelihood:                 618.74
No. Observations:                 527   AIC:                            -885.5
Df Residuals:                     351   BIC:                            -134.4
Df Model:                         175                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

In [None]:
#Analyst
#Treatment Group
#With CARea
#Score
#Scar_e0

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e0']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Scar_e0',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.409
Model:                            OLS   Adj. R-squared:                  0.115
Method:                 Least Squares   F-statistic:                     1.391
Date:                Sun, 15 Aug 2021   Prob (F-statistic):            0.00503
Time:                        02:00:32   Log-Likelihood:                 618.73
No. Observations:                 527   AIC:                            -885.5
Df Residuals:                     351   BIC:                            -134.4
Df Model:                         175                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Treatment Group
#With CARea
#Score
#Scar_e1

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e1']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Scar_e1',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.409
Model:                            OLS   Adj. R-squared:                  0.115
Method:                 Least Squares   F-statistic:                     1.390
Date:                Sun, 15 Aug 2021   Prob (F-statistic):            0.00507
Time:                        02:01:48   Log-Likelihood:                 618.70
No. Observations:                 527   AIC:                            -885.4
Df Residuals:                     351   BIC:                            -134.4
Df Model:                         175                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Treatment Group
#With CARea
#Score
#Scar_e2

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e2']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Scar_e2',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.409
Model:                            OLS   Adj. R-squared:                  0.115
Method:                 Least Squares   F-statistic:                     1.391
Date:                Sun, 15 Aug 2021   Prob (F-statistic):            0.00503
Time:                        02:02:20   Log-Likelihood:                 618.74
No. Observations:                 527   AIC:                            -885.5
Df Residuals:                     351   BIC:                            -134.4
Df Model:                         175                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Treatment Group
#With CARea
#Score
#Scar_e3

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e3']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ C(cusip)+ Filelate+ NewItems+ POST + POScore + Scar_e3',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.409
Model:                            OLS   Adj. R-squared:                  0.115
Method:                 Least Squares   F-statistic:                     1.390
Date:                Sun, 15 Aug 2021   Prob (F-statistic):            0.00506
Time:                        02:02:46   Log-Likelihood:                 618.71
No. Observations:                 527   AIC:                            -885.4
Df Residuals:                     351   BIC:                            -134.4
Df Model:                         175                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Time-series fixed effects(second model)
#Adoption Avoidance Group

import pandas as pd

import statsmodels.formula.api as sm
#import statsmodels.api as sm1

import numpy as np

df = pd.read_excel('/content/Adoption Avoidance Group August 7, 2021.xlsx') 

dfd = df[['Revision','cusip','fyear','Car0','Car1','Car2','Car3','Scar0','Scar1','Scar2','Scar3','Car_e0','Car_e1','Car_e2','Car_e3','Scar_e0', 'Scar_e1', 'Scar_e2','Scar_e3','Other information MD&A Recoded','Score','Raw Score','Size','Filelate','NewItems (excluding 0)','POST']]
#CL: I added all of the relevant data columns
dfd.rename(columns={'Other information MD&A Recoded':'Auditor_OI','NewItems (excluding 0)':'NewItems'},inplace=True)
dfd['year'] = pd.to_datetime(dfd['fyear'],format='%Y')


dfd['POScore'] = dfd['POST'] * dfd['Score']

#Investor
#Without CARea
#Score

for i in ['Car0','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car0 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())




Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car0   R-squared:                       0.476
Model:                            OLS   Adj. R-squared:                  0.273
Method:                 Least Squares   F-statistic:                     2.351
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.15e-07
Time:                        02:16:38   Log-Likelihood:                 457.26
No. Observations:                 328   AIC:                            -730.5
Df Residuals:                     236   BIC:                            -381.6
Df Model:                          91                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/index

In [None]:
#Investor
#Adoption Avoidance Group
#Without CARea
#Score
#Car1

for i in ['Car1','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car1 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car1   R-squared:                       0.509
Model:                            OLS   Adj. R-squared:                  0.320
Method:                 Least Squares   F-statistic:                     2.694
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           8.49e-10
Time:                        02:18:12   Log-Likelihood:                 488.53
No. Observations:                 328   AIC:                            -793.1
Df Residuals:                     236   BIC:                            -444.1
Df Model:                          91                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#Without CARea
#Score
#Car2

for i in ['Car2','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car2 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car2   R-squared:                       0.472
Model:                            OLS   Adj. R-squared:                  0.268
Method:                 Least Squares   F-statistic:                     2.316
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.89e-07
Time:                        02:18:26   Log-Likelihood:                 456.21
No. Observations:                 328   AIC:                            -728.4
Df Residuals:                     236   BIC:                            -379.5
Df Model:                          91                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#Without CARea
#Score
#Car3

for i in ['Car3','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car3 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car3   R-squared:                       0.507
Model:                            OLS   Adj. R-squared:                  0.317
Method:                 Least Squares   F-statistic:                     2.670
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.19e-09
Time:                        02:18:50   Log-Likelihood:                 487.68
No. Observations:                 328   AIC:                            -791.4
Df Residuals:                     236   BIC:                            -442.4
Df Model:                          91                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#Without CARea
#Score
#Scar0

for i in ['Scar0','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar0 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar0   R-squared:                       0.346
Model:                            OLS   Adj. R-squared:                  0.094
Method:                 Least Squares   F-statistic:                     1.374
Date:                Sun, 15 Aug 2021   Prob (F-statistic):             0.0297
Time:                        03:22:29   Log-Likelihood:                -558.90
No. Observations:                 328   AIC:                             1302.
Df Residuals:                     236   BIC:                             1651.
Df Model:                          91                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#Without CARea
#Score
#Scar1

for i in ['Scar1','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar1 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar1   R-squared:                       0.354
Model:                            OLS   Adj. R-squared:                  0.102
Method:                 Least Squares   F-statistic:                     1.403
Date:                Sun, 15 Aug 2021   Prob (F-statistic):             0.0226
Time:                        03:22:33   Log-Likelihood:                -543.55
No. Observations:                 325   AIC:                             1271.
Df Residuals:                     233   BIC:                             1619.
Df Model:                          91                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#Without CARea
#Score
#Scar2

for i in ['Scar2','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar2 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar2   R-squared:                       0.347
Model:                            OLS   Adj. R-squared:                  0.095
Method:                 Least Squares   F-statistic:                     1.376
Date:                Sun, 15 Aug 2021   Prob (F-statistic):             0.0293
Time:                        03:22:39   Log-Likelihood:                -561.82
No. Observations:                 328   AIC:                             1308.
Df Residuals:                     236   BIC:                             1657.
Df Model:                          91                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#Without CARea
#Score
#Scar3

for i in ['Scar3','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar3 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar3   R-squared:                       0.353
Model:                            OLS   Adj. R-squared:                  0.103
Method:                 Least Squares   F-statistic:                     1.414
Date:                Sun, 15 Aug 2021   Prob (F-statistic):             0.0199
Time:                        03:22:43   Log-Likelihood:                -551.84
No. Observations:                 328   AIC:                             1288.
Df Residuals:                     236   BIC:                             1637.
Df Model:                          91                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#With CARea
#Score
#Car0 + Car_e0

for i in ['Car0','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e0']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car0 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e0',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car0   R-squared:                       0.883
Model:                            OLS   Adj. R-squared:                  0.837
Method:                 Least Squares   F-statistic:                     19.20
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           5.86e-72
Time:                        03:22:47   Log-Likelihood:                 702.68
No. Observations:                 328   AIC:                            -1219.
Df Residuals:                     235   BIC:                            -866.6
Df Model:                          92                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#With CARea
#Score
#Car1 + Car_e1

for i in ['Car1','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e1']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car1 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e1',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car1   R-squared:                       0.868
Model:                            OLS   Adj. R-squared:                  0.816
Method:                 Least Squares   F-statistic:                     16.74
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           3.46e-66
Time:                        03:22:51   Log-Likelihood:                 703.36
No. Observations:                 328   AIC:                            -1221.
Df Residuals:                     235   BIC:                            -868.0
Df Model:                          92                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#With CARea
#Score
#Car2 + Car_e2

for i in ['Car2','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e2']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car2 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e2',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car2   R-squared:                       0.883
Model:                            OLS   Adj. R-squared:                  0.837
Method:                 Least Squares   F-statistic:                     19.32
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           3.18e-72
Time:                        03:22:53   Log-Likelihood:                 703.70
No. Observations:                 328   AIC:                            -1221.
Df Residuals:                     235   BIC:                            -868.6
Df Model:                          92                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#With CARea
#Score
#Car3 + Car_e3

for i in ['Car3','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e3']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car3 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e3',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car3   R-squared:                       0.862
Model:                            OLS   Adj. R-squared:                  0.808
Method:                 Least Squares   F-statistic:                     15.99
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           2.85e-64
Time:                        03:22:54   Log-Likelihood:                 696.69
No. Observations:                 328   AIC:                            -1207.
Df Residuals:                     235   BIC:                            -854.6
Df Model:                          92                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#With CARea
#Score
#Scar0 + Scar_e0

for i in ['Scar0','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e0']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar0 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e0',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar0   R-squared:                       0.852
Model:                            OLS   Adj. R-squared:                  0.794
Method:                 Least Squares   F-statistic:                     14.70
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           8.04e-61
Time:                        03:22:55   Log-Likelihood:                -315.36
No. Observations:                 328   AIC:                             816.7
Df Residuals:                     235   BIC:                             1169.
Df Model:                          92                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#With CARea
#Score
#Scar1 + Scar_e1

for i in ['Scar1','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e1']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar1 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e1',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar1   R-squared:                       0.794
Model:                            OLS   Adj. R-squared:                  0.712
Method:                 Least Squares   F-statistic:                     9.718
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.68e-44
Time:                        03:22:57   Log-Likelihood:                -357.84
No. Observations:                 325   AIC:                             901.7
Df Residuals:                     232   BIC:                             1254.
Df Model:                          92                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#With CARea
#Score
#Scar2 + Scar_e2

for i in ['Scar2','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e2']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar2 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e2',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar2   R-squared:                       0.854
Model:                            OLS   Adj. R-squared:                  0.797
Method:                 Least Squares   F-statistic:                     14.95
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.59e-61
Time:                        03:22:59   Log-Likelihood:                -315.94
No. Observations:                 328   AIC:                             817.9
Df Residuals:                     235   BIC:                             1171.
Df Model:                          92                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Avoidance Group
#With CARea
#Score
#Scar3 + Scar_e3

for i in ['Scar3','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e3']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar3 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e3',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar3   R-squared:                       0.791
Model:                            OLS   Adj. R-squared:                  0.709
Method:                 Least Squares   F-statistic:                     9.660
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.24e-44
Time:                        03:23:01   Log-Likelihood:                -366.58
No. Observations:                 328   AIC:                             919.2
Df Residuals:                     235   BIC:                             1272.
Df Model:                          92                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Adoption Avoidance Group
#Without CARea
#Score


for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.566
Model:                            OLS   Adj. R-squared:                  0.327
Method:                 Least Squares   F-statistic:                     2.373
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.79e-06
Time:                        03:23:12   Log-Likelihood:                 335.36
No. Observations:                 238   AIC:                            -500.7
Df Residuals:                     153   BIC:                            -205.6
Df Model:                          84                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Adoption Avoidance Group
#With CARea
#Score
#Car_e0

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e0']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e0',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.571
Model:                            OLS   Adj. R-squared:                  0.331
Method:                 Least Squares   F-statistic:                     2.378
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.66e-06
Time:                        03:23:19   Log-Likelihood:                 336.74
No. Observations:                 238   AIC:                            -501.5
Df Residuals:                     152   BIC:                            -202.9
Df Model:                          85                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Adoption Avoidance Group
#With CARea
#Score
#Car_e1

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e1']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e1',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.569
Model:                            OLS   Adj. R-squared:                  0.328
Method:                 Least Squares   F-statistic:                     2.364
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.95e-06
Time:                        03:23:20   Log-Likelihood:                 336.32
No. Observations:                 238   AIC:                            -500.6
Df Residuals:                     152   BIC:                            -202.0
Df Model:                          85                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Adoption Avoidance Group
#With CARea
#Score
#Car_e2

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e2']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e2',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.571
Model:                            OLS   Adj. R-squared:                  0.331
Method:                 Least Squares   F-statistic:                     2.382
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.59e-06
Time:                        03:23:22   Log-Likelihood:                 336.85
No. Observations:                 238   AIC:                            -501.7
Df Residuals:                     152   BIC:                            -203.1
Df Model:                          85                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Adoption Avoidance Group
#With CARea
#Score
#Car_e3

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e3']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e3',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.569
Model:                            OLS   Adj. R-squared:                  0.329
Method:                 Least Squares   F-statistic:                     2.364
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.93e-06
Time:                        03:23:24   Log-Likelihood:                 336.34
No. Observations:                 238   AIC:                            -500.7
Df Residuals:                     152   BIC:                            -202.1
Df Model:                          85                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Adoption Avoidance Group
#With CARea
#Score
#Scar_e0

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e0']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e0',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.566
Model:                            OLS   Adj. R-squared:                  0.323
Method:                 Least Squares   F-statistic:                     2.331
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           2.78e-06
Time:                        03:23:25   Log-Likelihood:                 335.38
No. Observations:                 238   AIC:                            -498.8
Df Residuals:                     152   BIC:                            -200.2
Df Model:                          85                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Adoption Avoidance Group
#With CARea
#Score
#Scar_e1

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e1']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e1',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.566
Model:                            OLS   Adj. R-squared:                  0.319
Method:                 Least Squares   F-statistic:                     2.287
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           5.03e-06
Time:                        03:23:27   Log-Likelihood:                 329.86
No. Observations:                 235   AIC:                            -487.7
Df Residuals:                     149   BIC:                            -190.2
Df Model:                          85                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Adoption Avoidance Group
#With CARea
#Score
#Scar_e2

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e2']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e2',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.566
Model:                            OLS   Adj. R-squared:                  0.323
Method:                 Least Squares   F-statistic:                     2.332
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           2.75e-06
Time:                        03:23:28   Log-Likelihood:                 335.41
No. Observations:                 238   AIC:                            -498.8
Df Residuals:                     152   BIC:                            -200.2
Df Model:                          85                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Adoption Avoidance Group
#With CARea
#Score
#Scar_e3

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e3']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e3',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.566
Model:                            OLS   Adj. R-squared:                  0.323
Method:                 Least Squares   F-statistic:                     2.330
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           2.81e-06
Time:                        03:23:32   Log-Likelihood:                 335.36
No. Observations:                 238   AIC:                            -498.7
Df Residuals:                     152   BIC:                            -200.1
Df Model:                          85                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Time-series fixed effects(second model)
#Non-Adoption Group

import pandas as pd

import statsmodels.formula.api as sm
#import statsmodels.api as sm1

import numpy as np

df = pd.read_excel('/content/Non-Adoption Group Data August 7, 2021.xlsx') 

dfd = df[['Revision','cusip','fyear','Car0','Car1','Car2','Car3','Scar0','Scar1','Scar2','Scar3','Car_e0','Car_e1','Car_e2','Car_e3','Scar_e0', 'Scar_e1', 'Scar_e2','Scar_e3','Other information MD&A Recoded','Score','Raw Score','Size','Filelate','NewItems (excluding 0)','POST']]
#CL: I added all of the relevant data columns
dfd.rename(columns={'Other information MD&A Recoded':'Auditor_OI','NewItems (excluding 0)':'NewItems'},inplace=True)
dfd['year'] = pd.to_datetime(dfd['fyear'],format='%Y')


dfd['POScore'] = dfd['POST'] * dfd['Score']

#Investor
#Without CARea
#Score

for i in ['Car0','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car0 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())




Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car0   R-squared:                       0.683
Model:                            OLS   Adj. R-squared:                  0.518
Method:                 Least Squares   F-statistic:                     4.140
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           8.18e-06
Time:                        03:23:45   Log-Likelihood:                 158.08
No. Observations:                  77   AIC:                            -262.2
Df Residuals:                      50   BIC:                            -198.9
Df Model:                          26                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/index

In [None]:
#Investor
#Non-Adoption Group
#Without CARea
#Score
#Car1

for i in ['Car1','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car1 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car1   R-squared:                       0.630
Model:                            OLS   Adj. R-squared:                  0.438
Method:                 Least Squares   F-statistic:                     3.280
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           0.000154
Time:                        03:23:58   Log-Likelihood:                 165.41
No. Observations:                  77   AIC:                            -276.8
Df Residuals:                      50   BIC:                            -213.5
Df Model:                          26                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Non-Adoption Group
#Without CARea
#Score
#Car2

for i in ['Car2','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car2 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car2   R-squared:                       0.701
Model:                            OLS   Adj. R-squared:                  0.545
Method:                 Least Squares   F-statistic:                     4.500
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           2.58e-06
Time:                        03:23:59   Log-Likelihood:                 162.17
No. Observations:                  77   AIC:                            -270.3
Df Residuals:                      50   BIC:                            -207.1
Df Model:                          26                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Non-Adoption Group
#Without CARea
#Score
#Car3

for i in ['Car3','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car3 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car3   R-squared:                       0.633
Model:                            OLS   Adj. R-squared:                  0.442
Method:                 Least Squares   F-statistic:                     3.315
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           0.000136
Time:                        03:24:01   Log-Likelihood:                 168.53
No. Observations:                  77   AIC:                            -283.1
Df Residuals:                      50   BIC:                            -219.8
Df Model:                          26                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Non-Adoption Group
#Without CARea
#Score
#Scar0

for i in ['Scar0','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar0 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar0   R-squared:                       0.421
Model:                            OLS   Adj. R-squared:                  0.120
Method:                 Least Squares   F-statistic:                     1.398
Date:                Sun, 15 Aug 2021   Prob (F-statistic):              0.153
Time:                        03:24:02   Log-Likelihood:                -107.35
No. Observations:                  77   AIC:                             268.7
Df Residuals:                      50   BIC:                             332.0
Df Model:                          26                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Non-Adoption Group
#Without CARea
#Score
#Scar1

for i in ['Scar1','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar1 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar1   R-squared:                       0.381
Model:                            OLS   Adj. R-squared:                  0.059
Method:                 Least Squares   F-statistic:                     1.182
Date:                Sun, 15 Aug 2021   Prob (F-statistic):              0.300
Time:                        03:24:04   Log-Likelihood:                -106.28
No. Observations:                  77   AIC:                             266.6
Df Residuals:                      50   BIC:                             329.8
Df Model:                          26                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Non-Adoption Group
#Without CARea
#Score
#Scar2

for i in ['Scar2','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar2 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar2   R-squared:                       0.466
Model:                            OLS   Adj. R-squared:                  0.188
Method:                 Least Squares   F-statistic:                     1.677
Date:                Sun, 15 Aug 2021   Prob (F-statistic):             0.0580
Time:                        03:24:05   Log-Likelihood:                -104.30
No. Observations:                  77   AIC:                             262.6
Df Residuals:                      50   BIC:                             325.9
Df Model:                          26                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Non-Adoption Group
#Without CARea
#Score
#Scar3

for i in ['Scar3','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar3 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar3   R-squared:                       0.399
Model:                            OLS   Adj. R-squared:                  0.087
Method:                 Least Squares   F-statistic:                     1.278
Date:                Sun, 15 Aug 2021   Prob (F-statistic):              0.225
Time:                        03:24:07   Log-Likelihood:                -105.25
No. Observations:                  77   AIC:                             264.5
Df Residuals:                      50   BIC:                             327.8
Df Model:                          26                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Adoption Group
#With CARea
#Score
#Car0 + Car_e0

for i in ['Car0','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e0']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car0 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e0',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car0   R-squared:                       0.752
Model:                            OLS   Adj. R-squared:                  0.615
Method:                 Least Squares   F-statistic:                     5.502
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.29e-07
Time:                        03:24:08   Log-Likelihood:                 167.54
No. Observations:                  77   AIC:                            -279.1
Df Residuals:                      49   BIC:                            -213.5
Df Model:                          27                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Non-Adoption Group
#With CARea
#Score
#Car1 + Car_e1

for i in ['Car1','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e1']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car1 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e1',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car1   R-squared:                       0.765
Model:                            OLS   Adj. R-squared:                  0.636
Method:                 Least Squares   F-statistic:                     5.922
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           4.04e-08
Time:                        03:24:11   Log-Likelihood:                 182.91
No. Observations:                  77   AIC:                            -309.8
Df Residuals:                      49   BIC:                            -244.2
Df Model:                          27                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Non-Adoption Group
#With CARea
#Score
#Car2 + Car_e2

for i in ['Car2','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e2']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car2 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e2',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car2   R-squared:                       0.753
Model:                            OLS   Adj. R-squared:                  0.616
Method:                 Least Squares   F-statistic:                     5.520
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.22e-07
Time:                        03:24:12   Log-Likelihood:                 169.51
No. Observations:                  77   AIC:                            -283.0
Df Residuals:                      49   BIC:                            -217.4
Df Model:                          27                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Non-Adoption Group
#With CARea
#Score
#Car3 + Car_e3

for i in ['Car3','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e3']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Car3 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e3',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                   Car3   R-squared:                       0.749
Model:                            OLS   Adj. R-squared:                  0.611
Method:                 Least Squares   F-statistic:                     5.422
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.61e-07
Time:                        03:24:14   Log-Likelihood:                 183.20
No. Observations:                  77   AIC:                            -310.4
Df Residuals:                      49   BIC:                            -244.8
Df Model:                          27                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Non-Adoption Group
#With CARea
#Score
#Scar0 + Scar_e0

for i in ['Scar0','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e0']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar0 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e0',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar0   R-squared:                       0.524
Model:                            OLS   Adj. R-squared:                  0.261
Method:                 Least Squares   F-statistic:                     1.996
Date:                Sun, 15 Aug 2021   Prob (F-statistic):             0.0175
Time:                        03:24:16   Log-Likelihood:                -99.822
No. Observations:                  77   AIC:                             255.6
Df Residuals:                      49   BIC:                             321.3
Df Model:                          27                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Non-Adoption Group
#With CARea
#Score
#Scar1 + Scar_e1

for i in ['Scar1','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e1']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar1 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e1',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar1   R-squared:                       0.546
Model:                            OLS   Adj. R-squared:                  0.295
Method:                 Least Squares   F-statistic:                     2.179
Date:                Sun, 15 Aug 2021   Prob (F-statistic):            0.00878
Time:                        03:24:19   Log-Likelihood:                -94.357
No. Observations:                  77   AIC:                             244.7
Df Residuals:                      49   BIC:                             310.3
Df Model:                          27                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Non-Adoption Group
#With CARea
#Score
#Scar2 + Scar_e2

for i in ['Scar2','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e2']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar2 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e2',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar2   R-squared:                       0.547
Model:                            OLS   Adj. R-squared:                  0.297
Method:                 Least Squares   F-statistic:                     2.190
Date:                Sun, 15 Aug 2021   Prob (F-statistic):            0.00841
Time:                        03:24:21   Log-Likelihood:                -97.969
No. Observations:                  77   AIC:                             251.9
Df Residuals:                      49   BIC:                             317.6
Df Model:                          27                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Investor
#Non-Adoption Group
#With CARea
#Score
#Scar3 + Scar_e3

for i in ['Scar3','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e3']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Scar3 ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e3',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:                  Scar3   R-squared:                       0.538
Model:                            OLS   Adj. R-squared:                  0.284
Method:                 Least Squares   F-statistic:                     2.116
Date:                Sun, 15 Aug 2021   Prob (F-statistic):             0.0111
Time:                        03:24:22   Log-Likelihood:                -95.107
No. Observations:                  77   AIC:                             246.2
Df Residuals:                      49   BIC:                             311.8
Df Model:                          27                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Non-Adoption Group
#Without CARea
#Score


for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.720
Model:                            OLS   Adj. R-squared:                  0.538
Method:                 Least Squares   F-statistic:                     3.961
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           8.89e-05
Time:                        03:24:24   Log-Likelihood:                 118.64
No. Observations:                  62   AIC:                            -187.3
Df Residuals:                      37   BIC:                            -134.1
Df Model:                          24                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Non-Adoption Group
#With CARea
#Score
#Car_e0

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e0']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e0',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.773
Model:                            OLS   Adj. R-squared:                  0.611
Method:                 Least Squares   F-statistic:                     4.773
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.44e-05
Time:                        03:24:30   Log-Likelihood:                 122.68
No. Observations:                  61   AIC:                            -193.4
Df Residuals:                      35   BIC:                            -138.5
Df Model:                          25                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Non-Adoption Group
#With CARea
#Score
#Car_e1

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e1']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e1',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.777
Model:                            OLS   Adj. R-squared:                  0.618
Method:                 Least Squares   F-statistic:                     4.886
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.11e-05
Time:                        03:24:33   Log-Likelihood:                 123.23
No. Observations:                  61   AIC:                            -194.5
Df Residuals:                      35   BIC:                            -139.6
Df Model:                          25                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Non-Adoption Group
#With CARea
#Score
#Car_e2

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e2']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e2',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.765
Model:                            OLS   Adj. R-squared:                  0.598
Method:                 Least Squares   F-statistic:                     4.567
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           2.34e-05
Time:                        03:24:34   Log-Likelihood:                 121.64
No. Observations:                  61   AIC:                            -191.3
Df Residuals:                      35   BIC:                            -136.4
Df Model:                          25                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Non-Adoption Group
#With CARea
#Score
#Car_e3

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Car_e3']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Car_e3',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.773
Model:                            OLS   Adj. R-squared:                  0.610
Method:                 Least Squares   F-statistic:                     4.755
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           1.50e-05
Time:                        03:24:35   Log-Likelihood:                 122.59
No. Observations:                  61   AIC:                            -193.2
Df Residuals:                      35   BIC:                            -138.3
Df Model:                          25                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Non-Adoption Group
#With CARea
#Score
#Scar_e0

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e0']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e0',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.726
Model:                            OLS   Adj. R-squared:                  0.530
Method:                 Least Squares   F-statistic:                     3.701
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           0.000208
Time:                        03:24:37   Log-Likelihood:                 116.86
No. Observations:                  61   AIC:                            -181.7
Df Residuals:                      35   BIC:                            -126.8
Df Model:                          25                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Non-Adoption Group
#With CARea
#Score
#Scar_e1

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e1']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e1',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.726
Model:                            OLS   Adj. R-squared:                  0.530
Method:                 Least Squares   F-statistic:                     3.706
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           0.000205
Time:                        03:24:39   Log-Likelihood:                 116.89
No. Observations:                  61   AIC:                            -181.8
Df Residuals:                      35   BIC:                            -126.9
Df Model:                          25                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Non-Adoption Group
#With CARea
#Score
#Scar_e2

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e2']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e2',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.724
Model:                            OLS   Adj. R-squared:                  0.526
Method:                 Least Squares   F-statistic:                     3.664
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           0.000229
Time:                        03:24:40   Log-Likelihood:                 116.63
No. Observations:                  61   AIC:                            -181.3
Df Residuals:                      35   BIC:                            -126.4
Df Model:                          25                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Analyst
#Non-Adoption Group
#With CARea
#Score
#Scar_e3

for i in ['Revision','Score','Size','Filelate' ,'NewItems', 'POST', 'POScore','Scar_e3']:
  mean = dfd.groupby('year')[i].transform(np.mean)
  dfd[i] = dfd[i] - mean

ols2 = sm.ols(formula='Revision ~ Score+ Size+ NewItems+ C(cusip)+ Filelate+ POST + POScore + Scar_e3',
                          data=dfd).fit()
#C(cusip) is the company fixed effects
print('\nSecond model result\n')

print(ols2.summary())


Second model result

                            OLS Regression Results                            
Dep. Variable:               Revision   R-squared:                       0.724
Model:                            OLS   Adj. R-squared:                  0.527
Method:                 Least Squares   F-statistic:                     3.672
Date:                Sun, 15 Aug 2021   Prob (F-statistic):           0.000224
Time:                        03:24:41   Log-Likelihood:                 116.69
No. Observations:                  61   AIC:                            -181.4
Df Residuals:                      35   BIC:                            -126.5
Df Model:                          25                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercep

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
#Other Formulas for statsmodels:

#dfdd = dfd.dropna(subset=['Car_e1','Score','Size','Filelate' ,'NewItems', 'Auditor_OI', 'AuScore'])
#dfdd = dfd.dropna()


In [None]:
dfd['cik']

0        2809
1        2809
2        2809
3        2809
4      885590
       ...   
88    1318220
89    1318220
90    1318220
91    1318220
92    1318220
Name: cik, Length: 93, dtype: int64

In [None]:
pip install linearmodels

Collecting linearmodels
  Downloading linearmodels-4.24-cp37-cp37m-manylinux1_x86_64.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 5.3 MB/s 
Collecting statsmodels>=0.11
  Downloading statsmodels-0.12.2-cp37-cp37m-manylinux1_x86_64.whl (9.5 MB)
[K     |████████████████████████████████| 9.5 MB 20.5 MB/s 
Collecting mypy-extensions>=0.4
  Downloading mypy_extensions-0.4.3-py2.py3-none-any.whl (4.5 kB)
Collecting property-cached>=1.6.3
  Downloading property_cached-1.6.4-py2.py3-none-any.whl (7.8 kB)
Collecting pyhdfe>=0.1
  Downloading pyhdfe-0.1.0-py3-none-any.whl (18 kB)
Installing collected packages: statsmodels, pyhdfe, property-cached, mypy-extensions, linearmodels
  Attempting uninstall: statsmodels
    Found existing installation: statsmodels 0.10.2
    Uninstalling statsmodels-0.10.2:
      Successfully uninstalled statsmodels-0.10.2
Successfully installed linearmodels-4.24 mypy-extensions-0.4.3 property-cached-1.6.4 pyhdfe-0.1.0 statsmodels-0.12.2


In [None]:

import pandas as pd
from linearmodels.panel import PanelOLS
from statsmodels.tools.tools import add_constant

import numpy as np

df = pd.read_excel('/content/Treatment Group August 7, 2021.xlsx') 

#Alternative for fixed effect time-series
dfd = df[['Revision','cusip','fyear','Car0','Car1','Car2','Car3','Scar0','Scar1','Scar2','Scar3','Car_e0','Car_e1','Car_e2','Car_e3','Scar_e0', 'Scar_e1', 'Scar_e2','Scar_e3','Other information MD&A Recoded','Score','Raw Score','Size','Filelate','NewItems (excluding 0)','POST']]

dfd.rename(columns={'Other information MD&A Recoded':'Auditor_OI','NewItems (excluding 0)':'NewItems'},inplace=True)

dfd['year'] = dfd['fyear'].astype(int)
#dfd['year'] = pd.Categorical(dfd.year)
dfd['year'] = pd.to_datetime(dfd.year, format='%Y')
dfd['cusip'] = pd.Categorical(dfd['cusip'])
dfd = dfd.set_index(['cusip','year'])
dfd['POScore'] = dfd['POST'] * dfd['Score']

#Car0
#Score

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Car0, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/use

0,1,2,3
Dep. Variable:,Car0,R-squared:,0.0153
Estimator:,PanelOLS,R-squared (Between):,-0.6049
No. Observations:,725,R-squared (Within):,-0.0179
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.2042
Time:,18:47:33,Log-likelihood,1203.7
Cov. Estimator:,Clustered,,
,,F-statistic:,1.3414
Entities:,198,P-value,0.2368
Avg Obs:,3.6616,Distribution:,"F(6,519)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0844,0.2211,-0.3819,0.7027,-0.5188,0.3500
Score,-0.0203,0.3393,-0.0599,0.9523,-0.6869,0.6463
Size,0.0105,0.0100,1.0477,0.2953,-0.0092,0.0302
NewItems,0.0005,0.0004,1.3700,0.1713,-0.0002,0.0013
Filelate,0.1002,0.0417,2.4007,0.0167,0.0182,0.1822
POST,0.1356,0.1996,0.6795,0.4971,-0.2565,0.5278
POScore,-0.1778,0.2426,-0.7328,0.4640,-0.6544,0.2988


In [None]:
#Investor
#Without CARea
#Treatment Group
#Score
#Car1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Car1, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )
result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Car1,R-squared:,0.0255
Estimator:,PanelOLS,R-squared (Between):,-1.9127
No. Observations:,725,R-squared (Within):,-0.0502
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.6726
Time:,00:23:24,Log-likelihood,1234.0
Cov. Estimator:,Clustered,,
,,F-statistic:,2.2632
Entities:,198,P-value,0.0363
Avg Obs:,3.6616,Distribution:,"F(6,519)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.2917,0.1517,-1.9226,0.0551,-0.5898,0.0064
Score,0.1590,0.2622,0.6065,0.5445,-0.3561,0.6742
Size,0.0209,0.0128,1.6357,0.1025,-0.0042,0.0461
NewItems,0.0004,0.0004,1.0291,0.3039,-0.0003,0.0011
Filelate,0.1398,0.0596,2.3446,0.0194,0.0227,0.2569
POST,0.0354,0.1537,0.2301,0.8181,-0.2666,0.3374
POScore,-0.0666,0.1852,-0.3595,0.7194,-0.4304,0.2972


In [None]:
#Investor
#Without CARea
#Treatment Group
#Score
#Car2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Car2, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )
result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Car2,R-squared:,0.0151
Estimator:,PanelOLS,R-squared (Between):,-0.5874
No. Observations:,725,R-squared (Within):,-0.0170
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.1979
Time:,01:20:44,Log-likelihood,1202.4
Cov. Estimator:,Clustered,,
,,F-statistic:,1.3236
Entities:,198,P-value,0.2447
Avg Obs:,3.6616,Distribution:,"F(6,519)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0791,0.2205,-0.3588,0.7199,-0.5123,0.3541
Score,-0.0242,0.3404,-0.0712,0.9433,-0.6930,0.6445
Size,0.0106,0.0102,1.0414,0.2982,-0.0094,0.0307
NewItems,0.0005,0.0004,1.3011,0.1938,-0.0003,0.0013
Filelate,0.1006,0.0424,2.3745,0.0179,0.0174,0.1838
POST,0.1364,0.1986,0.6868,0.4925,-0.2538,0.5266
POScore,-0.1787,0.2413,-0.7405,0.4593,-0.6528,0.2954


In [None]:
#Investor
#Without CARea
#Treatment Group
#Score
#Car3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Car3, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )
result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Car3,R-squared:,0.0252
Estimator:,PanelOLS,R-squared (Between):,-1.8637
No. Observations:,725,R-squared (Within):,-0.0537
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.6617
Time:,18:47:53,Log-likelihood,1235.8
Cov. Estimator:,Clustered,,
,,F-statistic:,2.2406
Entities:,198,P-value,0.0382
Avg Obs:,3.6616,Distribution:,"F(6,519)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.2917,0.1499,-1.9451,0.0523,-0.5862,0.0029
Score,0.1674,0.2600,0.6439,0.5199,-0.3434,0.6782
Size,0.0205,0.0128,1.6033,0.1095,-0.0046,0.0457
NewItems,0.0003,0.0004,0.9679,0.3336,-0.0004,0.0010
Filelate,0.1389,0.0591,2.3495,0.0192,0.0228,0.2551
POST,0.0331,0.1523,0.2173,0.8280,-0.2662,0.3324
POScore,-0.0642,0.1834,-0.3500,0.7265,-0.4246,0.2962


In [None]:
#Investor
#Without CARea
#Treatment Group
#Score
#Scar0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Scar0, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )
result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar0,R-squared:,0.0060
Estimator:,PanelOLS,R-squared (Between):,-0.5475
No. Observations:,725,R-squared (Within):,0.0193
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.1776
Time:,18:50:24,Log-likelihood,-1282.0
Cov. Estimator:,Clustered,,
,,F-statistic:,0.5216
Entities:,198,P-value,0.7921
Avg Obs:,3.6616,Distribution:,"F(6,519)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-2.5181,8.2490,-0.3053,0.7603,-18.724,13.687
Score,-1.0015,11.286,-0.0887,0.9293,-23.174,21.171
Size,0.5312,0.2837,1.8722,0.0617,-0.0262,1.0886
NewItems,0.0042,0.0127,0.3337,0.7387,-0.0207,0.0292
Filelate,1.1042,0.5767,1.9147,0.0561,-0.0288,2.2372
POST,1.9617,5.4225,0.3618,0.7177,-8.6910,12.614
POScore,-2.1963,6.6009,-0.3327,0.7395,-15.164,10.772


In [None]:
#Investor
#Without CARea
#Treatment Group
#Score
#Scar1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Scar1, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )
result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar1,R-squared:,0.0128
Estimator:,PanelOLS,R-squared (Between):,-1.7253
No. Observations:,725,R-squared (Within):,0.0189
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.5719
Time:,18:50:14,Log-likelihood,-1301.9
Cov. Estimator:,Clustered,,
,,F-statistic:,1.1196
Entities:,198,P-value,0.3495
Avg Obs:,3.6616,Distribution:,"F(6,519)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-9.3727,6.0486,-1.5496,0.1219,-21.255,2.5099
Score,5.4935,8.7065,0.6310,0.5283,-11.611,22.598
Size,0.8025,0.3163,2.5367,0.0115,0.1810,1.4239
NewItems,0.0004,0.0116,0.0382,0.9696,-0.0224,0.0233
Filelate,1.8666,0.7342,2.5424,0.0113,0.4243,3.3090
POST,-0.1725,4.7725,-0.0361,0.9712,-9.5482,9.2032
POScore,-0.0055,5.8823,-0.0009,0.9993,-11.562,11.551


In [None]:
#Investor
#Without CARea
#Treatment Group
#Score
#Scar2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Scar2, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )
result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar2,R-squared:,0.0058
Estimator:,PanelOLS,R-squared (Between):,-0.4895
No. Observations:,725,R-squared (Within):,0.0186
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.1616
Time:,18:49:56,Log-likelihood,-1287.3
Cov. Estimator:,Clustered,,
,,F-statistic:,0.5045
Entities:,198,P-value,0.8051
Avg Obs:,3.6616,Distribution:,"F(6,519)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-2.4076,8.2294,-0.2926,0.7700,-18.575,13.759
Score,-0.9036,11.365,-0.0795,0.9367,-23.230,21.423
Size,0.5226,0.2853,1.8315,0.0676,-0.0379,1.0831
NewItems,0.0034,0.0129,0.2615,0.7938,-0.0219,0.0287
Filelate,1.0948,0.5826,1.8792,0.0608,-0.0497,2.2394
POST,2.2391,5.4275,0.4125,0.6801,-8.4235,12.902
POScore,-2.5347,6.6131,-0.3833,0.7017,-15.526,10.457


In [None]:
#Investor
#Without CARea
#Treatment Group
#Score
#Scar3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Scar3, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )
result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar3,R-squared:,0.0125
Estimator:,PanelOLS,R-squared (Between):,-1.6059
No. Observations:,725,R-squared (Within):,0.0169
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.5418
Time:,18:51:18,Log-likelihood,-1304.9
Cov. Estimator:,Clustered,,
,,F-statistic:,1.0928
Entities:,198,P-value,0.3654
Avg Obs:,3.6616,Distribution:,"F(6,519)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-9.4141,6.0383,-1.5591,0.1196,-21.277,2.4485
Score,5.9600,8.7801,0.6788,0.4976,-11.289,23.209
Size,0.7842,0.3147,2.4923,0.0130,0.1661,1.4024
NewItems,-0.0009,0.0117,-0.0759,0.9395,-0.0239,0.0221
Filelate,1.8422,0.7276,2.5319,0.0116,0.4128,3.2717
POST,-0.0159,4.8542,-0.0033,0.9974,-9.5522,9.5205
POScore,-0.2236,5.9888,-0.0373,0.9702,-11.989,11.542


In [None]:
import pandas as pd
from linearmodels.panel import PanelOLS
from statsmodels.tools.tools import add_constant

import numpy as np

df = pd.read_excel('/content/Treatment Group August 7, 2021.xlsx') 

#Alternative for fixed effect time-series
dfd = df[['Revision','cusip','fyear','Car0','Car1','Car2','Car3','Scar0','Scar1','Scar2','Scar3','Car_e0','Car_e1','Car_e2','Car_e3','Scar_e0', 'Scar_e1', 'Scar_e2','Scar_e3','Other information MD&A Recoded','Score','Raw Score','Size','Filelate','NewItems (excluding 0)','POST']]

dfd.rename(columns={'Other information MD&A Recoded':'Auditor_OI','NewItems (excluding 0)':'NewItems'},inplace=True)

dfd['year'] = dfd['fyear'].astype(int)
#dfd['year'] = pd.Categorical(dfd.year)
dfd['year'] = pd.to_datetime(dfd.year, format='%Y')
dfd['cusip'] = pd.Categorical(dfd['cusip'])
dfd = dfd.set_index(['cusip','year'])
dfd['POScore'] = dfd['POST'] * dfd['Score']

#Score
#CAR0 and CAR_e0
exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e0']])

FE = PanelOLS(dfd.Car0, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/in

0,1,2,3
Dep. Variable:,Car0,R-squared:,0.8878
Estimator:,PanelOLS,R-squared (Between):,0.5293
No. Observations:,725,R-squared (Within):,0.8932
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.7741
Time:,18:52:02,Log-likelihood,1991.0
Cov. Estimator:,Clustered,,
,,F-statistic:,585.31
Entities:,198,P-value,0.0000
Avg Obs:,3.6616,Distribution:,"F(7,518)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0640,0.0680,0.9399,0.3477,-0.0697,0.1976
Score,-0.0498,0.0802,-0.6206,0.5351,-0.2074,0.1078
Size,0.0045,0.0054,0.8385,0.4022,-0.0061,0.0151
NewItems,-0.0005,0.0003,-1.6567,0.0982,-0.0010,8.846e-05
Filelate,0.0184,0.0159,1.1569,0.2479,-0.0128,0.0496
POST,-0.0149,0.0288,-0.5166,0.6057,-0.0715,0.0418
POScore,0.0223,0.0331,0.6724,0.5016,-0.0428,0.0874
Car_e0,0.8780,0.0638,13.769,0.0000,0.7528,1.0033


In [None]:
#Investor
#With CARea
#Treatment Group
#Score
#CAR1 and CAR_e1
exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e1']])

FE = PanelOLS(dfd.Car1, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Car1,R-squared:,0.8565
Estimator:,PanelOLS,R-squared (Between):,0.3593
No. Observations:,725,R-squared (Within):,0.8655
Date:,"Fri, Aug 13 2021",R-squared (Overall):,0.6927
Time:,21:30:08,Log-likelihood,1928.3
Cov. Estimator:,Clustered,,
,,F-statistic:,441.62
Entities:,198,P-value,0.0000
Avg Obs:,3.6616,Distribution:,"F(7,518)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0474,0.0774,0.6121,0.5408,-0.1047,0.1994
Score,-0.0535,0.0922,-0.5807,0.5617,-0.2346,0.1275
Size,0.0072,0.0070,1.0248,0.3059,-0.0066,0.0209
NewItems,-0.0005,0.0003,-1.4235,0.1552,-0.0012,0.0002
Filelate,0.0262,0.0215,1.2194,0.2232,-0.0160,0.0684
POST,-0.0686,0.0513,-1.3377,0.1816,-0.1694,0.0322
POScore,0.0849,0.0594,1.4297,0.1534,-0.0318,0.2016
Car_e1,0.8775,0.0639,13.740,0.0000,0.7521,1.0030


In [None]:
#Investor
#With CARea
#Treatment Group
#Score
#CAR2 and CAR_e2
exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e2']])

FE = PanelOLS(dfd.Car2, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Car2,R-squared:,0.8882
Estimator:,PanelOLS,R-squared (Between):,0.5281
No. Observations:,725,R-squared (Within):,0.8934
Date:,"Fri, Aug 13 2021",R-squared (Overall):,0.7739
Time:,21:31:29,Log-likelihood,1991.3
Cov. Estimator:,Clustered,,
,,F-statistic:,588.02
Entities:,198,P-value,0.0000
Avg Obs:,3.6616,Distribution:,"F(7,518)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0669,0.0673,0.9939,0.3208,-0.0654,0.1992
Score,-0.0540,0.0798,-0.6774,0.4984,-0.2107,0.1026
Size,0.0046,0.0054,0.8409,0.4008,-0.0061,0.0153
NewItems,-0.0005,0.0003,-1.6372,0.1022,-0.0010,9.537e-05
Filelate,0.0185,0.0160,1.1547,0.2488,-0.0130,0.0499
POST,-0.0125,0.0282,-0.4453,0.6563,-0.0679,0.0428
POScore,0.0194,0.0324,0.5989,0.5495,-0.0442,0.0830
Car_e2,0.8786,0.0636,13.812,0.0000,0.7536,1.0036


In [None]:
#Investor
#With CARea
#Treatment Group
#Score
#CAR3 and CAR_e3
exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e3']])

FE = PanelOLS(dfd.Car3, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Car3,R-squared:,0.8534
Estimator:,PanelOLS,R-squared (Between):,0.3675
No. Observations:,725,R-squared (Within):,0.8628
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.6926
Time:,18:52:45,Log-likelihood,1922.5
Cov. Estimator:,Clustered,,
,,F-statistic:,430.63
Entities:,198,P-value,0.0000
Avg Obs:,3.6616,Distribution:,"F(7,518)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0462,0.0783,0.5898,0.5556,-0.1076,0.1999
Score,-0.0489,0.0933,-0.5241,0.6004,-0.2321,0.1344
Size,0.0069,0.0070,0.9900,0.3226,-0.0068,0.0206
NewItems,-0.0005,0.0003,-1.4278,0.1540,-0.0012,0.0002
Filelate,0.0260,0.0215,1.2128,0.2258,-0.0161,0.0682
POST,-0.0726,0.0530,-1.3680,0.1719,-0.1767,0.0316
POScore,0.0892,0.0615,1.4508,0.1474,-0.0316,0.2100
Car_e3,0.8750,0.0649,13.477,0.0000,0.7474,1.0025


In [None]:
#Investor
#With CARea
#Treatment Group
#Score
#Scar0 and Scar_e0
exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e0']])

FE = PanelOLS(dfd.Scar0, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar0,R-squared:,0.8917
Estimator:,PanelOLS,R-squared (Between):,0.5456
No. Observations:,725,R-squared (Within):,0.8935
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.7742
Time:,18:54:10,Log-likelihood,-478.52
Cov. Estimator:,Clustered,,
,,F-statistic:,609.11
Entities:,198,P-value,0.0000
Avg Obs:,3.6616,Distribution:,"F(7,518)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.9850,1.8430,0.5345,0.5933,-2.6356,4.6056
Score,-0.2039,1.8597,-0.1096,0.9128,-3.8574,3.4497
Size,0.1171,0.1278,0.9161,0.3600,-0.1340,0.3681
NewItems,-0.0149,0.0063,-2.3730,0.0180,-0.0272,-0.0026
Filelate,0.2348,0.2124,1.1054,0.2695,-0.1825,0.6520
POST,-0.0518,1.2310,-0.0421,0.9665,-2.4702,2.3667
POScore,0.3015,1.4136,0.2133,0.8312,-2.4756,3.0786
Scar_e0,0.9202,0.0226,40.799,0.0000,0.8758,0.9645


In [None]:
#Investor
#With CARea
#Treatment Group
#Score
#Scar1 and Scar_e1
exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e1']])

FE = PanelOLS(dfd.Scar1, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar1,R-squared:,0.8638
Estimator:,PanelOLS,R-squared (Between):,0.5303
No. Observations:,725,R-squared (Within):,0.8700
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.7498
Time:,18:54:43,Log-likelihood,-583.79
Cov. Estimator:,Clustered,,
,,F-statistic:,469.49
Entities:,198,P-value,0.0000
Avg Obs:,3.6616,Distribution:,"F(7,518)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.5945,1.9191,0.3098,0.7568,-3.1756,4.3647
Score,0.0315,2.1575,0.0146,0.9884,-4.2071,4.2701
Size,0.1424,0.1378,1.0331,0.3021,-0.1284,0.4131
NewItems,-0.0146,0.0071,-2.0592,0.0400,-0.0285,-0.0007
Filelate,0.3151,0.2375,1.3266,0.1852,-0.1515,0.7816
POST,-1.6623,2.2478,-0.7395,0.4599,-6.0782,2.7537
POScore,2.1812,2.6694,0.8171,0.4142,-3.0629,7.4254
Scar_e1,0.9152,0.0244,37.505,0.0000,0.8673,0.9632


In [None]:
#Investor
#With CARea
#Treatment Group
#Score
#Scar2 and Scar_e2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e2']])

FE = PanelOLS(dfd.Scar2, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar2,R-squared:,0.8917
Estimator:,PanelOLS,R-squared (Between):,0.5524
No. Observations:,725,R-squared (Within):,0.8936
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.7754
Time:,19:36:08,Log-likelihood,-483.55
Cov. Estimator:,Clustered,,
,,F-statistic:,609.42
Entities:,198,P-value,0.0000
Avg Obs:,3.6616,Distribution:,"F(7,518)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,1.0545,1.8275,0.5770,0.5642,-2.5356,4.6447
Score,-0.2808,1.8459,-0.1521,0.8791,-3.9072,3.3456
Size,0.1183,0.1277,0.9260,0.3549,-0.1327,0.3693
NewItems,-0.0150,0.0063,-2.3730,0.0180,-0.0274,-0.0026
Filelate,0.2386,0.2107,1.1324,0.2580,-0.1754,0.6526
POST,0.0067,1.2164,0.0055,0.9956,-2.3829,2.3962
POScore,0.2205,1.3965,0.1579,0.8746,-2.5231,2.9640
Scar_e2,0.9201,0.0228,40.344,0.0000,0.8753,0.9650


In [None]:
#Investor
#With CARea
#Treatment Group
#Scar3 and Scar_e3
#Score

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e3']])

FE = PanelOLS(dfd.Scar3, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar3,R-squared:,0.8612
Estimator:,PanelOLS,R-squared (Between):,0.5385
No. Observations:,725,R-squared (Within):,0.8679
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.7495
Time:,19:36:37,Log-likelihood,-593.76
Cov. Estimator:,Clustered,,
,,F-statistic:,458.98
Entities:,198,P-value,0.0000
Avg Obs:,3.6616,Distribution:,"F(7,518)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.5853,1.9377,0.3021,0.7627,-3.2214,4.3919
Score,0.1558,2.1919,0.0711,0.9434,-4.1503,4.4618
Size,0.1353,0.1381,0.9799,0.3276,-0.1360,0.4067
NewItems,-0.0148,0.0071,-2.0754,0.0384,-0.0288,-0.0008
Filelate,0.3078,0.2347,1.3113,0.1903,-0.1533,0.7690
POST,-1.8106,2.3392,-0.7740,0.4393,-6.4062,2.7849
POScore,2.3433,2.7845,0.8415,0.4004,-3.1271,7.8136
Scar_e3,0.9141,0.0247,36.935,0.0000,0.8654,0.9627


In [None]:
#Analyst
#Without CARea
#Treatment Group
#Score

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0485
Estimator:,PanelOLS,R-squared (Between):,-9.8520
No. Observations:,527,R-squared (Within):,-0.0478
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-4.2191
Time:,19:40:02,Log-likelihood,618.78
Cov. Estimator:,Clustered,,
,,F-statistic:,2.9639
Entities:,198,P-value,0.0078
Avg Obs:,2.6616,Distribution:,"F(6,349)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.9100,0.4548,2.0010,0.0462,0.0155,1.8044
Score,0.3747,0.3975,0.9427,0.3465,-0.4071,1.1566
Size,-0.1244,0.0740,-1.6808,0.0937,-0.2699,0.0212
NewItems,-0.0010,0.0011,-0.8922,0.3729,-0.0032,0.0012
Filelate,-0.2290,0.0889,-2.5755,0.0104,-0.4038,-0.0541
POST,0.3397,0.2869,1.1839,0.2373,-0.2247,0.9041
POScore,-0.4279,0.3541,-1.2084,0.2277,-1.1243,0.2685


In [None]:
#Analyst
#With CARea
#Treatment Group
#Score
#Car_e0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e0']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0485
Estimator:,PanelOLS,R-squared (Between):,-9.8495
No. Observations:,527,R-squared (Within):,-0.0479
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-4.2180
Time:,19:42:04,Log-likelihood,618.78
Cov. Estimator:,Clustered,,
,,F-statistic:,2.5332
Entities:,198,P-value,0.0149
Avg Obs:,2.6616,Distribution:,"F(7,348)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.9097,0.4513,2.0156,0.0446,0.0220,1.7974
Score,0.3748,0.3983,0.9408,0.3474,-0.4087,1.1583
Size,-0.1243,0.0739,-1.6830,0.0933,-0.2697,0.0210
NewItems,-0.0010,0.0011,-0.8926,0.3727,-0.0032,0.0012
Filelate,-0.2288,0.0864,-2.6475,0.0085,-0.3988,-0.0588
POST,0.3399,0.2843,1.1955,0.2327,-0.2193,0.8991
POScore,-0.4281,0.3513,-1.2185,0.2238,-1.1192,0.2629
Car_e0,-0.0008,0.0586,-0.0137,0.9891,-0.1160,0.1144


In [None]:
#Analyst
#With CARea
#Treatment Group
#Score
#Car_e1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e1']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0487
Estimator:,PanelOLS,R-squared (Between):,-10.031
No. Observations:,527,R-squared (Within):,-0.0462
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-4.2983
Time:,19:42:24,Log-likelihood,618.85
Cov. Estimator:,Clustered,,
,,F-statistic:,2.5457
Entities:,198,P-value,0.0144
Avg Obs:,2.6616,Distribution:,"F(7,348)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.9272,0.4607,2.0124,0.0449,0.0210,1.8334
Score,0.3668,0.3950,0.9286,0.3537,-0.4101,1.1437
Size,-0.1253,0.0748,-1.6743,0.0950,-0.2725,0.0219
NewItems,-0.0010,0.0012,-0.8914,0.3733,-0.0033,0.0012
Filelate,-0.2338,0.0909,-2.5722,0.0105,-0.4126,-0.0550
POST,0.3342,0.2732,1.2233,0.2220,-0.2031,0.8715
POScore,-0.4215,0.3381,-1.2467,0.2133,-1.0864,0.2434
Car_e1,0.0244,0.0695,0.3505,0.7262,-0.1124,0.1611


In [None]:
#Analyst
#With CARea
#Treatment Group
#Score
#Car_e2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e2']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0485
Estimator:,PanelOLS,R-squared (Between):,-9.8472
No. Observations:,527,R-squared (Within):,-0.0479
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-4.2169
Time:,19:42:39,Log-likelihood,618.78
Cov. Estimator:,Clustered,,
,,F-statistic:,2.5333
Entities:,198,P-value,0.0149
Avg Obs:,2.6616,Distribution:,"F(7,348)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.9094,0.4515,2.0141,0.0448,0.0214,1.7975
Score,0.3748,0.3982,0.9414,0.3471,-0.4083,1.1579
Size,-0.1243,0.0739,-1.6829,0.0933,-0.2696,0.0210
NewItems,-0.0010,0.0011,-0.8923,0.3729,-0.0032,0.0012
Filelate,-0.2287,0.0864,-2.6467,0.0085,-0.3987,-0.0588
POST,0.3401,0.2846,1.1951,0.2329,-0.2196,0.8998
POScore,-0.4283,0.3516,-1.2181,0.2240,-1.1199,0.2633
Car_e2,-0.0016,0.0582,-0.0267,0.9787,-0.1161,0.1130


In [None]:
#Analyst
#With CARea
#Treatment Group
#Score
#Car_e3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e3']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0487
Estimator:,PanelOLS,R-squared (Between):,-10.013
No. Observations:,527,R-squared (Within):,-0.0464
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-4.2906
Time:,19:42:53,Log-likelihood,618.83
Cov. Estimator:,Clustered,,
,,F-statistic:,2.5435
Entities:,198,P-value,0.0145
Avg Obs:,2.6616,Distribution:,"F(7,348)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.9256,0.4600,2.0122,0.0450,0.0209,1.8303
Score,0.3674,0.3950,0.9300,0.3530,-0.4096,1.1443
Size,-0.1252,0.0748,-1.6747,0.0949,-0.2723,0.0218
NewItems,-0.0010,0.0012,-0.8910,0.3736,-0.0033,0.0012
Filelate,-0.2333,0.0906,-2.5749,0.0104,-0.4116,-0.0551
POST,0.3346,0.2738,1.2221,0.2225,-0.2039,0.8732
POScore,-0.4220,0.3388,-1.2455,0.2138,-1.0884,0.2444
Car_e3,0.0222,0.0692,0.3200,0.7492,-0.1140,0.1584


In [None]:
#Analyst
#With CARea
#Treatment Group
#Score
#Scar_e0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e0']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0486
Estimator:,PanelOLS,R-squared (Between):,-9.7523
No. Observations:,527,R-squared (Within):,-0.0469
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-4.1734
Time:,19:50:35,Log-likelihood,618.82
Cov. Estimator:,Clustered,,
,,F-statistic:,2.5415
Entities:,198,P-value,0.0146
Avg Obs:,2.6616,Distribution:,"F(7,348)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.9029,0.4564,1.9781,0.0487,0.0052,1.8006
Score,0.3763,0.3954,0.9516,0.3420,-0.4014,1.1540
Size,-0.1237,0.0737,-1.6791,0.0940,-0.2686,0.0212
NewItems,-0.0010,0.0011,-0.8898,0.3742,-0.0032,0.0012
Filelate,-0.2279,0.0880,-2.5908,0.0100,-0.4009,-0.0549
POST,0.3424,0.2895,1.1829,0.2377,-0.2269,0.9118
POScore,-0.4307,0.3573,-1.2055,0.2288,-1.1335,0.2720
Scar_e0,-0.0006,0.0014,-0.4653,0.6420,-0.0034,0.0021


In [None]:
#Analyst
#With CARea
#Treatment Group
#Score
#Scar_e1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e1']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0485
Estimator:,PanelOLS,R-squared (Between):,-9.7744
No. Observations:,527,R-squared (Within):,-0.0478
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-4.1845
Time:,19:50:57,Log-likelihood,618.79
Cov. Estimator:,Clustered,,
,,F-statistic:,2.5348
Entities:,198,P-value,0.0148
Avg Obs:,2.6616,Distribution:,"F(7,348)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.9042,0.4595,1.9679,0.0499,0.0005,1.8079
Score,0.3771,0.3949,0.9550,0.3403,-0.3996,1.1538
Size,-0.1239,0.0743,-1.6666,0.0965,-0.2701,0.0223
NewItems,-0.0010,0.0011,-0.8889,0.3747,-0.0032,0.0012
Filelate,-0.2282,0.0886,-2.5758,0.0104,-0.4024,-0.0540
POST,0.3410,0.2855,1.1943,0.2332,-0.2205,0.9024
POScore,-0.4293,0.3526,-1.2174,0.2243,-1.1229,0.2643
Scar_e1,-0.0003,0.0014,-0.1970,0.8440,-0.0031,0.0025


In [None]:
#Analyst
#With CARea
#Treatment Group
#Score
#Scar_e2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e2']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0486
Estimator:,PanelOLS,R-squared (Between):,-9.7488
No. Observations:,527,R-squared (Within):,-0.0468
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-4.1717
Time:,19:51:10,Log-likelihood,618.83
Cov. Estimator:,Clustered,,
,,F-statistic:,2.5421
Entities:,198,P-value,0.0146
Avg Obs:,2.6616,Distribution:,"F(7,348)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.9027,0.4566,1.9769,0.0488,0.0046,1.8007
Score,0.3763,0.3952,0.9521,0.3417,-0.4010,1.1537
Size,-0.1237,0.0737,-1.6787,0.0941,-0.2685,0.0212
NewItems,-0.0010,0.0011,-0.8904,0.3739,-0.0032,0.0012
Filelate,-0.2279,0.0880,-2.5906,0.0100,-0.4009,-0.0549
POST,0.3426,0.2896,1.1831,0.2376,-0.2269,0.9121
POScore,-0.4309,0.3574,-1.2057,0.2288,-1.1337,0.2720
Scar_e2,-0.0007,0.0014,-0.4875,0.6262,-0.0033,0.0020


In [None]:
#Analyst
#With CARea
#Treatment Group
#Score
#Scar_e3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e3']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0485
Estimator:,PanelOLS,R-squared (Between):,-9.7542
No. Observations:,527,R-squared (Within):,-0.0478
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-4.1755
Time:,19:51:22,Log-likelihood,618.80
Cov. Estimator:,Clustered,,
,,F-statistic:,2.5357
Entities:,198,P-value,0.0148
Avg Obs:,2.6616,Distribution:,"F(7,348)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.9027,0.4587,1.9678,0.0499,0.0004,1.8049
Score,0.3778,0.3950,0.9565,0.3395,-0.3991,1.1548
Size,-0.1238,0.0742,-1.6671,0.0964,-0.2698,0.0222
NewItems,-0.0010,0.0011,-0.8902,0.3740,-0.0032,0.0012
Filelate,-0.2280,0.0885,-2.5776,0.0104,-0.4020,-0.0540
POST,0.3414,0.2858,1.1944,0.2331,-0.2208,0.9036
POScore,-0.4298,0.3530,-1.2176,0.2242,-1.1241,0.2645
Scar_e3,-0.0004,0.0014,-0.2547,0.7991,-0.0031,0.0024


In [None]:
#Adoption avoidance group
#Score
import pandas as pd
from linearmodels.panel import PanelOLS
from statsmodels.tools.tools import add_constant

import numpy as np

df = pd.read_excel('/content/Adoption Avoidance Group August 7, 2021.xlsx') 

#Alternative for fixed effect time-series
dfd = df[['Revision','cusip','fyear','Car0','Car1','Car2','Car3','Scar0','Scar1','Scar2','Scar3','Car_e0','Car_e1','Car_e2','Car_e3','Scar_e0', 'Scar_e1', 'Scar_e2','Scar_e3','Other information MD&A Recoded','Score','Raw Score','Size','Filelate','NewItems (excluding 0)','POST']]

dfd.rename(columns={'Other information MD&A Recoded':'Auditor_OI','NewItems (excluding 0)':'NewItems'},inplace=True)

dfd['year'] = dfd['fyear'].astype(int)
#dfd['year'] = pd.Categorical(dfd.year)
dfd['year'] = pd.to_datetime(dfd.year, format='%Y')
dfd['cusip'] = pd.Categorical(dfd['cusip'])
dfd = dfd.set_index(['cusip','year'])
dfd['POScore'] = dfd['POST'] * dfd['Score']

#Car0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Car0, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/use

0,1,2,3
Dep. Variable:,Car0,R-squared:,0.0881
Estimator:,PanelOLS,R-squared (Between):,-1.0327
No. Observations:,328,R-squared (Within):,-0.7333
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.9189
Time:,23:28:01,Log-likelihood,457.44
Cov. Estimator:,Clustered,,
,,F-statistic:,3.7527
Entities:,86,P-value,0.0014
Avg Obs:,3.8140,Distribution:,"F(6,233)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.6464,0.4844,-1.3344,0.1834,-1.6009,0.3080
Score,0.9329,0.6141,1.5191,0.1301,-0.2770,2.1428
Size,-0.0329,0.0127,-2.5983,0.0100,-0.0578,-0.0079
NewItems,0.0018,0.0007,2.7860,0.0058,0.0005,0.0032
Filelate,0.0405,0.0343,1.1800,0.2392,-0.0271,0.1081
POST,-0.1728,0.3542,-0.4878,0.6262,-0.8707,0.5251
POScore,0.0976,0.4227,0.2309,0.8176,-0.7352,0.9304


In [None]:
#Investors
#Without CARea
#Adoption Avoidance Group
#Score
#Car1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Car1, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Car1,R-squared:,0.0903
Estimator:,PanelOLS,R-squared (Between):,-0.5119
No. Observations:,328,R-squared (Within):,-0.8557
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.7419
Time:,19:54:28,Log-likelihood,488.66
Cov. Estimator:,Clustered,,
,,F-statistic:,3.8564
Entities:,86,P-value,0.0011
Avg Obs:,3.8140,Distribution:,"F(6,233)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.6153,0.4604,-1.3364,0.1827,-1.5224,0.2918
Score,0.9281,0.5978,1.5526,0.1219,-0.2497,2.1059
Size,-0.0294,0.0115,-2.5584,0.0111,-0.0520,-0.0068
NewItems,0.0014,0.0006,2.3936,0.0175,0.0003,0.0026
Filelate,0.0322,0.0260,1.2412,0.2158,-0.0189,0.0834
POST,-0.1955,0.3036,-0.6439,0.5203,-0.7937,0.4027
POScore,0.1262,0.3598,0.3507,0.7262,-0.5827,0.8350


In [None]:
#Investors
#Without CARea
#Adoption Avoidance Group
#Score
#Car2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Car2, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Car2,R-squared:,0.0863
Estimator:,PanelOLS,R-squared (Between):,-0.9530
No. Observations:,328,R-squared (Within):,-0.7096
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.8706
Time:,19:54:48,Log-likelihood,456.39
Cov. Estimator:,Clustered,,
,,F-statistic:,3.6680
Entities:,86,P-value,0.0017
Avg Obs:,3.8140,Distribution:,"F(6,233)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.6339,0.4795,-1.3220,0.1875,-1.5786,0.3108
Score,0.9244,0.6117,1.5112,0.1321,-0.2808,2.1296
Size,-0.0327,0.0130,-2.5149,0.0126,-0.0584,-0.0071
NewItems,0.0018,0.0007,2.6496,0.0086,0.0005,0.0031
Filelate,0.0409,0.0335,1.2207,0.2234,-0.0251,0.1070
POST,-0.1774,0.3645,-0.4869,0.6268,-0.8955,0.5406
POScore,0.1051,0.4350,0.2417,0.8092,-0.7519,0.9621


In [None]:
#Investors
#Without CARea
#Adoption Avoidance Group
#Score
#Car3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Car3, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Car3,R-squared:,0.0890
Estimator:,PanelOLS,R-squared (Between):,-0.4482
No. Observations:,328,R-squared (Within):,-0.8383
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.7032
Time:,19:55:01,Log-likelihood,487.82
Cov. Estimator:,Clustered,,
,,F-statistic:,3.7960
Entities:,86,P-value,0.0013
Avg Obs:,3.8140,Distribution:,"F(6,233)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.6111,0.4623,-1.3220,0.1875,-1.5219,0.2996
Score,0.9230,0.5996,1.5393,0.1251,-0.2584,2.1043
Size,-0.0291,0.0116,-2.5160,0.0125,-0.0519,-0.0063
NewItems,0.0014,0.0006,2.2959,0.0226,0.0002,0.0027
Filelate,0.0338,0.0244,1.3847,0.1675,-0.0143,0.0818
POST,-0.1771,0.3202,-0.5531,0.5807,-0.8080,0.4538
POScore,0.1052,0.3803,0.2765,0.7824,-0.6441,0.8545


In [None]:
#Investors
#Without CARea
#Adoption Avoidance Group
#Score
#Scar0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Scar0, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar0,R-squared:,0.0470
Estimator:,PanelOLS,R-squared (Between):,-0.7001
No. Observations:,328,R-squared (Within):,-0.4448
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.5337
Time:,23:28:40,Log-likelihood,-558.83
Cov. Estimator:,Clustered,,
,,F-statistic:,1.9155
Entities:,86,P-value,0.0791
Avg Obs:,3.8140,Distribution:,"F(6,233)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-13.223,10.516,-1.2574,0.2099,-33.942,7.4962
Score,20.883,13.401,1.5583,0.1205,-5.5196,47.285
Size,-0.4891,0.1852,-2.6413,0.0088,-0.8540,-0.1243
NewItems,0.0177,0.0132,1.3438,0.1803,-0.0083,0.0438
Filelate,-0.4546,0.7601,-0.5980,0.5504,-1.9522,1.0430
POST,0.2124,5.3522,0.0397,0.9684,-10.333,10.757
POScore,-2.3767,6.3345,-0.3752,0.7078,-14.857,10.104


In [None]:
#Investors
#Without CARea
#Adoption Avoidance Group
#Score
#Scar1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Scar1, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar1,R-squared:,0.0466
Estimator:,PanelOLS,R-squared (Between):,-0.4193
No. Observations:,325,R-squared (Within):,-0.4329
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.4067
Time:,23:29:16,Log-likelihood,-543.49
Cov. Estimator:,Clustered,,
,,F-statistic:,1.8748
Entities:,86,P-value,0.0860
Avg Obs:,3.7791,Distribution:,"F(6,230)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-13.524,9.9081,-1.3649,0.1736,-33.046,5.9984
Score,21.471,12.733,1.6862,0.0931,-3.6174,46.560
Size,-0.3922,0.1634,-2.4003,0.0172,-0.7141,-0.0703
NewItems,0.0108,0.0129,0.8350,0.4046,-0.0146,0.0362
Filelate,-0.5233,0.8209,-0.6374,0.5245,-2.1407,1.0942
POST,-0.7068,4.9963,-0.1415,0.8876,-10.551,9.1376
POScore,-1.1775,5.8146,-0.2025,0.8397,-12.634,10.279


In [None]:
#Investors
#Without CARea
#Adoption Avoidance Group
#Score
#Scar2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Scar2, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar2,R-squared:,0.0432
Estimator:,PanelOLS,R-squared (Between):,-0.6007
No. Observations:,328,R-squared (Within):,-0.4322
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.4917
Time:,23:29:34,Log-likelihood,-561.76
Cov. Estimator:,Clustered,,
,,F-statistic:,1.7543
Entities:,86,P-value,0.1094
Avg Obs:,3.8140,Distribution:,"F(6,233)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-12.040,9.8182,-1.2263,0.2213,-31.384,7.3036
Score,20.127,12.941,1.5553,0.1212,-5.3694,45.624
Size,-0.4851,0.1982,-2.4470,0.0151,-0.8757,-0.0945
NewItems,0.0135,0.0122,1.1032,0.2711,-0.0106,0.0376
Filelate,-0.5238,0.7772,-0.6739,0.5010,-2.0549,1.0074
POST,-0.6237,5.3500,-0.1166,0.9073,-11.164,9.9169
POScore,-1.3275,6.3308,-0.2097,0.8341,-13.800,11.145


In [None]:
#Investors
#Without CARea
#Adoption Avoidance Group
#Score
#Scar3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Scar3, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar3,R-squared:,0.0446
Estimator:,PanelOLS,R-squared (Between):,-0.3561
No. Observations:,328,R-squared (Within):,-0.4237
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.4021
Time:,23:29:48,Log-likelihood,-551.79
Cov. Estimator:,Clustered,,
,,F-statistic:,1.8139
Entities:,86,P-value,0.0972
Avg Obs:,3.8140,Distribution:,"F(6,233)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-13.054,9.6837,-1.3480,0.1790,-32.133,6.0247
Score,20.932,12.651,1.6546,0.0993,-3.9920,45.857
Size,-0.4179,0.1765,-2.3682,0.0187,-0.7655,-0.0702
NewItems,0.0120,0.0123,0.9815,0.3274,-0.0121,0.0362
Filelate,-0.5552,0.8358,-0.6643,0.5071,-2.2019,1.0914
POST,0.4677,4.9745,0.0940,0.9252,-9.3329,10.268
POScore,-2.5859,5.8290,-0.4436,0.6577,-14.070,8.8983


In [None]:
#Investors
#With CARea
#Adoption Avoidance Group
#Score
#Car0 + Car_e0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e0']])

FE = PanelOLS(dfd.Car0, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
  return Series(np.sqrt(np.diag(self.cov)), self._var_names, name="std_error")


0,1,2,3
Dep. Variable:,Car0,R-squared:,0.7956
Estimator:,PanelOLS,R-squared (Between):,0.5506
No. Observations:,328,R-squared (Within):,0.7266
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.6305
Time:,23:30:39,Log-likelihood,702.70
Cov. Estimator:,Clustered,,
,,F-statistic:,129.01
Entities:,86,P-value,0.0000
Avg Obs:,3.8140,Distribution:,"F(7,232)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0135,0.0774,-0.1747,0.8615,-0.1661,0.1390
Score,0.1572,0.1154,1.3624,0.1744,-0.0701,0.3846
Size,-0.0181,0.0080,-2.2727,0.0240,-0.0337,-0.0024
NewItems,0.0004,0.0005,0.7365,0.4622,-0.0006,0.0014
Filelate,0.0055,,,,,
POST,-0.0475,0.0658,-0.7210,0.4716,-0.1771,0.0822
POScore,0.0210,0.0814,0.2583,0.7964,-0.1394,0.1814
Car_e0,0.8725,0.0400,21.822,0.0000,0.7937,0.9513


In [None]:
#Investors
#With CARea
#Adoption Avoidance Group
#Score
#Car1 + Car_e1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e1']])

FE = PanelOLS(dfd.Car1, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
  return Series(np.sqrt(np.diag(self.cov)), self._var_names, name="std_error")


0,1,2,3
Dep. Variable:,Car1,R-squared:,0.7544
Estimator:,PanelOLS,R-squared (Between):,0.7328
No. Observations:,328,R-squared (Within):,0.6865
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.6932
Time:,23:31:02,Log-likelihood,703.37
Cov. Estimator:,Clustered,,
,,F-statistic:,101.78
Entities:,86,P-value,0.0000
Avg Obs:,3.8140,Distribution:,"F(7,232)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0734,0.0981,-0.7483,0.4551,-0.2668,0.1199
Score,0.2637,0.1549,1.7017,0.0902,-0.0416,0.5689
Size,-0.0133,0.0064,-2.0865,0.0380,-0.0258,-0.0007
NewItems,-7.627e-05,0.0006,-0.1268,0.8992,-0.0013,0.0011
Filelate,0.0074,,,,,
POST,-0.0374,0.0755,-0.4955,0.6207,-0.1862,0.1114
POScore,0.0095,0.0951,0.0997,0.9207,-0.1779,0.1968
Car_e1,0.8257,0.0621,13.286,0.0000,0.7032,0.9481


In [None]:
#Investors
#With CARea
#Adoption Avoidance Group
#Score
#Car2 + Car_e2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e2']])

FE = PanelOLS(dfd.Car2, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
  return Series(np.sqrt(np.diag(self.cov)), self._var_names, name="std_error")


0,1,2,3
Dep. Variable:,Car2,R-squared:,0.7978
Estimator:,PanelOLS,R-squared (Between):,0.5514
No. Observations:,328,R-squared (Within):,0.7303
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.6323
Time:,23:31:18,Log-likelihood,703.71
Cov. Estimator:,Clustered,,
,,F-statistic:,130.74
Entities:,86,P-value,0.0000
Avg Obs:,3.8140,Distribution:,"F(7,232)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0065,0.0767,-0.0847,0.9325,-0.1577,0.1447
Score,0.1519,0.1147,1.3246,0.1866,-0.0741,0.3780
Size,-0.0182,0.0081,-2.2464,0.0256,-0.0342,-0.0022
NewItems,0.0004,0.0005,0.7066,0.4805,-0.0007,0.0014
Filelate,0.0050,,,,,
POST,-0.0671,0.0604,-1.1108,0.2678,-0.1861,0.0519
POScore,0.0453,0.0746,0.6078,0.5439,-0.1016,0.1922
Car_e2,0.8742,0.0392,22.282,0.0000,0.7969,0.9514


In [None]:
#Investors
#With CARea
#Adoption Avoidance Group
#Score
#Car3 + Car_e3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e3']])

FE = PanelOLS(dfd.Car3, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
  return Series(np.sqrt(np.diag(self.cov)), self._var_names, name="std_error")


0,1,2,3
Dep. Variable:,Car3,R-squared:,0.7451
Estimator:,PanelOLS,R-squared (Between):,0.7437
No. Observations:,328,R-squared (Within):,0.6743
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.6907
Time:,23:31:35,Log-likelihood,696.71
Cov. Estimator:,Clustered,,
,,F-statistic:,96.888
Entities:,86,P-value,0.0000
Avg Obs:,3.8140,Distribution:,"F(7,232)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0755,0.1019,-0.7413,0.4592,-0.2763,0.1252
Score,0.2653,0.1617,1.6413,0.1021,-0.0532,0.5838
Size,-0.0131,0.0063,-2.0907,0.0376,-0.0255,-0.0008
NewItems,-7.437e-05,0.0006,-0.1211,0.9037,-0.0013,0.0011
Filelate,0.0083,,,,,
POST,-0.0297,0.0810,-0.3662,0.7145,-0.1892,0.1299
POScore,-0.0003,0.1027,-0.0031,0.9975,-0.2027,0.2020
Car_e3,0.8199,0.0642,12.779,0.0000,0.6935,0.9463


In [None]:
#Investors
#With CARea
#Adoption Avoidance Group
#Score
#Scar0 + Scar_e0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e0']])

FE = PanelOLS(dfd.Scar0, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
  return Series(np.sqrt(np.diag(self.cov)), self._var_names, name="std_error")


0,1,2,3
Dep. Variable:,Scar0,R-squared:,0.7841
Estimator:,PanelOLS,R-squared (Between):,0.4686
No. Observations:,328,R-squared (Within):,0.6387
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.5738
Time:,23:32:09,Log-likelihood,-315.36
Cov. Estimator:,Clustered,,
,,F-statistic:,120.34
Entities:,86,P-value,0.0000
Avg Obs:,3.8140,Distribution:,"F(7,232)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-4.0758,3.5034,-1.1634,0.2459,-10.978,2.8268
Score,7.2714,4.0747,1.7845,0.0756,-0.7567,15.299
Size,-0.2884,0.0721,-4.0007,0.0001,-0.4304,-0.1464
NewItems,0.0081,0.0087,0.9361,0.3502,-0.0090,0.0253
Filelate,-0.0095,,,,,
POST,-0.7020,2.5871,-0.2714,0.7864,-5.7993,4.3952
POScore,-0.4081,3.4568,-0.1181,0.9061,-7.2188,6.4027
Scar_e0,0.8346,0.0572,14.583,0.0000,0.7218,0.9473


In [None]:
#Investors
#With CARea
#Adoption Avoidance Group
#Score
#Scar1 + Scar_e1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e1']])

FE = PanelOLS(dfd.Scar1, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar1,R-squared:,0.6959
Estimator:,PanelOLS,R-squared (Between):,0.5417
No. Observations:,325,R-squared (Within):,0.5805
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.5636
Time:,23:32:26,Log-likelihood,-357.83
Cov. Estimator:,Clustered,,
,,F-statistic:,74.851
Entities:,86,P-value,0.0000
Avg Obs:,3.7791,Distribution:,"F(7,229)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-6.5282,4.4725,-1.4596,0.1458,-15.341,2.2843
Score,11.589,5.8475,1.9820,0.0487,0.0677,23.111
Size,-0.2116,0.0890,-2.3788,0.0182,-0.3869,-0.0363
NewItems,-0.0039,0.0124,-0.3104,0.7565,-0.0284,0.0207
Filelate,-0.0776,0.0865,-0.8961,0.3712,-0.2481,0.0930
POST,0.1584,2.7437,0.0577,0.9540,-5.2478,5.5646
POScore,-1.3436,3.6819,-0.3649,0.7155,-8.5984,5.9111
Scar_e1,0.7855,0.0690,11.388,0.0000,0.6496,0.9214


In [None]:
#Investors
#With CARea
#Adoption Avoidance Group
#Score
#Scar2 + Scar_e2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e2']])

FE = PanelOLS(dfd.Scar2, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
  return Series(np.sqrt(np.diag(self.cov)), self._var_names, name="std_error")


0,1,2,3
Dep. Variable:,Scar2,R-squared:,0.7863
Estimator:,PanelOLS,R-squared (Between):,0.4838
No. Observations:,328,R-squared (Within):,0.6396
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.5795
Time:,23:32:43,Log-likelihood,-315.93
Cov. Estimator:,Clustered,,
,,F-statistic:,121.94
Entities:,86,P-value,0.0000
Avg Obs:,3.8140,Distribution:,"F(7,232)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-3.3503,3.2806,-1.0213,0.3082,-9.8138,3.1132
Score,6.7679,3.9016,1.7346,0.0841,-0.9192,14.455
Size,-0.2934,0.0684,-4.2924,0.0000,-0.4281,-0.1588
NewItems,0.0061,0.0084,0.7279,0.4674,-0.0105,0.0227
Filelate,-0.0542,,,,,
POST,-2.1651,1.9229,-1.1260,0.2613,-5.9536,1.6234
POScore,1.3745,2.6536,0.5180,0.6050,-3.8538,6.6027
Scar_e2,0.8370,0.0577,14.503,0.0000,0.7233,0.9507


In [None]:
#Investors
#With CARea
#Adoption Avoidance Group
#Score
#Scar3 + Scar_e3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e3']])

FE = PanelOLS(dfd.Scar3, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Scar3,R-squared:,0.6912
Estimator:,PanelOLS,R-squared (Between):,0.5334
No. Observations:,328,R-squared (Within):,0.5777
Date:,"Sat, Aug 14 2021",R-squared (Overall):,0.5560
Time:,23:32:59,Log-likelihood,-366.57
Cov. Estimator:,Clustered,,
,,F-statistic:,74.180
Entities:,86,P-value,0.0000
Avg Obs:,3.8140,Distribution:,"F(7,232)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-6.4551,4.3579,-1.4813,0.1399,-15.041,2.1309
Score,11.364,5.8330,1.9482,0.0526,-0.1284,22.856
Size,-0.2293,0.0863,-2.6568,0.0084,-0.3993,-0.0593
NewItems,-0.0021,0.0126,-0.1670,0.8676,-0.0270,0.0228
Filelate,-0.0871,0.0916,-0.9507,0.3427,-0.2676,0.0934
POST,0.7165,2.8704,0.2496,0.8031,-4.9389,6.3719
POScore,-2.0158,3.8128,-0.5287,0.5975,-9.5279,5.4963
Scar_e3,0.7849,0.0689,11.391,0.0000,0.6491,0.9206


In [None]:
#Analyst
#Without CARea
#Adoption Avoidance Group
#Score

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0137
Estimator:,PanelOLS,R-squared (Between):,-0.3126
No. Observations:,238,R-squared (Within):,0.0154
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.1223
Time:,23:33:41,Log-likelihood,336.22
Cov. Estimator:,Clustered,,
,,F-statistic:,0.3471
Entities:,86,P-value,0.9107
Avg Obs:,2.7674,Distribution:,"F(6,150)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.1567,0.2022,-0.7752,0.4395,-0.5561,0.2427
Score,0.2868,0.2190,1.3097,0.1923,-0.1459,0.7194
Size,-0.0171,0.0163,-1.0459,0.2973,-0.0493,0.0152
NewItems,0.0007,0.0009,0.7807,0.4362,-0.0011,0.0025
Filelate,0.0132,0.0217,0.6081,0.5440,-0.0296,0.0560
POST,0.3314,0.4709,0.7038,0.4827,-0.5990,1.2618
POScore,-0.3943,0.5619,-0.7017,0.4840,-1.5046,0.7160


In [None]:
#Analyst
#With CARea
#Adoption Avoidance Group
#Score
#Car_e0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e0']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0248
Estimator:,PanelOLS,R-squared (Between):,-0.2252
No. Observations:,238,R-squared (Within):,0.0251
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.0722
Time:,23:34:14,Log-likelihood,337.57
Cov. Estimator:,Clustered,,
,,F-statistic:,0.5415
Entities:,86,P-value,0.8019
Avg Obs:,2.7674,Distribution:,"F(7,149)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0044,0.2250,-0.0194,0.9845,-0.4489,0.4401
Score,0.0964,0.2195,0.4394,0.6610,-0.3372,0.5301
Size,-0.0146,0.0147,-0.9914,0.3231,-0.0437,0.0145
NewItems,0.0006,0.0009,0.6529,0.5148,-0.0012,0.0023
Filelate,0.0200,0.0137,1.4593,0.1466,-0.0071,0.0470
POST,0.3206,0.4324,0.7414,0.4596,-0.5339,1.1751
POScore,-0.3891,0.5172,-0.7523,0.4531,-1.4110,0.6329
Car_e0,0.1017,0.0655,1.5524,0.1227,-0.0277,0.2311


In [None]:
#Analyst
#With CARea
#Adoption Avoidance Group
#Score
#Car_e1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e1']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0222
Estimator:,PanelOLS,R-squared (Between):,-0.2584
No. Observations:,238,R-squared (Within):,0.0236
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.0860
Time:,23:34:30,Log-likelihood,337.25
Cov. Estimator:,Clustered,,
,,F-statistic:,0.4830
Entities:,86,P-value,0.8458
Avg Obs:,2.7674,Distribution:,"F(7,149)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0249,0.2427,-0.1028,0.9183,-0.5046,0.4547
Score,0.1360,0.2420,0.5620,0.5749,-0.3422,0.6143
Size,-0.0156,0.0161,-0.9671,0.3351,-0.0474,0.0163
NewItems,0.0005,0.0009,0.5790,0.5635,-0.0013,0.0024
Filelate,0.0191,0.0116,1.6507,0.1009,-0.0038,0.0420
POST,0.3393,0.4467,0.7594,0.4488,-0.5435,1.2221
POScore,-0.4101,0.5350,-0.7664,0.4446,-1.4673,0.6472
Car_e1,0.1016,0.0745,1.3643,0.1745,-0.0456,0.2487


In [None]:
#Analyst
#With CARea
#Adoption Avoidance Group
#Score
#Car_e2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e2']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0258
Estimator:,PanelOLS,R-squared (Between):,-0.2266
No. Observations:,238,R-squared (Within):,0.0255
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.0725
Time:,23:34:46,Log-likelihood,337.69
Cov. Estimator:,Clustered,,
,,F-statistic:,0.5632
Entities:,86,P-value,0.7848
Avg Obs:,2.7674,Distribution:,"F(7,149)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0001,0.2225,0.0005,0.9996,-0.4396,0.4398
Score,0.0899,0.2168,0.4148,0.6789,-0.3384,0.5183
Size,-0.0146,0.0147,-0.9873,0.3251,-0.0437,0.0146
NewItems,0.0006,0.0009,0.6563,0.5126,-0.0012,0.0023
Filelate,0.0199,0.0134,1.4861,0.1394,-0.0066,0.0464
POST,0.3192,0.4291,0.7440,0.4581,-0.5286,1.1671
POScore,-0.3877,0.5131,-0.7557,0.4510,-1.4016,0.6261
Car_e2,0.1053,0.0655,1.6090,0.1097,-0.0240,0.2347


In [None]:
#Analyst
#With CARea
#Adoption Avoidance Group
#Score
#Car_e3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e3']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0224
Estimator:,PanelOLS,R-squared (Between):,-0.2598
No. Observations:,238,R-squared (Within):,0.0236
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.0869
Time:,23:34:58,Log-likelihood,337.28
Cov. Estimator:,Clustered,,
,,F-statistic:,0.4881
Entities:,86,P-value,0.8421
Avg Obs:,2.7674,Distribution:,"F(7,149)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0241,0.2415,-0.0998,0.9206,-0.5013,0.4531
Score,0.1343,0.2402,0.5594,0.5767,-0.3402,0.6089
Size,-0.0155,0.0161,-0.9647,0.3362,-0.0474,0.0163
NewItems,0.0005,0.0009,0.5814,0.5618,-0.0013,0.0024
Filelate,0.0188,0.0115,1.6420,0.1027,-0.0038,0.0415
POST,0.3385,0.4454,0.7600,0.4484,-0.5416,1.2186
POScore,-0.4092,0.5334,-0.7671,0.4442,-1.4633,0.6448
Car_e3,0.1023,0.0743,1.3766,0.1707,-0.0446,0.2492


In [None]:
#Analyst
#With CARea
#Adoption Avoidance Group
#Score
#Scar_e0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e0']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0139
Estimator:,PanelOLS,R-squared (Between):,-0.3031
No. Observations:,238,R-squared (Within):,0.0140
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.1190
Time:,23:35:12,Log-likelihood,336.24
Cov. Estimator:,Clustered,,
,,F-statistic:,0.2990
Entities:,86,P-value,0.9533
Avg Obs:,2.7674,Distribution:,"F(7,149)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.1371,0.2321,-0.5909,0.5555,-0.5957,0.3215
Score,0.2613,0.2460,1.0620,0.2899,-0.2249,0.7475
Size,-0.0168,0.0161,-1.0421,0.2991,-0.0486,0.0150
NewItems,0.0007,0.0009,0.7694,0.4429,-0.0011,0.0025
Filelate,0.0137,0.0201,0.6820,0.4963,-0.0260,0.0534
POST,0.3264,0.4668,0.6993,0.4855,-0.5960,1.2488
POScore,-0.3903,0.5582,-0.6991,0.4856,-1.4934,0.7128
Scar_e0,0.0005,0.0016,0.3470,0.7291,-0.0025,0.0036


In [None]:
#Analyst
#With CARea
#Adoption Avoidance Group
#Score
#Scar_e1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e1']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0139
Estimator:,PanelOLS,R-squared (Between):,-0.3320
No. Observations:,235,R-squared (Within):,0.0149
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.1274
Time:,23:35:29,Log-likelihood,330.80
Cov. Estimator:,Clustered,,
,,F-statistic:,0.2938
Entities:,86,P-value,0.9554
Avg Obs:,2.7326,Distribution:,"F(7,146)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.1665,0.2492,-0.6681,0.5051,-0.6589,0.3260
Score,0.2938,0.2690,1.0921,0.2766,-0.2379,0.8255
Size,-0.0172,0.0166,-1.0376,0.3012,-0.0500,0.0156
NewItems,0.0008,0.0009,0.8019,0.4239,-0.0011,0.0026
Filelate,0.0134,0.0213,0.6301,0.5296,-0.0286,0.0554
POST,0.3297,0.4678,0.7049,0.4820,-0.5947,1.2541
POScore,-0.3922,0.5587,-0.7019,0.4838,-1.4964,0.7120
Scar_e1,-0.0003,0.0023,-0.1345,0.8932,-0.0049,0.0042


In [None]:
#Analyst
#With CARea
#Adoption Avoidance Group
#Score
#Scar_e2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e2']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0141
Estimator:,PanelOLS,R-squared (Between):,-0.2994
No. Observations:,238,R-squared (Within):,0.0131
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.1179
Time:,23:35:43,Log-likelihood,336.27
Cov. Estimator:,Clustered,,
,,F-statistic:,0.3039
Entities:,86,P-value,0.9512
Avg Obs:,2.7674,Distribution:,"F(7,149)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.1270,0.2268,-0.5599,0.5764,-0.5751,0.3212
Score,0.2476,0.2408,1.0284,0.3054,-0.2282,0.7233
Size,-0.0166,0.0161,-1.0307,0.3043,-0.0484,0.0152
NewItems,0.0007,0.0009,0.7686,0.4434,-0.0011,0.0025
Filelate,0.0140,0.0200,0.6984,0.4860,-0.0255,0.0535
POST,0.3235,0.4650,0.6957,0.4877,-0.5953,1.2422
POScore,-0.3879,0.5560,-0.6976,0.4865,-1.4865,0.7108
Scar_e2,0.0008,0.0015,0.5674,0.5713,-0.0021,0.0037


In [None]:
#Analyst
#With CARea
#Adoption Avoidance Group
#Score
#Scar_e3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e3']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.


0,1,2,3
Dep. Variable:,Revision,R-squared:,0.0137
Estimator:,PanelOLS,R-squared (Between):,-0.3154
No. Observations:,238,R-squared (Within):,0.0159
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.1233
Time:,23:35:56,Log-likelihood,336.23
Cov. Estimator:,Clustered,,
,,F-statistic:,0.2960
Entities:,86,P-value,0.9546
Avg Obs:,2.7674,Distribution:,"F(7,149)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.1631,0.2425,-0.6727,0.5022,-0.6423,0.3161
Score,0.2948,0.2641,1.1159,0.2663,-0.2272,0.8167
Size,-0.0172,0.0165,-1.0384,0.3008,-0.0498,0.0155
NewItems,0.0007,0.0009,0.7696,0.4428,-0.0011,0.0026
Filelate,0.0130,0.0208,0.6241,0.5335,-0.0282,0.0542
POST,0.3324,0.4717,0.7047,0.4821,-0.5997,1.2645
POScore,-0.3948,0.5637,-0.7004,0.4848,-1.5087,0.7191
Scar_e3,-0.0002,0.0022,-0.0937,0.9255,-0.0046,0.0041


In [None]:

import pandas as pd
from linearmodels.panel import PanelOLS
from statsmodels.tools.tools import add_constant

import numpy as np

df = pd.read_excel('/content/Non-Adoption Group Data August 7, 2021.xlsx') 

#Alternative for fixed effect time-series
dfd = df[['Revision','cusip','fyear','Car0','Car1','Car2','Car3','Scar0','Scar1','Scar2','Scar3','Car_e0','Car_e1','Car_e2','Car_e3','Scar_e0', 'Scar_e1', 'Scar_e2','Scar_e3','Other information MD&A Recoded','Score','Raw Score','Size','Filelate','NewItems (excluding 0)','POST']]

dfd.rename(columns={'Other information MD&A Recoded':'Auditor_OI','NewItems (excluding 0)':'NewItems'},inplace=True)

dfd['year'] = dfd['fyear'].astype(int)
#dfd['year'] = pd.Categorical(dfd.year)
dfd['year'] = pd.to_datetime(dfd.year, format='%Y')
dfd['cusip'] = pd.Categorical(dfd['cusip'])
dfd = dfd.set_index(['cusip','year'])
dfd['POScore'] = dfd['POST'] * dfd['Score']

#Car0
#Score

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Car0, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/use

0,1,2,3
Dep. Variable:,Car0,R-squared:,0.3085
Estimator:,PanelOLS,R-squared (Between):,-3.4572
No. Observations:,77,R-squared (Within):,0.3021
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-3.0156
Time:,23:43:14,Log-likelihood,158.11
Cov. Estimator:,Clustered,,
,,F-statistic:,4.1929
Entities:,22,P-value,0.0031
Avg Obs:,3.5000,Distribution:,"F(5,47)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.9062,0.4390,-2.0644,0.0445,-1.7894,-0.0231
Score,0.5363,0.6072,0.8832,0.3816,-0.6852,1.7577
Size,-0.0209,0.0229,-0.9128,0.3660,-0.0670,0.0252
NewItems,0.0042,0.0018,2.3520,0.0229,0.0006,0.0078
POST,-0.0485,0.4743,-0.1022,0.9190,-1.0026,0.9056
POScore,0.0882,0.6240,0.1414,0.8882,-1.1671,1.3435


In [None]:
#Investors
#Without CARea
#Non-Adoption Group
#Score
#Car1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Car1, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Car1,R-squared:,0.2637
Estimator:,PanelOLS,R-squared (Between):,-4.3176
No. Observations:,77,R-squared (Within):,0.2517
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-3.3113
Time:,23:44:33,Log-likelihood,165.45
Cov. Estimator:,Clustered,,
,,F-statistic:,3.3671
Entities:,22,P-value,0.0111
Avg Obs:,3.5000,Distribution:,"F(5,47)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.7625,0.3974,-1.9190,0.0611,-1.5619,0.0369
Score,0.3811,0.4762,0.8002,0.4276,-0.5770,1.3392
Size,-0.0136,0.0253,-0.5378,0.5933,-0.0645,0.0373
NewItems,0.0037,0.0018,2.0165,0.0495,8.736e-06,0.0074
POST,0.0599,0.4168,0.1436,0.8864,-0.7787,0.8984
POScore,-0.0394,0.5475,-0.0719,0.9430,-1.1408,1.0620


In [None]:
#Investors
#Without CARea
#Non-Adoption Group
#Score
#Car2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Car2, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Car2,R-squared:,0.3368
Estimator:,PanelOLS,R-squared (Between):,-3.6869
No. Observations:,77,R-squared (Within):,0.3242
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-3.1843
Time:,23:44:57,Log-likelihood,162.23
Cov. Estimator:,Clustered,,
,,F-statistic:,4.7731
Entities:,22,P-value,0.0013
Avg Obs:,3.5000,Distribution:,"F(5,47)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.8762,0.4181,-2.0956,0.0415,-1.7174,-0.0351
Score,0.5342,0.5726,0.9328,0.3557,-0.6178,1.6861
Size,-0.0247,0.0191,-1.2905,0.2032,-0.0632,0.0138
NewItems,0.0043,0.0016,2.6621,0.0106,0.0010,0.0075
POST,-0.0696,0.4458,-0.1561,0.8766,-0.9665,0.8273
POScore,0.1143,0.5864,0.1949,0.8463,-1.0653,1.2939


In [None]:
#Investors
#Without CARea
#Non-Adoption Group
#Score
#Car3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Car3, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Car3,R-squared:,0.2926
Estimator:,PanelOLS,R-squared (Between):,-5.4943
No. Observations:,77,R-squared (Within):,0.2774
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-3.8753
Time:,23:45:09,Log-likelihood,168.58
Cov. Estimator:,Clustered,,
,,F-statistic:,3.8877
Entities:,22,P-value,0.0050
Avg Obs:,3.5000,Distribution:,"F(5,47)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.7032,0.3942,-1.7839,0.0809,-1.4962,0.0898
Score,0.3389,0.4728,0.7169,0.4770,-0.6122,1.2900
Size,-0.0202,0.0221,-0.9151,0.3648,-0.0647,0.0242
NewItems,0.0039,0.0016,2.3762,0.0216,0.0006,0.0072
POST,0.0066,0.4044,0.0163,0.9871,-0.8069,0.8201
POScore,0.0295,0.5297,0.0556,0.9559,-1.0362,1.0951


In [None]:
#Investors
#Without CARea
#Non-Adoption Group
#Score
#Scar0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Scar0, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Scar0,R-squared:,0.1362
Estimator:,PanelOLS,R-squared (Between):,-2.1054
No. Observations:,77,R-squared (Within):,0.0155
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.7262
Time:,23:45:26,Log-likelihood,-107.09
Cov. Estimator:,Clustered,,
,,F-statistic:,1.4817
Entities:,22,P-value,0.2138
Avg Obs:,3.5000,Distribution:,"F(5,47)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-10.162,9.1998,-1.1046,0.2750,-28.669,8.3456
Score,18.248,16.870,1.0817,0.2849,-15.690,52.187
Size,-1.1461,0.2767,-4.1415,0.0001,-1.7029,-0.5894
NewItems,0.0388,0.0277,1.4000,0.1681,-0.0169,0.0945
POST,5.3018,12.539,0.4228,0.6744,-19.924,30.528
POScore,-4.8307,16.640,-0.2903,0.7729,-38.306,28.645


In [None]:
#Investors
#Without CARea
#Non-Adoption Group
#Score
#Scar1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Scar1, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Scar1,R-squared:,0.1136
Estimator:,PanelOLS,R-squared (Between):,-3.2732
No. Observations:,77,R-squared (Within):,0.0428
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.9398
Time:,23:45:42,Log-likelihood,-106.08
Cov. Estimator:,Clustered,,
,,F-statistic:,1.2049
Entities:,22,P-value,0.3216
Avg Obs:,3.5000,Distribution:,"F(5,47)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-8.1463,8.5224,-0.9559,0.3440,-25.291,8.9985
Score,16.930,13.860,1.2215,0.2280,-10.953,44.814
Size,-1.1391,0.7213,-1.5792,0.1210,-2.5902,0.3120
NewItems,0.0330,0.0300,1.0998,0.2770,-0.0274,0.0933
POST,10.959,11.608,0.9440,0.3500,-12.394,34.312
POScore,-11.714,15.036,-0.7790,0.4399,-41.963,18.535


In [None]:
#Investors
#Without CARea
#Non-Adoption Group
#Score
#Scar2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Scar2, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Scar2,R-squared:,0.1504
Estimator:,PanelOLS,R-squared (Between):,-2.0522
No. Observations:,77,R-squared (Within):,-0.0183
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-0.8380
Time:,23:46:08,Log-likelihood,-103.99
Cov. Estimator:,Clustered,,
,,F-statistic:,1.6634
Entities:,22,P-value,0.1621
Avg Obs:,3.5000,Distribution:,"F(5,47)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-9.5619,8.9726,-1.0657,0.2920,-27.612,8.4887
Score,16.720,15.072,1.1093,0.2729,-13.601,47.042
Size,-1.1595,0.1710,-6.7819,0.0000,-1.5034,-0.8155
NewItems,0.0432,0.0226,1.9135,0.0618,-0.0022,0.0886
POST,3.6359,11.069,0.3285,0.7440,-18.632,25.904
POScore,-2.7454,14.744,-0.1862,0.8531,-32.406,26.915


In [None]:
#Investors
#Without CARea
#Non-Adoption Group
#Score
#Scar3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Scar3, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Scar3,R-squared:,0.1198
Estimator:,PanelOLS,R-squared (Between):,-4.2559
No. Observations:,77,R-squared (Within):,0.0170
Date:,"Sat, Aug 14 2021",R-squared (Overall):,-1.3461
Time:,23:46:23,Log-likelihood,-105.00
Cov. Estimator:,Clustered,,
,,F-statistic:,1.2795
Entities:,22,P-value,0.2886
Avg Obs:,3.5000,Distribution:,"F(5,47)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-5.9095,9.3774,-0.6302,0.5316,-24.774,12.955
Score,13.746,13.683,1.0046,0.3202,-13.780,41.272
Size,-1.2736,0.6775,-1.8800,0.0663,-2.6365,0.0892
NewItems,0.0421,0.0240,1.7563,0.0855,-0.0061,0.0904
POST,8.1231,11.144,0.7289,0.4697,-14.296,30.542
POScore,-8.0438,14.337,-0.5610,0.5774,-36.887,20.800


In [None]:
#Investors
#With CARea
#Non-Adoption Group
#Score
#Car0 + Car_e0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e0']])

FE = PanelOLS(dfd.Car0, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Car0,R-squared:,0.4599
Estimator:,PanelOLS,R-squared (Between):,-0.7417
No. Observations:,77,R-squared (Within):,0.4626
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-0.6802
Time:,01:06:18,Log-likelihood,167.62
Cov. Estimator:,Clustered,,
,,F-statistic:,6.5281
Entities:,22,P-value,0.0000
Avg Obs:,3.5000,Distribution:,"F(6,46)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.6598,0.0049,-135.94,0.0000,-0.6696,-0.6500
Score,0.5272,0.4315,1.2219,0.2280,-0.3413,1.3957
Size,-0.0213,0.0166,-1.2862,0.2048,-0.0547,0.0120
NewItems,0.0027,0.0019,1.4002,0.1682,-0.0012,0.0065
POST,0.0414,0.3920,0.1056,0.9163,-0.7476,0.8304
POScore,-0.0306,0.5077,-0.0603,0.9522,-1.0525,0.9913
Car_e0,0.3951,0.2631,1.5020,0.1399,-0.1344,0.9247


In [None]:
#Investors
#With CARea
#Non-Adoption Group
#Score
#Car1 + Car_e1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e1']])

FE = PanelOLS(dfd.Car1, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate

  return Series(np.sqrt(np.diag(self.cov)), self._var_names, name="std_error")


0,1,2,3
Dep. Variable:,Car1,R-squared:,0.5324
Estimator:,PanelOLS,R-squared (Between):,-1.1643
No. Observations:,77,R-squared (Within):,0.5367
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-0.8324
Time:,01:06:59,Log-likelihood,182.93
Cov. Estimator:,Clustered,,
,,F-statistic:,8.7295
Entities:,22,P-value,0.0000
Avg Obs:,3.5000,Distribution:,"F(6,46)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.5438,,,,,
Score,0.3783,0.2787,1.3572,0.1814,-0.1828,0.9394
Size,-0.0172,0.0223,-0.7723,0.4439,-0.0620,0.0276
NewItems,0.0024,0.0012,2.0139,0.0499,1.277e-06,0.0049
POST,0.1959,0.2945,0.6651,0.5093,-0.3969,0.7886
POScore,-0.2203,0.3814,-0.5775,0.5664,-0.9880,0.5475
Car_e1,0.4875,0.1721,2.8329,0.0068,0.1411,0.8338


In [None]:
#Investors
#With CARea
#Non-Adoption Group
#Score
#Car2 + Car_e2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e2']])

FE = PanelOLS(dfd.Car2, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Car2,R-squared:,0.4527
Estimator:,PanelOLS,R-squared (Between):,-1.1154
No. Observations:,77,R-squared (Within):,0.4530
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-0.9821
Time:,01:07:16,Log-likelihood,169.62
Cov. Estimator:,Clustered,,
,,F-statistic:,6.3418
Entities:,22,P-value,0.0001
Avg Obs:,3.5000,Distribution:,"F(6,46)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.6718,0.0729,-9.2206,0.0000,-0.8185,-0.5252
Score,0.5258,0.4214,1.2475,0.2185,-0.3226,1.3741
Size,-0.0231,0.0169,-1.3643,0.1791,-0.0572,0.0110
NewItems,0.0029,0.0019,1.5134,0.1370,-0.0009,0.0067
POST,0.0136,0.3857,0.0351,0.9721,-0.7629,0.7900
POScore,0.0042,0.5003,0.0083,0.9934,-1.0028,1.0111
Car_e2,0.3451,0.2708,1.2743,0.2090,-0.2000,0.8903


In [None]:
#Investors
#With CARea
#Non-Adoption Group
#Score
#Car3 + Car_e3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e3']])

FE = PanelOLS(dfd.Car3, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate

  return Series(np.sqrt(np.diag(self.cov)), self._var_names, name="std_error")


0,1,2,3
Dep. Variable:,Car3,R-squared:,0.5172
Estimator:,PanelOLS,R-squared (Between):,-1.9227
No. Observations:,77,R-squared (Within):,0.5234
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-1.2395
Time:,01:07:40,Log-likelihood,183.29
Cov. Estimator:,Clustered,,
,,F-statistic:,8.2133
Entities:,22,P-value,0.0000
Avg Obs:,3.5000,Distribution:,"F(6,46)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.5090,,,,,
Score,0.3414,0.3061,1.1153,0.2705,-0.2748,0.9575
Size,-0.0210,0.0215,-0.9783,0.3330,-0.0644,0.0223
NewItems,0.0026,0.0011,2.3028,0.0259,0.0003,0.0049
POST,0.1460,0.2948,0.4953,0.6228,-0.4474,0.7394
POScore,-0.1557,0.3815,-0.4082,0.6850,-0.9237,0.6122
Car_e3,0.4517,0.1742,2.5938,0.0127,0.1012,0.8023


In [None]:
#Investors
#With CARea
#Non-Adoption Group
#Score
#Scar0 + Scar_e0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e0']])

FE = PanelOLS(dfd.Scar0, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Scar0,R-squared:,0.2884
Estimator:,PanelOLS,R-squared (Between):,-0.6154
No. Observations:,77,R-squared (Within):,0.2362
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-0.0750
Time:,01:08:05,Log-likelihood,-99.626
Cov. Estimator:,Clustered,,
,,F-statistic:,3.1076
Entities:,22,P-value,0.0122
Avg Obs:,3.5000,Distribution:,"F(6,46)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-10.857,3.4612,-3.1367,0.0030,-17.824,-3.8897
Score,17.491,13.688,1.2778,0.2077,-10.062,45.043
Size,-0.7764,0.7135,-1.0883,0.2821,-2.2126,0.6597
NewItems,0.0234,0.0227,1.0306,0.3081,-0.0223,0.0690
POST,6.1535,11.522,0.5341,0.5959,-17.040,29.347
POScore,-6.3749,15.026,-0.4242,0.6734,-36.621,23.871
Scar_e0,0.3414,0.2701,1.2643,0.2125,-0.2022,0.8850


In [None]:
#Investors
#With CARea
#Non-Adoption Group
#Score
#Scar1 + Scar_e1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e1']])

FE = PanelOLS(dfd.Scar1, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate

  return Series(np.sqrt(np.diag(self.cov)), self._var_names, name="std_error")


0,1,2,3
Dep. Variable:,Scar1,R-squared:,0.3480
Estimator:,PanelOLS,R-squared (Between):,-1.6127
No. Observations:,77,R-squared (Within):,0.3591
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-0.2551
Time:,01:08:51,Log-likelihood,-94.261
Cov. Estimator:,Clustered,,
,,F-statistic:,4.0915
Entities:,22,P-value,0.0023
Avg Obs:,3.5000,Distribution:,"F(6,46)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-10.569,,,,,
Score,15.661,10.738,1.4584,0.1515,-5.9544,37.276
Size,-0.7587,0.9303,-0.8155,0.4190,-2.6314,1.1139
NewItems,0.0298,0.0169,1.7699,0.0834,-0.0041,0.0638
POST,12.398,9.1658,1.3527,0.1828,-6.0515,30.848
POScore,-14.197,11.813,-1.2018,0.2356,-37.977,9.5819
Scar_e1,0.4248,0.1986,2.1388,0.0378,0.0250,0.8245


In [None]:
#Investors
#With CARea
#Non-Adoption Group
#Score
#Scar2 + Scar_e2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e2']])

FE = PanelOLS(dfd.Scar2, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Scar2,R-squared:,0.2781
Estimator:,PanelOLS,R-squared (Between):,-0.6196
No. Observations:,77,R-squared (Within):,0.1954
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-0.1403
Time:,01:09:08,Log-likelihood,-97.713
Cov. Estimator:,Clustered,,
,,F-statistic:,2.9534
Entities:,22,P-value,0.0160
Avg Obs:,3.5000,Distribution:,"F(6,46)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-10.284,4.1066,-2.5041,0.0159,-18.550,-2.0174
Score,16.249,12.269,1.3244,0.1919,-8.4479,40.947
Size,-0.7969,0.7218,-1.1040,0.2753,-2.2497,0.6560
NewItems,0.0272,0.0217,1.2508,0.2173,-0.0166,0.0710
POST,4.6639,10.484,0.4449,0.6585,-16.439,25.767
POScore,-4.4858,13.736,-0.3266,0.7455,-32.134,23.163
Scar_e2,0.3102,0.2785,1.1136,0.2712,-0.2505,0.8708


In [None]:
#Investors
#With CARea
#Non-Adoption Group
#Score
#Scar3 + Scar_e3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e3']])

FE = PanelOLS(dfd.Scar3, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Scar3,R-squared:,0.3220
Estimator:,PanelOLS,R-squared (Between):,-2.1988
No. Observations:,77,R-squared (Within):,0.3247
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-0.5176
Time:,01:09:29,Log-likelihood,-94.955
Cov. Estimator:,Clustered,,
,,F-statistic:,3.6403
Entities:,22,P-value,0.0049
Avg Obs:,3.5000,Distribution:,"F(6,46)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-8.2932,3.6236,-2.2886,0.0267,-15.587,-0.9992
Score,12.883,11.118,1.1587,0.2525,-9.4962,35.261
Size,-0.8774,0.9135,-0.9605,0.3418,-2.7163,0.9614
NewItems,0.0360,0.0154,2.3323,0.0241,0.0049,0.0671
POST,9.8976,8.7385,1.1326,0.2632,-7.6921,27.487
POScore,-10.948,11.244,-0.9737,0.3353,-33.580,11.684
Scar_e3,0.4020,0.2012,1.9979,0.0517,-0.0030,0.8070


In [None]:
#Analyst
#Without CARea
#Non-Adoption Group
#Score

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Revision,R-squared:,0.4136
Estimator:,PanelOLS,R-squared (Between):,-7.6594
No. Observations:,62,R-squared (Within):,0.3993
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-5.3500
Time:,01:27:02,Log-likelihood,118.76
Cov. Estimator:,Clustered,,
,,F-statistic:,4.7971
Entities:,22,P-value,0.0020
Avg Obs:,2.8182,Distribution:,"F(5,34)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-1.1345,0.9741,-1.1647,0.2522,-3.1140,0.8450
Score,0.1183,0.9161,0.1292,0.8980,-1.7434,1.9801
Size,-0.0066,0.0393,-0.1687,0.8670,-0.0866,0.0733
NewItems,0.0067,0.0036,1.8617,0.0713,-0.0006,0.0141
POST,-0.9976,0.5587,-1.7857,0.0831,-2.1330,0.1377
POScore,1.2358,0.6782,1.8222,0.0772,-0.1424,2.6140


In [None]:
#Analyst
#With CARea
#Non-Adoption Group
#Score
#Car_e0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e0']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Revision,R-squared:,0.5257
Estimator:,PanelOLS,R-squared (Between):,-3.6427
No. Observations:,61,R-squared (Within):,0.4553
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-2.5369
Time:,01:28:21,Log-likelihood,122.83
Cov. Estimator:,Clustered,,
,,F-statistic:,5.9111
Entities:,22,P-value,0.0003
Avg Obs:,2.7727,Distribution:,"F(6,32)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.7261,0.7538,-0.9633,0.3426,-2.2616,0.8093
Score,0.0286,0.8165,0.0350,0.9723,-1.6345,1.6918
Size,-0.0138,0.0427,-0.3228,0.7490,-0.1007,0.0731
NewItems,0.0050,0.0024,2.0787,0.0457,0.0001,0.0099
POST,-0.9956,0.4866,-2.0460,0.0490,-1.9868,-0.0044
POScore,1.2080,0.5855,2.0632,0.0473,0.0154,2.4007
Car_e0,0.4460,0.2366,1.8851,0.0685,-0.0359,0.9280


In [None]:
#Analyst
#With CARea
#Non-Adoption Group
#Score
#Car_e1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e1']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Revision,R-squared:,0.5364
Estimator:,PanelOLS,R-squared (Between):,-4.6334
No. Observations:,61,R-squared (Within):,0.3912
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-3.3764
Time:,01:28:32,Log-likelihood,123.53
Cov. Estimator:,Clustered,,
,,F-statistic:,6.1708
Entities:,22,P-value,0.0002
Avg Obs:,2.7727,Distribution:,"F(6,32)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.6571,0.6700,-0.9808,0.3340,-2.0218,0.7076
Score,-0.0871,0.6795,-0.1282,0.8988,-1.4712,1.2970
Size,-0.0239,0.0471,-0.5075,0.6153,-0.1198,0.0720
NewItems,0.0058,0.0026,2.2570,0.0310,0.0006,0.0110
POST,-0.9296,0.4412,-2.1073,0.0430,-1.8282,-0.0310
POScore,1.1184,0.5268,2.1232,0.0416,0.0454,2.1914
Car_e1,0.4872,0.3205,1.5199,0.1383,-0.1657,1.1401


In [None]:
#Analyst
#With CARea
#Non-Adoption Group
#Score
#Car_e2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e2']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Revision,R-squared:,0.5088
Estimator:,PanelOLS,R-squared (Between):,-3.6217
No. Observations:,61,R-squared (Within):,0.4508
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-2.5271
Time:,01:28:45,Log-likelihood,121.76
Cov. Estimator:,Clustered,,
,,F-statistic:,5.5248
Entities:,22,P-value,0.0005
Avg Obs:,2.7727,Distribution:,"F(6,32)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.7626,0.7659,-0.9957,0.3269,-2.3226,0.7974
Score,0.0131,0.8348,0.0157,0.9875,-1.6872,1.7135
Size,-0.0078,0.0414,-0.1888,0.8514,-0.0922,0.0765
NewItems,0.0050,0.0024,2.0470,0.0489,2.458e-05,0.0099
POST,-0.9845,0.4922,-2.0000,0.0540,-1.9872,0.0182
POScore,1.1954,0.5920,2.0192,0.0519,-0.0105,2.4014
Car_e2,0.4302,0.2347,1.8330,0.0761,-0.0479,0.9083


In [None]:
#Analyst
#With CARea
#Non-Adoption Group
#Score
#Car_e3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Car_e3']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Revision,R-squared:,0.5260
Estimator:,PanelOLS,R-squared (Between):,-4.3036
No. Observations:,61,R-squared (Within):,0.3957
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-3.1427
Time:,01:28:59,Log-likelihood,122.85
Cov. Estimator:,Clustered,,
,,F-statistic:,5.9187
Entities:,22,P-value,0.0003
Avg Obs:,2.7727,Distribution:,"F(6,32)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.6630,0.6664,-0.9949,0.3272,-2.0203,0.6943
Score,-0.1106,0.6946,-0.1592,0.8745,-1.5254,1.3043
Size,-0.0183,0.0457,-0.3997,0.6920,-0.1113,0.0748
NewItems,0.0056,0.0025,2.2745,0.0298,0.0006,0.0106
POST,-0.9029,0.4333,-2.0839,0.0452,-1.7854,-0.0204
POScore,1.0856,0.5166,2.1016,0.0435,0.0334,2.1378
Car_e3,0.4899,0.3297,1.4857,0.1471,-0.1818,1.1616


In [None]:
#Analyst
#With CARea
#Non-Adoption Group
#Score
#Scar_e0

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e0']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Revision,R-squared:,0.4251
Estimator:,PanelOLS,R-squared (Between):,-7.3685
No. Observations:,61,R-squared (Within):,0.3588
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-5.1899
Time:,01:29:14,Log-likelihood,116.96
Cov. Estimator:,Clustered,,
,,F-statistic:,3.9440
Entities:,22,P-value,0.0046
Avg Obs:,2.7727,Distribution:,"F(6,32)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-1.1297,0.9524,-1.1861,0.2443,-3.0697,0.8103
Score,0.0522,0.9255,0.0564,0.9554,-1.8329,1.9373
Size,0.0006,0.0407,0.0139,0.9890,-0.0823,0.0834
NewItems,0.0066,0.0035,1.8662,0.0712,-0.0006,0.0138
POST,-1.0465,0.5801,-1.8042,0.0806,-2.2281,0.1350
POScore,1.2747,0.6990,1.8236,0.0776,-0.1491,2.6986
Scar_e0,0.0041,0.0048,0.8425,0.4057,-0.0057,0.0138


In [None]:
#Analyst
#With CARea
#Non-Adoption Group
#Score
#Scar_e1

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e1']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Revision,R-squared:,0.4258
Estimator:,PanelOLS,R-squared (Between):,-7.5908
No. Observations:,61,R-squared (Within):,0.3442
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-5.3838
Time:,01:29:28,Log-likelihood,117.00
Cov. Estimator:,Clustered,,
,,F-statistic:,3.9556
Entities:,22,P-value,0.0045
Avg Obs:,2.7727,Distribution:,"F(6,32)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-1.1225,0.9292,-1.2080,0.2359,-3.0153,0.7703
Score,0.0312,0.8953,0.0348,0.9725,-1.7925,1.8548
Size,-0.0010,0.0411,-0.0248,0.9804,-0.0847,0.0826
NewItems,0.0068,0.0037,1.8335,0.0760,-0.0008,0.0143
POST,-1.0340,0.5815,-1.7781,0.0849,-2.2185,0.1505
POScore,1.2566,0.6942,1.8100,0.0797,-0.1575,2.6707
Scar_e1,0.0043,0.0061,0.7071,0.4847,-0.0081,0.0167


In [None]:
#Analyst
#With CARea
#Non-Adoption Group
#Score
#Scar_e2

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e2']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Revision,R-squared:,0.4210
Estimator:,PanelOLS,R-squared (Between):,-7.3974
No. Observations:,61,R-squared (Within):,0.3612
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-5.1979
Time:,01:29:40,Log-likelihood,116.75
Cov. Estimator:,Clustered,,
,,F-statistic:,3.8782
Entities:,22,P-value,0.0051
Avg Obs:,2.7727,Distribution:,"F(6,32)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-1.1504,0.9672,-1.1894,0.2430,-3.1206,0.8198
Score,0.0770,0.9316,0.0827,0.9346,-1.8205,1.9746
Size,0.0012,0.0411,0.0287,0.9773,-0.0825,0.0849
NewItems,0.0066,0.0036,1.8573,0.0725,-0.0006,0.0138
POST,-1.0342,0.5768,-1.7931,0.0824,-2.2090,0.1406
POScore,1.2620,0.6971,1.8102,0.0797,-0.1581,2.6820
Scar_e2,0.0030,0.0041,0.7309,0.4702,-0.0054,0.0115


In [None]:
#Analyst
#With CARea
#Non-Adoption Group
#Score
#Scar_e3

exog = add_constant(dfd[['Score','Size', 'NewItems','Filelate', 'POST', 'POScore','Scar_e3']])

FE = PanelOLS(dfd.Revision, exog,
              entity_effects = True,
              time_effects= True,
              check_rank=False,
              drop_absorbed=True)
              
# Result
result = FE.fit(cov_type = 'clustered',
             cluster_entity=True,
             cluster_time=True
             )

result

Inputs contain missing values. Dropping rows with missing observations.
Variables have been fully absorbed and have removed from the regression:

Filelate



0,1,2,3
Dep. Variable:,Revision,R-squared:,0.4220
Estimator:,PanelOLS,R-squared (Between):,-7.5355
No. Observations:,61,R-squared (Within):,0.3478
Date:,"Sun, Aug 15 2021",R-squared (Overall):,-5.3281
Time:,01:29:49,Log-likelihood,116.80
Cov. Estimator:,Clustered,,
,,F-statistic:,3.8942
Entities:,22,P-value,0.0050
Avg Obs:,2.7727,Distribution:,"F(6,32)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-1.1403,0.9424,-1.2100,0.2351,-3.0599,0.7793
Score,0.0558,0.9040,0.0617,0.9512,-1.7857,1.8972
Size,-6.03e-06,0.0411,-0.0001,0.9999,-0.0837,0.0837
NewItems,0.0067,0.0036,1.8390,0.0752,-0.0007,0.0141
POST,-1.0239,0.5780,-1.7716,0.0860,-2.2012,0.1534
POScore,1.2463,0.6907,1.8043,0.0806,-0.1607,2.6533
Scar_e3,0.0035,0.0057,0.6094,0.5466,-0.0081,0.0150


In [None]:
dfd.index

MultiIndex([(  'AEM', '2016-01-01'),
            (  'AEM', '2017-01-01'),
            (  'AEM', '2018-01-01'),
            (  'AEM', '2019-01-01'),
            (  'BHC', '2016-01-01'),
            (  'BHC', '2017-01-01'),
            (  'BHC', '2018-01-01'),
            (  'BHC', '2019-01-01'),
            (  'CAE', '2015-01-01'),
            (  'CAE', '2016-01-01'),
            (  'CAE', '2017-01-01'),
            (  'CAE', '2018-01-01'),
            (  'CAE', '2019-01-01'),
            (   'CP', '2016-01-01'),
            (   'CP', '2017-01-01'),
            (   'CP', '2018-01-01'),
            (   'CP', '2019-01-01'),
            (   'CP', '2020-01-01'),
            (  'CLS', '2016-01-01'),
            (  'CLS', '2017-01-01'),
            (  'CLS', '2018-01-01'),
            (  'CLS', '2019-01-01'),
            (  'CLS', '2020-01-01'),
            (  'CVE', '2016-01-01'),
            (  'CVE', '2017-01-01'),
            (  'CVE', '2018-01-01'),
            (  'CVE', '2019-01-01'),
 

In [None]:
exog

Unnamed: 0_level_0,Unnamed: 1_level_0,const,Score,Size,NewItems,Filelate,NewItems,Auditor_OI,AuScore
tic,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AEM,2016,1.0,,8.868969,153,0.0,153,0,
AEM,2017,1.0,0.84,8.970254,157,0.0,157,0,0.0
AEM,2018,1.0,0.83,8.968631,158,0.0,158,0,0.0
AEM,2019,1.0,0.84,9.081357,147,0.0,147,0,0.0
BHC,2016,1.0,,10.681183,194,0.0,194,0,
...,...,...,...,...,...,...,...,...,...
WCN,2016,1.0,,9.323053,169,0.0,169,0,
WCN,2017,1.0,0.83,9.393885,165,0.0,165,0,0.0
WCN,2018,1.0,0.82,9.443619,162,0.0,162,0,0.0
WCN,2019,1.0,0.80,9.527899,156,0.0,156,0,0.0


In [None]:
pip install statsmodels


Collecting statsmodels
  Using cached https://files.pythonhosted.org/packages/da/69/8eef30a6237c54f3c0b524140e2975f4b1eea3489b45eb3339574fc8acee/statsmodels-0.12.2-cp37-cp37m-manylinux1_x86_64.whl
Installing collected packages: statsmodels
Successfully installed statsmodels-0.12.2


In [None]:
pip install pandas 



In [None]:
len(dfd)


93

In [None]:
# For Claire
# spreg has a seperate api from pysal now
import spreg
#import pysal as ps

fe1_d = pd.get_dummies(dfd['fyear'], 'd_year')
exog = sm.add_constant(dfd[['Score','Size','NewItems']])
xone = np.hstack((exog, fe1_d.values))
m1_dummy = spreg.BaseOLS(dfd['Car_e1'].values, xone)
m1_dummy.betas

array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])

array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])