# Do CEO's impact firm performance?

## 1. Imports

### 1.1 Libraries

In [2]:
# import libraries
import pandas as pd
from linearmodels import PanelOLS
import statsmodels.api as sm

#TODO: SettingWithCopyWarning

### 1.2 Data

In [3]:
# import data
ceo_data_raw = pd.read_csv("data/Execucomp_2006_-_2021.csv")
company_data_raw = pd.read_csv("data/CCM_Fundamentals_Annual_2006_-_2021_winsorized.csv")
annual_return_price_data_raw = pd.read_csv("data/Compustat_annual_return.csv")
annual_stdev_price_data_raw = pd.read_csv("data/CRSP_annual_standard_deviation.csv")

## 2. Definitions

In [4]:
# all columns in datasets shown as lists
print('CEO_DATA_RAW: \n', list(ceo_data_raw.columns), '\n')
print('COMPANY_DATA_RAW: \n', list(company_data_raw.columns), '\n')
print('ANNUAL_RETURN_PRICE_DATA_RAW: \n', list(annual_return_price_data_raw.columns), '\n')
print('ANNUAL_STDEV_PRICE_DATA_RAW: \n', list(annual_stdev_price_data_raw.columns), '\n')

CEO_DATA_RAW: 
 ['EXEC_FULLNAME', 'CO_PER_ROL', 'CONAME', 'CEOANN', 'AGE', 'GVKEY', 'EXECID', 'YEAR', 'BECAMECEO', 'JOINED_CO', 'REJOIN', 'LEFTOFC', 'LEFTCO', 'RELEFT', 'PCEO', 'PCFO', 'TITLE', 'REASON', 'EXEC_LNAME', 'EXEC_FNAME', 'EXEC_MNAME', 'GENDER', 'NAMEPREFIX', 'PAGE', 'CUSIP', 'EXCHANGE', 'ADDRESS', 'CITY', 'STATE', 'ZIP', 'TELE', 'SICDESC', 'NAICSDESC', 'INDDESC', 'SPCODE', 'TICKER', 'SUB_TELE', 'NAICS', 'SPINDEX', 'SIC'] 

COMPANY_DATA_RAW: 
 ['Unnamed: 0', 'GVKEY', 'LINKPRIM', 'LIID', 'LINKTYPE', 'LPERMNO', 'LPERMCO', 'LINKDT', 'LINKENDDT', 'datadate', 'fyear', 'indfmt', 'consol', 'popsrc', 'datafmt', 'tic', 'cusip', 'conm', 'curcd', 'fyr', 'aqc', 'at', 'capx', 'ceq', 'che', 'csho', 'dlc', 'dltt', 'dp', 'dvc', 'dvp', 'ebit', 'ebitda', 'ibc', 'lt', 'ni', 'oancf', 'ppent', 'rdip', 'sale', 'txdb', 'xad', 'xsga', 'prcc_f', 'ROA', 'Tobins_Q', 'Cash_Flow', 'Leverage', 'Investment', 'Cash_Holdings', 'Div_over_Earn', 'SQ_A', 'Acquisitions', 'ROA_w01_w99', 'Tobins_Q_w95', 'CF_w01_w9

## 3. Data Overview

In [5]:
company_data_raw.head()

Unnamed: 0.1,Unnamed: 0,GVKEY,LINKPRIM,LIID,LINKTYPE,LPERMNO,LPERMCO,LINKDT,LINKENDDT,datadate,...,Div_over_Earn,SQ_A,Acquisitions,ROA_w01_w99,Tobins_Q_w95,CF_w01_w99,Lev_w05_w95,Inv_w01_w99,CashHold_w95,DoE_w01_w99
0,0,1004,P,1,LU,54594,20000,19720424,E,20070531,...,0.0,0.099033,1,0.054944,1.685582,0.08584,0.66335,0.114892,0.320244,0.0
1,1,1004,P,1,LU,54594,20000,19720424,E,20080531,...,0.0,0.097841,1,0.055171,1.119154,0.084946,0.907146,0.097728,0.362234,0.0
2,2,1004,P,1,LU,54594,20000,19720424,E,20090531,...,0.0,0.103386,0,0.057096,0.938077,0.087949,0.695064,0.11212,0.458108,0.0
3,3,1004,P,1,LU,54594,20000,19720424,E,20100531,...,0.0,0.112342,1,0.029731,1.020605,0.054717,0.585112,0.086281,0.237329,0.0
4,4,1004,P,1,LU,54594,20000,19720424,E,20110531,...,0.015195,0.09652,0,0.040984,1.125593,0.077733,0.531052,0.298922,0.137477,0.015195


In [6]:
ceo_data_raw.head()

Unnamed: 0,EXEC_FULLNAME,CO_PER_ROL,CONAME,CEOANN,AGE,GVKEY,EXECID,YEAR,BECAMECEO,JOINED_CO,...,TELE,SICDESC,NAICSDESC,INDDESC,SPCODE,TICKER,SUB_TELE,NAICS,SPINDEX,SIC
0,A. Alexander Rhodes,52774,TAILORED BRANDS INC,,56.0,25167,49280,2015,,,...,281 776 7000,APPAREL AND ACCESSORY STORES,Men's Clothing Stores,Apparel Retail,EX,TLRDQ,281.0,448110,2550.0,5600
1,A. Alexander Rhodes,52774,TAILORED BRANDS INC,,57.0,25167,49280,2016,,,...,281 776 7000,APPAREL AND ACCESSORY STORES,Men's Clothing Stores,Apparel Retail,EX,TLRDQ,281.0,448110,2550.0,5600
2,A. Alexander Rhodes,52774,TAILORED BRANDS INC,,58.0,25167,49280,2017,,,...,281 776 7000,APPAREL AND ACCESSORY STORES,Men's Clothing Stores,Apparel Retail,EX,TLRDQ,281.0,448110,2550.0,5600
3,A. Alexander Rhodes,52774,TAILORED BRANDS INC,,59.0,25167,49280,2018,,,...,281 776 7000,APPAREL AND ACCESSORY STORES,Men's Clothing Stores,Apparel Retail,EX,TLRDQ,281.0,448110,2550.0,5600
4,A. Alexander Rhodes,52774,TAILORED BRANDS INC,,60.0,25167,49280,2019,,,...,281 776 7000,APPAREL AND ACCESSORY STORES,Men's Clothing Stores,Apparel Retail,EX,TLRDQ,281.0,448110,2550.0,5600


In [7]:
annual_return_price_data_raw.head()

Unnamed: 0,LPERMNO,fyear,prcc_f,ajex,adjust_prccf,prcc_pcchg
0,54594,2007.0,19.28,1.0,19.28,-0.406769
1,54594,2008.0,14.7,1.0,14.7,-0.237552
2,54594,2009.0,19.7,1.0,19.7,0.340136
3,54594,2010.0,26.39,1.0,26.39,0.339594
4,54594,2011.0,12.05,1.0,12.05,-0.543388


In [8]:
annual_stdev_price_data_raw.head()

Unnamed: 0,LPERMNO,year,stand_dev
0,10001,2006,0.024685
1,10001,2007,0.019516
2,10001,2008,0.045596
3,10001,2009,0.026645
4,10001,2010,0.016674


## 4. Data Cleaning

### 4.1 Drop Columns

In [9]:
company_cols = ['GVKEY', 'LPERMNO', 'prcc_f', 'fyear', 'ROA_w01_w99', 'Tobins_Q_w95', 'CF_w01_w99',
                'Lev_w05_w95', 'Inv_w01_w99', 'CashHold_w95', 'DoE_w01_w99', 'Acquisitions']

ceo_cols = ['GVKEY', 'CO_PER_ROL', 'YEAR', 'AGE', 'BECAMECEO', 'TITLE', 'CEOANN', 'LEFTOFC', 'LEFTCO', 'JOINED_CO',
            'CONAME', 'EXECID']

annual_return_cols = ['LPERMNO', 'fyear', 'prcc_pcchg']
annual_stdev_cols = ['LPERMNO', 'year', 'stand_dev']

company_data = company_data_raw[company_cols]
ceo_data = ceo_data_raw[ceo_cols]
annual_return_price_data = annual_return_price_data_raw[annual_return_cols]
annual_stdev_price_data = annual_stdev_price_data_raw[annual_stdev_cols]

### 4.2 Filter Data

In [10]:
# filter data
ceo_data = ceo_data[ceo_data.CEOANN == "CEO"] # extracting only CEOs from manager data
# drop age with nans - about 100 rows removed
ceo_data = ceo_data[ceo_data['AGE'].notna()]

### 4.3 Rename Columns

In [11]:
# rename y's
annual_return_price_data.rename(columns={'prcc_pcchg': 'avg_return'}, inplace=True)
annual_stdev_price_data.rename(columns={'stand_dev': 'sd_return'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


## 5. Feature engineering

### 5.1 Join Data 

In [12]:
# join data
data_joined = company_data.join(annual_return_price_data.set_index(['LPERMNO', 'fyear']), on=['LPERMNO', 'fyear'], how='inner',
                                lsuffix='',
                                rsuffix='', sort=False)

data_joined = data_joined.join(annual_stdev_price_data.set_index(['LPERMNO', 'year']), on=['LPERMNO', 'fyear'], how='inner',
                                lsuffix='',
                                rsuffix='', sort=False)

data_joined = data_joined.join(ceo_data.set_index(['GVKEY', 'YEAR']), on=['GVKEY', 'fyear'], how='inner', lsuffix='',
                               rsuffix='', sort=False)

In [13]:
# removing the data for 2021 - a lot of data is not available yet
data_joined = data_joined[data_joined["fyear"] != 2021]

### 5.2 CEO Position Dummy Variables

In [14]:
#founder - 1, otherwise 0
if_founder = data_joined['TITLE'].str.contains('founder')
data_joined['dummy_founder'] = if_founder

In [15]:
# president - 1, otherwise 0
if_president = data_joined['TITLE'].str.contains('president')
data_joined['dummy_president'] = if_founder

In [16]:
# chairman - 1, otherwise 0
if_chairman = data_joined['TITLE'].str.contains('chairman')
data_joined['dummy_chairman'] = if_founder

In [17]:
# chairman + president - 1, otherwise 0
data_joined['dummy_chairman_president'] = data_joined['TITLE'].str.contains('|'.join(['chairmam', 'president']))

In [18]:
# drop columns only important for joining
data_joined.drop(['TITLE'], axis=1, inplace=True)

In [19]:
# expect CEOs with nan in the title column to be only CEO and having no other position
data_joined['dummy_founder'].fillna(False)
data_joined['dummy_president'].fillna(False)
data_joined['dummy_chairman'].fillna(False)
data_joined['dummy_chairman_president'].fillna(False)

1        False
2        False
3        False
4        False
5        False
         ...  
78281    False
78329    False
78330    False
78340    False
78341    False
Name: dummy_chairman_president, Length: 21781, dtype: bool

### 5.3 Fixing the NaN Date Values

In [20]:
# if ceo has Nan for BECAMECEO we set it to JOIN_CO if it exists
# before doing that, we made sure that the value didn't exist in some other record from another year
def fix_becameceo(row):
    if pd.isnull(row['BECAMECEO']):
        return row['JOINED_CO']
    return row['BECAMECEO']

In [21]:
# setting the start date as the JOINED_CO to minimize Nan values
data_joined['BECAMECEO'] = data_joined.apply(lambda row: fix_becameceo(row), axis=1)

In [22]:
# if ceo has Nan for LEFTOFC, we set it to 31.12.2020 - we assume the person is still the CEO of the company
# we excluded 2021 data, that's why we used 2020
def fix_leftofc(row):
    if pd.isnull(row['LEFTOFC']):
        return '20201231'
    return row['LEFTOFC']

In [23]:
data_joined['LEFTOFC'] = data_joined.apply(lambda row: fix_leftofc(row), axis=1)

In [24]:
# checking Nan values in LEFTOFC
print(data_joined['LEFTOFC'].isnull().sum()) # 0 Nan

0


### 5.4 Filtering

Three year requirement for managers

In [25]:
# changing the column of type string to DateTime type, to easily calculate CEO tenure
data_joined['BECAMECEO'] = pd.to_datetime(data_joined['BECAMECEO'], format='%Y%m%d')
data_joined['LEFTOFC'] = pd.to_datetime(data_joined['LEFTOFC'], format='%Y%m%d')

In [26]:
data_joined['3Y_THRESH'] = data_joined['LEFTOFC'].dt.year - data_joined['BECAMECEO'].dt.year
data_joined = data_joined[data_joined['3Y_THRESH'] >= 3]  # no change because all ceo's so far have stayed>= 3 yr
# drop column used for 3yr requirement
data_joined.drop(['3Y_THRESH'], axis=1, inplace=True)

Two company information for managers

In [27]:
# for each EXEC ID - at least 2 distinct GVKEY
tempdf = data_joined
var = data_joined[data_joined['EXECID'] == 17580]  # manually checking

In [28]:
# set number of companies as ceo_group feature
ceo_group = data_joined.groupby('EXECID')['GVKEY'].nunique() # not enough data
ceo_group.name = 'ceo_group'

data_joined = data_joined.join(ceo_group, on=['EXECID'], how='inner', lsuffix='',
                               rsuffix='', sort=False)

CEO tenure

In [29]:
# how many days as CEO - ceo_tenure
data_joined['ceo_tenure'] = (data_joined['LEFTOFC'] - data_joined['BECAMECEO']).dt.days

### 5.5 Final prep

In [30]:
# drop columns only important for calculating date related attributes
data_joined.drop(
    ['JOINED_CO', 'BECAMECEO', 'LEFTCO', 'LEFTOFC', 'CO_PER_ROL', 'CEOANN', 'CONAME', 'LPERMNO', 'prcc_f', 'EXECID'],
    axis=1,
    inplace=True)

In [31]:
data_joined.shape

(20077, 19)

In [32]:
# checking null/nan values for each column
data_joined.isnull().sum(axis=0)

GVKEY                          0
fyear                          0
ROA_w01_w99                    1
Tobins_Q_w95                   2
CF_w01_w99                  1556
Lev_w05_w95                   48
Inv_w01_w99                 1402
CashHold_w95                1339
DoE_w01_w99                 1328
Acquisitions                   0
avg_return                     0
sd_return                      0
AGE                            0
dummy_founder                 40
dummy_president               40
dummy_chairman                40
dummy_chairman_president      40
ceo_group                      0
ceo_tenure                     0
dtype: int64

In [33]:
# removing nan rows in the end
data_joined.dropna(inplace=True)

In [34]:
# the final size of data_joined
data_joined.shape

(18104, 19)

## 6. Modelling

### 6.1 Models for predicting avg_return (all vars)

In [35]:
data_model = data_joined.set_index(['GVKEY', 'fyear', ]).copy(deep=True)

data_model.drop('sd_return', axis=1, inplace=True)
X = sm.add_constant(data_model.drop('avg_return', axis=1))
y = data_model.avg_return

# Company and Year fixed effects
mod = PanelOLS(y, X, drop_absorbed=True, check_rank=False, entity_effects=True, time_effects=True)
re_res = mod.fit()
re_res

Variables have been fully absorbed and have removed from the regression:

dummy_president, dummy_chairman



0,1,2,3
Dep. Variable:,avg_return,R-squared:,0.0156
Estimator:,PanelOLS,R-squared (Between):,-0.0283
No. Observations:,18104,R-squared (Within):,0.0206
Date:,"Fri, Mar 18 2022",R-squared (Overall):,-0.0043
Time:,15:51:36,Log-likelihood,-2.715e+04
Cov. Estimator:,Unadjusted,,
,,F-statistic:,19.770
Entities:,1834,P-value,0.0000
Avg Obs:,9.8713,Distribution:,"F(13,16244)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0443,0.1605,0.2763,0.7823,-0.2702,0.3589
ROA_w01_w99,0.6278,0.3533,1.7769,0.0756,-0.0647,1.3204
Tobins_Q_w95,0.2300,0.0182,12.658,0.0000,0.1944,0.2656
CF_w01_w99,-0.4637,0.3806,-1.2182,0.2232,-1.2098,0.2824
Lev_w05_w95,0.0044,0.0138,0.3171,0.7512,-0.0227,0.0315
Inv_w01_w99,-0.7145,0.1022,-6.9919,0.0000,-0.9148,-0.5142
CashHold_w95,0.0130,0.0027,4.7521,0.0000,0.0076,0.0183
DoE_w01_w99,-0.0784,0.0576,-1.3622,0.1732,-0.1913,0.0344
Acquisitions,-0.0776,0.0230,-3.3769,0.0007,-0.1226,-0.0326


In [36]:
# Company fixed effects
mod = PanelOLS(y, X, drop_absorbed=True, check_rank=False, entity_effects=True, time_effects=False)
re_res = mod.fit()
re_res

0,1,2,3
Dep. Variable:,avg_return,R-squared:,0.0215
Estimator:,PanelOLS,R-squared (Between):,-0.0439
No. Observations:,18104,R-squared (Within):,0.0215
Date:,"Fri, Mar 18 2022",R-squared (Overall):,-0.0114
Time:,15:51:37,Log-likelihood,-2.74e+04
Cov. Estimator:,Unadjusted,,
,,F-statistic:,27.447
Entities:,1834,P-value,0.0000
Avg Obs:,9.8713,Distribution:,"F(13,16257)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0861,0.1562,0.5510,0.5816,-0.2201,0.3923
ROA_w01_w99,0.4557,0.3570,1.2763,0.2019,-0.2441,1.1555
Tobins_Q_w95,0.2510,0.0177,14.149,0.0000,0.2162,0.2857
CF_w01_w99,-0.2127,0.3847,-0.5529,0.5803,-0.9668,0.5413
Lev_w05_w95,-0.0140,0.0139,-1.0130,0.3111,-0.0412,0.0131
Inv_w01_w99,-0.9618,0.0998,-9.6341,0.0000,-1.1575,-0.7661
CashHold_w95,0.0162,0.0028,5.8754,0.0000,0.0108,0.0216
DoE_w01_w99,-0.0891,0.0580,-1.5349,0.1248,-0.2028,0.0247
Acquisitions,-0.0982,0.0232,-4.2324,0.0000,-0.1437,-0.0527


In [37]:
# Year fixed effects
mod = PanelOLS(y, X, drop_absorbed=True, check_rank=False, entity_effects=False, time_effects=True)
re_res = mod.fit()
re_res

0,1,2,3
Dep. Variable:,avg_return,R-squared:,0.0097
Estimator:,PanelOLS,R-squared (Between):,0.0064
No. Observations:,18104,R-squared (Within):,0.0101
Date:,"Fri, Mar 18 2022",R-squared (Overall):,0.0107
Time:,15:51:37,Log-likelihood,-2.863e+04
Cov. Estimator:,Unadjusted,,
,,F-statistic:,13.587
Entities:,1834,P-value,0.0000
Avg Obs:,9.8713,Distribution:,"F(13,18077)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0238,0.0970,0.2451,0.8064,-0.1663,0.2138
ROA_w01_w99,0.2079,0.2463,0.8441,0.3986,-0.2749,0.6908
Tobins_Q_w95,0.1000,0.0101,9.8793,0.0000,0.0801,0.1198
CF_w01_w99,-0.5456,0.2563,-2.1287,0.0333,-1.0480,-0.0432
Lev_w05_w95,0.0177,0.0089,1.9967,0.0459,0.0003,0.0350
Inv_w01_w99,-0.2386,0.0703,-3.3937,0.0007,-0.3764,-0.1008
CashHold_w95,0.0068,0.0013,5.0535,0.0000,0.0041,0.0094
DoE_w01_w99,-0.1575,0.0451,-3.4957,0.0005,-0.2458,-0.0692
Acquisitions,-0.0708,0.0177,-3.9883,0.0001,-0.1056,-0.0360


### 6.2 Models for predicting sd_return (all vars)

In [38]:
data_model = data_joined.set_index(['GVKEY', 'fyear']).copy(deep=True)

data_model.drop('avg_return', axis=1, inplace=True)
X = sm.add_constant(data_model.drop('sd_return', axis=1))
y = data_model.sd_return

# Company and Year fixed effects
mod = PanelOLS(y, X, drop_absorbed=True, check_rank=False, entity_effects=True, time_effects=True)
re_res = mod.fit()
print(re_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:              sd_return   R-squared:                        0.0472
Estimator:                   PanelOLS   R-squared (Between):              0.1856
No. Observations:               18104   R-squared (Within):               0.0622
Date:                Fri, Mar 18 2022   R-squared (Overall):              0.0891
Time:                        15:51:38   Log-likelihood                 5.229e+04
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      61.957
Entities:                        1834   P-value                           0.0000
Avg Obs:                       9.8713   Distribution:                F(13,16244)
Min Obs:                       1.0000                                           
Max Obs:                       38.000   F-statistic (robust):             61.957
                            

In [39]:
# Company fixed effects
mod = PanelOLS(y, X, drop_absorbed=True, check_rank=False, entity_effects=True, time_effects=False)
re_res = mod.fit()
print(re_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:              sd_return   R-squared:                        0.0774
Estimator:                   PanelOLS   R-squared (Between):              0.1445
No. Observations:               18104   R-squared (Within):               0.0774
Date:                Fri, Mar 18 2022   R-squared (Overall):              0.0797
Time:                        15:51:38   Log-likelihood                 4.987e+04
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      104.95
Entities:                        1834   P-value                           0.0000
Avg Obs:                       9.8713   Distribution:                F(13,16257)
Min Obs:                       1.0000                                           
Max Obs:                       38.000   F-statistic (robust):             104.95
                            

In [40]:
# Year fixed effects
mod = PanelOLS(y, X, drop_absorbed=True, check_rank=False, entity_effects=False, time_effects=True)
re_res = mod.fit()
print(re_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:              sd_return   R-squared:                        0.1113
Estimator:                   PanelOLS   R-squared (Between):              0.2959
No. Observations:               18104   R-squared (Within):               0.0553
Date:                Fri, Mar 18 2022   R-squared (Overall):              0.1161
Time:                        15:51:38   Log-likelihood                 4.967e+04
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      174.07
Entities:                        1834   P-value                           0.0000
Avg Obs:                       9.8713   Distribution:                F(13,18077)
Min Obs:                       1.0000                                           
Max Obs:                       38.000   F-statistic (robust):             174.07
                            

### 6.3 Models for predicting avg_return (only ceo vars)

In [41]:
data_model = data_joined.set_index(['GVKEY', 'fyear']).copy(deep=True)
data_model = data_model[
    ['AGE', 'ceo_tenure', 'dummy_chairman_president', 'dummy_chairman', 'dummy_president', 'dummy_founder',
     'ceo_group', 'avg_return', 'sd_return']]

data_model.drop('sd_return', axis=1, inplace=True)
X = sm.add_constant(data_model.drop('avg_return', axis=1))
y = data_model.avg_return

# Company and Year fixed effects
mod = PanelOLS(y, X, drop_absorbed=True, check_rank=False, entity_effects=True, time_effects=True)
re_res = mod.fit()
print(re_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:             avg_return   R-squared:                        0.0005
Estimator:                   PanelOLS   R-squared (Between):              0.0039
No. Observations:               18104   R-squared (Within):               0.0001
Date:                Fri, Mar 18 2022   R-squared (Overall):             -0.0002
Time:                        15:51:39   Log-likelihood                -2.729e+04
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      1.7511
Entities:                        1834   P-value                           0.1193
Avg Obs:                       9.8713   Distribution:                 F(5,16252)
Min Obs:                       1.0000                                           
Max Obs:                       38.000   F-statistic (robust):             1.7511
                            

Variables have been fully absorbed and have removed from the regression:

dummy_president, dummy_founder



In [42]:
# Company fixed effects
mod = PanelOLS(y, X, drop_absorbed=True, check_rank=False, entity_effects=True, time_effects=False)
re_res = mod.fit()
print(re_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:             avg_return   R-squared:                        0.0004
Estimator:                   PanelOLS   R-squared (Between):             -0.0010
No. Observations:               18104   R-squared (Within):               0.0004
Date:                Fri, Mar 18 2022   R-squared (Overall):             -0.0005
Time:                        15:51:39   Log-likelihood                -2.759e+04
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      1.2183
Entities:                        1834   P-value                           0.2975
Avg Obs:                       9.8713   Distribution:                 F(5,16265)
Min Obs:                       1.0000                                           
Max Obs:                       38.000   F-statistic (robust):             1.2183
                            

In [43]:
# Year fixed effects
mod = PanelOLS(y, X, drop_absorbed=True, check_rank=False, entity_effects=False, time_effects=True)
re_res = mod.fit()
print(re_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:             avg_return   R-squared:                        0.0011
Estimator:                   PanelOLS   R-squared (Between):              0.0064
No. Observations:               18104   R-squared (Within):              -0.0003
Date:                Fri, Mar 18 2022   R-squared (Overall):              0.0002
Time:                        15:51:39   Log-likelihood                -2.871e+04
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      3.8999
Entities:                        1834   P-value                           0.0016
Avg Obs:                       9.8713   Distribution:                 F(5,18085)
Min Obs:                       1.0000                                           
Max Obs:                       38.000   F-statistic (robust):             3.8999
                            

### 6.4 Models for predicting sd_return (only CEO vars)

In [44]:
# models for predicting sd_return (only CEO vars)

data_model = data_joined.set_index(['GVKEY', 'fyear']).copy(deep=True)
data_model = data_model[
    ['AGE', 'ceo_tenure', 'dummy_chairman_president', 'dummy_chairman', 'dummy_president', 'dummy_founder',
     'ceo_group', 'avg_return', 'sd_return']]

data_model.drop('avg_return', axis=1, inplace=True)
X = sm.add_constant(data_model.drop('sd_return', axis=1))
y = data_model.sd_return

# Company and Year fixed effects
mod = PanelOLS(y, X, drop_absorbed=True, check_rank=False, entity_effects=True, time_effects=True)
re_res = mod.fit()
print(re_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:              sd_return   R-squared:                        0.0007
Estimator:                   PanelOLS   R-squared (Between):             -0.0054
No. Observations:               18104   R-squared (Within):               0.0019
Date:                Fri, Mar 18 2022   R-squared (Overall):              0.0023
Time:                        15:51:40   Log-likelihood                 5.186e+04
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      2.1518
Entities:                        1834   P-value                           0.0564
Avg Obs:                       9.8713   Distribution:                 F(5,16252)
Min Obs:                       1.0000                                           
Max Obs:                       38.000   F-statistic (robust):             2.1518
                            

In [45]:
# Company fixed effects
mod = PanelOLS(y, X, drop_absorbed=True, check_rank=False, entity_effects=True, time_effects=False)
re_res = mod.fit()
print(re_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:              sd_return   R-squared:                        0.0057
Estimator:                   PanelOLS   R-squared (Between):             -0.0329
No. Observations:               18104   R-squared (Within):               0.0057
Date:                Fri, Mar 18 2022   R-squared (Overall):             -0.0040
Time:                        15:51:40   Log-likelihood                 4.919e+04
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      18.588
Entities:                        1834   P-value                           0.0000
Avg Obs:                       9.8713   Distribution:                 F(5,16265)
Min Obs:                       1.0000                                           
Max Obs:                       38.000   F-statistic (robust):             18.588
                            

In [46]:
# Year fixed effects
mod = PanelOLS(y, X, drop_absorbed=True, check_rank=False, entity_effects=False, time_effects=True)
re_res = mod.fit()
print(re_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:              sd_return   R-squared:                        0.0018
Estimator:                   PanelOLS   R-squared (Between):              0.0035
No. Observations:               18104   R-squared (Within):               0.0010
Date:                Fri, Mar 18 2022   R-squared (Overall):              0.0030
Time:                        15:51:40   Log-likelihood                 4.862e+04
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      6.6045
Entities:                        1834   P-value                           0.0000
Avg Obs:                       9.8713   Distribution:                 F(5,18085)
Min Obs:                       1.0000                                           
Max Obs:                       38.000   F-statistic (robust):             6.6045
                            