## Import libraries

In [1]:
import wooldridge as woo
import pandas as pd
import numpy as np
import statsmodels.api as sm

## Load dataset

In [2]:
# Load the `ceosal2` dataset shipped with the wooldridge package.
df = woo.data("ceosal2")
# Preview the first rows to see which columns are available.
df.head()

Unnamed: 0,salary,age,college,grad,comten,ceoten,sales,profits,mktval,lsalary,lsales,lmktval,comtensq,ceotensq,profmarg
0,1161,49,1,1,9,2,6200.0,966,23200.0,7.057037,8.732305,10.051908,81,4,15.580646
1,600,43,1,1,10,10,283.0,48,1100.0,6.39693,5.645447,7.003066,100,100,16.96113
2,379,51,1,1,9,3,169.0,40,1100.0,5.937536,5.129899,7.003066,81,9,23.668638
3,651,55,1,0,22,22,1100.0,-54,1000.0,6.478509,7.003066,6.907755,484,484,-4.909091
4,497,44,1,1,8,6,351.0,28,387.0,6.20859,5.860786,5.958425,64,36,7.977208


## (i) Average $salary$ and average $ceoten$

In [3]:
# Sample means of the two variables of interest:
#   salary -> annual compensation, reported in thousands of dollars
#   ceoten -> years served as CEO
avg_salary, avg_ceoten = (df[column].mean() for column in ('salary', 'ceoten'))

print(f"Average salary: {avg_salary:.2f} thousand dollars")
print(f"Average years as CEO: {avg_ceoten:.2f}")

Average salary: 865.86 thousand dollars
Average years as CEO: 7.95


## (ii) CEOs in the first year and maximum tenure

In [4]:
# Longest CEO tenure observed in the sample
max_ceoten = df['ceoten'].max()

# A tenure of zero years marks a CEO who is still in the first year on the job;
# summing the boolean mask counts them.
first_year_ceos = df['ceoten'].eq(0).sum()

print(f"Number of CEOs in their first year: {first_year_ceos}")
print(f"Maximum tenure as CEO: {max_ceoten} years")


Number of CEOs in their first year: 5
Maximum tenure as CEO: 37 years


## (iii) Simple regression: log($salary$) ~ $ceoten$

In [5]:
# Dependent variable: natural log of salary.
# np.log() keeps the source Series name ("salary"), so the regression summary
# would label the dependent variable as the *level* of salary. Rename it so
# the table states what is actually being modelled; "lsalary" follows the
# dataset's own naming for the logged variable.
y = np.log(df['salary']).rename('lsalary')

# Regressor: years as CEO, with an explicit intercept column
# (sm.OLS does not add a constant by itself).
X = sm.add_constant(df['ceoten'])

# Fit log(salary) = b0 + b1 * ceoten by ordinary least squares
model = sm.OLS(y, X).fit()

print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                 salary   R-squared:                       0.013
Model:                            OLS   Adj. R-squared:                  0.008
Method:                 Least Squares   F-statistic:                     2.334
Date:                Tue, 09 Sep 2025   Prob (F-statistic):              0.128
Time:                        15:09:44   Log-Likelihood:                -160.84
No. Observations:                 177   AIC:                             325.7
Df Residuals:                     175   BIC:                             332.0
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          6.5055      0.068     95.682      0.0

The estimates suggest that each additional year as CEO is associated with a salary that is about 1% higher, but this relationship is not statistically significant at conventional levels (P>|t| = 0.128).

Tenure alone does not explain CEO salaries well in this sample: with an R-squared of 0.013, it accounts for only about 1.3% of the variation in log(salary).