# Follow-Along Activity

In [None]:
!pip install -q linearmodels

In [None]:
import pandas as pd
from linearmodels.panel import PanelOLS, RandomEffects
import statsmodels.api as sm

In [None]:
# Toy balanced panel: firms A, B and C, each observed once a year from 2018 to 2023
data = {
    # Each firm label is repeated six times (one row per year)
    'Firm': [firm for firm in 'ABC' for _ in range(6)],
    # The same six-year window is repeated for each of the three firms
    'Year': list(range(2018, 2024)) * 3,
    'Revenue': [
        210, 230, 250, 265, 280, 295,   # firm A
        190, 200, 215, 225, 240, 260,   # firm B
        180, 195, 210, 220, 235, 250,   # firm C
    ],
    'Advertising': [
        20, 21, 22, 23, 24, 25,         # firm A
        15, 16, 17, 18, 19, 20,         # firm B
        10, 12, 14, 16, 18, 20,         # firm C
    ],
}

# Build the frame and move Firm/Year into a two-level (entity, time) index in one step
df = pd.DataFrame(data).set_index(['Firm', 'Year'])
print(df.head())

           Revenue  Advertising
Firm Year                      
A    2018      210           20
     2019      230           21
     2020      250           22
     2021      265           23
     2022      280           24


In [None]:
# Store a column of ones on the frame; it is passed to the models as the intercept regressor
df['const'] = 1

# Regressors: the intercept column plus Advertising; outcome: Revenue
X = df[['const', 'Advertising']]
y = df['Revenue']

In [None]:
# Fixed-effects specification: entity_effects=True requests one effect per Firm
# (the first level of the index set on the frame).
model_fe = PanelOLS(dependent=y, exog=X, entity_effects=True)
# Fit with the default settings and show the text summary
results_fe = model_fe.fit()
print(results_fe.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:                Revenue   R-squared:                        0.8434
Estimator:                   PanelOLS   R-squared (Between):              0.4239
No. Observations:                  18   R-squared (Within):               0.8434
Date:                Thu, Jun 05 2025   R-squared (Overall):              0.7086
Time:                        16:58:01   Log-likelihood                   -67.095
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      75.405
Entities:                           3   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                    F(1,14)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             75.405
                            

In [None]:
# Random-effects specification on the same outcome and regressors
model_re = RandomEffects(dependent=y, exog=X)
# Fit with the default settings and show the text summary
results_re = model_re.fit()
print(results_re.summary)

                        RandomEffects Estimation Summary                        
Dep. Variable:                Revenue   R-squared:                        0.8130
Estimator:              RandomEffects   R-squared (Between):              0.8627
No. Observations:                  18   R-squared (Within):               0.7951
Date:                Thu, Jun 05 2025   R-squared (Overall):              0.8168
Time:                        16:58:02   Log-likelihood                   -71.459
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      69.543
Entities:                           3   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                    F(1,16)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             69.543
                            

# Your Project

In [None]:
# Step 1: Install and import necessary modules
!pip install -q linearmodels
import pandas as pd
from linearmodels.panel import PanelOLS, RandomEffects, compare
from google.colab import files

# Step 2: Upload CSV, load dataset and prepare panel structure
uploaded = files.upload()
df = pd.read_csv('7.3_your_project_data.csv')
df.set_index(['Firm', 'Year'], inplace=True)  # Set hierarchical index for panel data
print(df.head())

# Step 3: Prepare variables for regression
df['const'] = 1
y = df['Profit']
X = df[['const', 'R&D_Spend']]

# Step 4: Estimate fixed effects model
model_fe = PanelOLS(y, X, entity_effects=True)
results_fe = model_fe.fit()
print(results_fe.summary)

# Step 5: Estimate random effects model
model_re = RandomEffects(y, X)
results_re = model_re.fit()
print(results_re.summary)

# Step 6: Compare models using the Hausman test
hausman = compare({'FE': results_fe, 'RE': results_re})
print(hausman)


Saving 7.3_your_project_data.csv to 7.3_your_project_data.csv
           Profit  R&D_Spend
Firm Year                   
A    2004  131.31      12.75
     2005  138.52      12.97
     2006  128.19      11.65
     2007  138.99      14.37
     2008  130.41      11.30
                          PanelOLS Estimation Summary                           
Dep. Variable:                 Profit   R-squared:                        0.4132
Estimator:                   PanelOLS   R-squared (Between):              0.3711
No. Observations:                 100   R-squared (Within):               0.4132
Date:                Wed, Jun 11 2025   R-squared (Overall):              0.3719
Time:                        15:33:42   Log-likelihood                   -278.63
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      66.188
Entities:                           5   P-value                           0.0000
Avg Ob