<a href="https://colab.research.google.com/github/ChacoGolden/Stat-Data-Science/blob/main/Reg_FE_Edu__Int_Infl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
!pip install linearmodels



In [26]:
import pandas as pd
from linearmodels.panel import PanelOLS
from statsmodels.tools import add_constant

In [37]:
# Load the CSV file
data = pd.read_csv('https://raw.githubusercontent.com/ChacoGolden/Stat-Data-Science/refs/heads/main/Panel_Data_Inc_Edu_Infl.csv')
# Convert 'Waste' from tons to kilograms
data['Waste'] = data['Waste'] * 1000
# Inspect the first few rows of the data
print(data.head())

    Country  Year       Waste   Income  Education  Inflation
0   Belgium  2014  484.672171  21705.0       32.6        0.5
1  Bulgaria  2014  376.962939   3311.0       23.6       -1.6
2   Czechia  2014  310.164578   7622.0       19.1        0.4
3   Denmark  2014  636.289226  27861.0       29.4        0.4
4   Germany  2014  456.714036  19733.0       23.2        0.8


In [38]:
# Convert 'Country' and 'Year' to categorical variables (if not already)
data['Country'] = data['Country'].astype('category')
data['Year'] = data['Year'].astype('category')

# Check for missing values
print(data.isnull().sum())
print(data.head())

Country      0
Year         0
Waste        0
Income       0
Education    0
Inflation    0
dtype: int64
    Country  Year       Waste   Income  Education  Inflation
0   Belgium  2014  484.672171  21705.0       32.6        0.5
1  Bulgaria  2014  376.962939   3311.0       23.6       -1.6
2   Czechia  2014  310.164578   7622.0       19.1        0.4
3   Denmark  2014  636.289226  27861.0       29.4        0.4
4   Germany  2014  456.714036  19733.0       23.2        0.8


In [39]:
data['Year'] = data['Year'].astype(int)  # Or float if years are not integers

# ... (rest of your code) ...

In [40]:
# Set 'Country' and 'Year' as index to create a MultiIndex
data = data.set_index(['Country', 'Year'])

In [41]:
# Define the dependent variable (Y) and independent variables (X)
Y = data['Waste']
X = data[['Income', 'Education', 'Inflation']]

# Add a constant term to the independent variables
X = add_constant(X)

In [42]:
# Run the fixed effects model
model = PanelOLS(dependent=Y, exog=X, entity_effects=True, time_effects=True)
results = model.fit()

# Display the results
print(results.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:                  Waste   R-squared:                        0.0278
Estimator:                   PanelOLS   R-squared (Between):             -0.0287
No. Observations:                 234   R-squared (Within):               0.1464
Date:                Sun, Dec 01 2024   R-squared (Overall):              0.0061
Time:                        13:52:19   Log-likelihood                   -1168.0
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      1.8766
Entities:                          26   P-value                           0.1348
Avg Obs:                       9.0000   Distribution:                   F(3,197)
Min Obs:                       9.0000                                           
Max Obs:                       9.0000   F-statistic (robust):             1.8766
                            

  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  mu = self._frame.groupby(level=level).mean()
  mu = self._frame.groupby(level=level).mean()
  out = self._frame.groupby(level=level).count()
  mu = self._frame.groupby(level=level).mean()
  group_mu = self._frame.groupby(level=level).transform("mean")
  mu = self._frame.groupby(level=level).mean()
  mu = self._frame.groupby(level=level).mean()
  group_mu = self._frame.groupby(level=level).transform("mean")


In [43]:
# Create the interaction term
data['Income_Education'] = data['Income'] * data['Education']

In [44]:
# Define the dependent variable (Y) and independent variables (X)
Y = data['Waste']
X = data[['Income', 'Education', 'Inflation', 'Income_Education']]

# Add a constant term to the independent variables
X = add_constant(X)

# Run the fixed effects model with interaction term
model = PanelOLS(dependent=Y, exog=X, entity_effects=True, time_effects=True)
results = model.fit()

# Display the results
print(results.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:                  Waste   R-squared:                        0.1051
Estimator:                   PanelOLS   R-squared (Between):             -0.6874
No. Observations:                 234   R-squared (Within):               0.2021
Date:                Sun, Dec 01 2024   R-squared (Overall):             -0.5106
Time:                        13:55:47   Log-likelihood                   -1158.3
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      5.7530
Entities:                          26   P-value                           0.0002
Avg Obs:                       9.0000   Distribution:                   F(4,196)
Min Obs:                       9.0000                                           
Max Obs:                       9.0000   F-statistic (robust):             5.7530
                            

  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  mu = self._frame.groupby(level=level).mean()
  mu = self._frame.groupby(level=level).mean()
  out = self._frame.groupby(level=level).count()
  mu = self._frame.groupby(level=level).mean()
  group_mu = self._frame.groupby(level=level).transform("mean")
  mu = self._frame.groupby(level=level).mean()
  mu = self._frame.groupby(level=level).mean()
  group_mu = self._frame.groupby(level=level).transform("mean")


The interaction is negative and significant, meaning the relationship between Income and Waste weakens as Education increases.

In [47]:
import numpy as np

# Transform 'Income' to its logarithm
data['Log_Income'] = np.log(data['Income'])


In [55]:
# Update the interaction term to reflect the log transformation
data['Log_Income_Education'] = data['Log_Income'] * data['Education']
X['Log_Income_Education'] = data['Log_Income_Education']

# Define the dependent variable (Y) and independent variables (X)
Y = data['Waste']
X = data[['Log_Income', 'Education', 'Inflation', 'Log_Income_Education']]



# Add a constant term to the independent variables
X = add_constant(X)

# Run the fixed effects model
model = PanelOLS(dependent=Y, exog=X, entity_effects=True, time_effects=True)
results = model.fit()

# Display the results
print(results.summary)


                          PanelOLS Estimation Summary                           
Dep. Variable:                  Waste   R-squared:                        0.1380
Estimator:                   PanelOLS   R-squared (Between):             -0.8449
No. Observations:                 234   R-squared (Within):               0.1832
Date:                Sun, Dec 01 2024   R-squared (Overall):             -0.6405
Time:                        14:17:28   Log-likelihood                   -1153.9
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      7.8469
Entities:                          26   P-value                           0.0000
Avg Obs:                       9.0000   Distribution:                   F(4,196)
Min Obs:                       9.0000                                           
Max Obs:                       9.0000   F-statistic (robust):             7.8469
                            

  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  mu = self._frame.groupby(level=level).mean()
  mu = self._frame.groupby(level=level).mean()
  out = self._frame.groupby(level=level).count()
  mu = self._frame.groupby(level=level).mean()
  group_mu = self._frame.groupby(level=level).transform("mean")
  mu = self._frame.groupby(level=level).mean()
  mu = self._frame.groupby(level=level).mean()
  group_mu = self._frame.groupby(level=level).transform("mean")


In [51]:
# Log-transform 'Income' and 'Waste'
data['Log_Waste'] = np.log(data['Waste'])
data['Log_Income'] = np.log(data['Income'])
# Update the interaction term to use log-transformed income
data['Log_Income_Education'] = data['Log_Income'] * data['Education']


In [52]:
# Define the dependent variable (Y) and independent variables (X)
Y = data['Log_Waste']
X = data[['Log_Income', 'Education', 'Inflation', 'Log_Income_Education']]

# Add a constant term
X = add_constant(X)

# Run the fixed effects model
model = PanelOLS(dependent=Y, exog=X, entity_effects=True, time_effects=True)
results = model.fit()

# Display the results
print(results.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:              Log_Waste   R-squared:                        0.1598
Estimator:                   PanelOLS   R-squared (Between):             -0.7768
No. Observations:                 234   R-squared (Within):               0.2195
Date:                Sun, Dec 01 2024   R-squared (Overall):             -0.5857
Time:                        14:05:46   Log-likelihood                    244.73
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      9.3210
Entities:                          26   P-value                           0.0000
Avg Obs:                       9.0000   Distribution:                   F(4,196)
Min Obs:                       9.0000                                           
Max Obs:                       9.0000   F-statistic (robust):             9.3210
                            

  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  group_mu = self._frame.groupby(level=level).transform("mean")
  mu = self._frame.groupby(level=level).mean()
  mu = self._frame.groupby(level=level).mean()
  out = self._frame.groupby(level=level).count()
  mu = self._frame.groupby(level=level).mean()
  group_mu = self._frame.groupby(level=level).transform("mean")
  mu = self._frame.groupby(level=level).mean()
  mu = self._frame.groupby(level=level).mean()
  group_mu = self._frame.groupby(level=level).transform("mean")
