In [66]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
from linearmodels.panel import PanelOLS

In [67]:
df = pd.read_excel('Data Finale_balanced_withMinimumWage.xlsx')
df.head()

Unnamed: 0,Country,Year,Monthly Minimum Wage,CollectiveBargain_Coverage,TradeUnions_Density,Real Average Annual Wage Growth,Annual Inflation CPI,Gini Index,Employment/population ratio Women age 15 to 64,Labour force participation rate (Women age 15 to 64),...,"Children out of school, primary",Current health expenditure (% of GDP),Exports of goods and services (annual % growth),"Foreign direct investment, net inflows (% of GDP)","Foreign direct investment, net outflows (% of GDP)",Imports of goods and services (% of GDP),"Labor force, total",Labor tax and contributions (% of commercial profits),"Part time employment, total (% of total employment)",GDP (current US$)
0,Austria,1990,0.0,98.0,46.8,3.13,3.26,30.8,,61.32,...,4616,9.2,8.57,0.39,1.02,35.79,3569407,35.3,20.4,166463400000.0
1,Austria,1991,0.0,98.0,45.5,3.13,3.34,30.8,,61.32,...,4616,9.2,2.94,0.21,0.74,35.46,3569407,35.3,20.4,173794200000.0
2,Austria,1992,0.0,98.0,44.5,2.01,4.02,30.8,,61.32,...,4616,9.2,1.32,0.76,0.9,34.47,3652067,35.3,20.4,195078100000.0
3,Austria,1993,0.0,98.0,44.0,0.82,3.63,30.8,,61.32,...,4616,9.2,-2.37,0.59,0.62,31.8,3684501,35.3,20.4,190379700000.0
4,Austria,1994,0.0,98.0,41.5,1.01,2.95,30.8,,61.32,...,4616,9.2,5.69,1.02,0.61,33.83,3851283,35.3,20.4,203535200000.0


In [68]:
print(df.columns)

Index(['Country', 'Year', 'Monthly Minimum Wage', 'CollectiveBargain_Coverage',
       'TradeUnions_Density', 'Real Average Annual Wage Growth',
       'Annual Inflation CPI', 'Gini Index',
       'Employment/population ratio Women age 15 to 64',
       'Labour force participation rate (Women age 15 to 64)',
       'Unemployment rate (Women age 15 to 64)',
       'Central government debt, total (% of GDP)',
       'Children out of school, primary',
       'Current health expenditure (% of GDP)',
       'Exports of goods and services (annual % growth)',
       'Foreign direct investment, net inflows (% of GDP)',
       'Foreign direct investment, net outflows (% of GDP)',
       'Imports of goods and services (% of GDP)', 'Labor force, total',
       'Labor tax and contributions (% of commercial profits)',
       'Part time employment, total (% of total employment)',
       'GDP (current US$)'],
      dtype='object')


In [69]:
df.rename(columns={
    "Labor force, total": "Labor_force",
    "GDP (current US$)": "GDP",
    "Annual Inflation CPI": "Inflation",
    "Real Average Annual Wage Growth": "WageGrowth",
    "Labour force participation rate (Women age 15 to 64)": "Female_Workers",
    "Trade unions density": "TradeUnions_Density",
    "Central government debt, total (% of GDP)": "Gov_debt",
    "Unemployment rate (Women age 15 to 64)": "WomenUnemployment_rate",
    "Current health expenditure (% of GDP)": "Health_expenditure",
    "Exports of goods and services (annual % growth)": "Exports_growth",
    "Imports of goods and services (% of GDP)": "Imports_growth",
    "Foreign direct investment, net inflows (% of GDP)": "FDI_inflow",
    "Foreign direct investment, net outflows (% of GDP)": "FDI_outflow",
    "Labor tax and contributions (% of commercial profits)": "Tax_contribution",
    "Part time employment, total (% of total employment)": "Parttime_employment"
}, inplace=True)
df.reset_index(inplace=True)

## Regression

In [70]:
# Define your independent variables for each regression
model1 = ['CollectiveBargain_Coverage', 'Inflation', 'WageGrowth']
model2 = ['CollectiveBargain_Coverage', 'Inflation', 'WageGrowth', 'TradeUnions_Density']
model3 = ['CollectiveBargain_Coverage', 'Inflation', 'WageGrowth', 'TradeUnions_Density', 'GDP', 'Gov_debt', 'Health_expenditure']
model4 = ['CollectiveBargain_Coverage', 'Inflation', 'WageGrowth', 'TradeUnions_Density', 'GDP', 'Gov_debt', 'Health_expenditure', 'Labor_force', 'Female_Workers', 'Parttime_employment', 'WomenUnemployment_rate']
model5 = ['CollectiveBargain_Coverage', 'Inflation', 'WageGrowth', 'TradeUnions_Density', 'GDP', 'Gov_debt', 'Health_expenditure', 'Labor_force', 'Female_Workers', 'Parttime_employment', 'WomenUnemployment_rate', 'Exports_growth', 'Imports_growth']
model6 = ['CollectiveBargain_Coverage', 'Inflation', 'WageGrowth', 'TradeUnions_Density', 'GDP', 'Gov_debt', 'Health_expenditure', 'Labor_force', 'Female_Workers', 'Parttime_employment', 'WomenUnemployment_rate', 'Exports_growth', 'Imports_growth', 'FDI_inflow', 'FDI_outflow', 'Tax_contribution']

# List of all independent variables
modelli = [model1, model2, model3, model4, model5, model6]

In [71]:
# Define the order of the variables
regressor_order = ['CollectiveBargain_Coverage', 'Inflation', 'WageGrowth', 'TradeUnions_Density', 'GDP', 'Gov_debt', 'Health_expenditure']

# List to store models
models = []

# Run each regression
for i, independent_vars in enumerate(all_independent_vars, start=1):
    X = sm.add_constant(df[independent_vars])  # Add a constant to the independent variables
    y = df['Gini Index']

    model = sm.OLS(y, X).fit()
    models.append(model)

# Create a summary table of all models
summary_table = summary_col(models, stars=True, float_format='%0.2f',
                            model_names=['Model 1', 'Model 2', 'Model 3', 'Model 4', 'Model 5','Model 6'],
                            info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                                       'R2':lambda x: "{:.2f}".format(x.rsquared)},
                            regressor_order=regressor_order)

print(summary_table)


                           Model 1  Model 2  Model 3  Model 4  Model 5  Model 6 
--------------------------------------------------------------------------------
CollectiveBargain_Coverage 0.00     0.02**   0.09***  0.06***  0.07***  0.05*** 
                           (0.01)   (0.01)   (0.01)   (0.01)   (0.02)   (0.02)  
Inflation                  0.19*    0.18**   0.06     0.07     0.10     0.09    
                           (0.10)   (0.07)   (0.07)   (0.06)   (0.06)   (0.06)  
WageGrowth                 -0.24**  -0.02    -0.12*   -0.11    -0.09    -0.10   
                           (0.11)   (0.08)   (0.07)   (0.07)   (0.07)   (0.07)  
TradeUnions_Density                 -0.12*** -0.12*** -0.10*** -0.10*** -0.09***
                                    (0.01)   (0.01)   (0.01)   (0.01)   (0.01)  
GDP                                          0.00     0.00***  0.00***  0.00*** 
                                             (0.00)   (0.00)   (0.00)   (0.00)  
Gov_debt                   

## Fixed Effects on Years and Country


In [72]:
# Define the order of the variables
regressor_order = ['CollectiveBargain_Coverage', 'Inflation', 'WageGrowth', 'TradeUnions_Density', 'GDP', 'Gov_debt', 'Health_expenditure']

models = []

for i, independent_vars in enumerate(modelli, start=1):
    X = sm.add_constant(df[independent_vars])  
    y = df['Gini Index']

    model = sm.OLS(y, X).fit()
    models.append(model)

# Create a summary table of all models
summary_table2 = summary_col(models, stars=True, float_format='%0.2f',
                            model_names=['Model 1', 'Model 2', 'Model 3', 'Model 4', 'Model 5', 'Model 6'],
                            info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                                       'R2':lambda x: "{:.2f}".format(x.rsquared)},
                            regressor_order=regressor_order)

summary_table2

0,1,2,3,4,5,6
,Model 1,Model 2,Model 3,Model 4,Model 5,Model 6
CollectiveBargain_Coverage,0.00,0.02**,0.09***,0.06***,0.07***,0.05***
,(0.01),(0.01),(0.01),(0.01),(0.02),(0.02)
Inflation,0.19*,0.18**,0.06,0.07,0.10,0.09
,(0.10),(0.07),(0.07),(0.06),(0.06),(0.06)
WageGrowth,-0.24**,-0.02,-0.12*,-0.11,-0.09,-0.10
,(0.11),(0.08),(0.07),(0.07),(0.07),(0.07)
TradeUnions_Density,,-0.12***,-0.12***,-0.10***,-0.10***,-0.09***
,,(0.01),(0.01),(0.01),(0.01),(0.01)
GDP,,,0.00,0.00***,0.00***,0.00***
