In [1]:
import pandas as pd
import numpy as np
from linearmodels import OLS
from linearmodels.iv.results import compare
import os

NO Fixed effects, NO stationary controls

In [None]:
# Pooled OLS of milexp_pergdpSIPRI on s6 plus the controls listed below.
# This cell starts the results file: it overwrites 'mix_results.csv' and writes
# the header row that the header-less rows appended by later cells line up with.

# Bind the linearmodels estimator under its own name. A later cell rebinds the
# bare name `OLS` to the statsmodels class, whose results expose `bse` rather
# than `std_errors`, so relying on `OLS` here breaks when cells are re-run out
# of order.
from linearmodels import OLS as LinearModelsOLS

# Load the data
data = pd.read_csv('gvcomix_data.csv')

# Outcome, regressors of interest (one regression per entry) and controls.
# The missing-value filter and the formula are both derived from these lists,
# so the estimation sample always matches the columns the model actually uses.
outcome = 'milexp_pergdpSIPRI'
variables = ['s6']
controls = ['decade', 'logpopdens_diff', 'ecgrowth_demeaned', 'democracy_diff']
columns_to_check = [*variables, outcome, *controls]

# Treat +/-inf as missing so the dropna below removes those rows as well
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop rows with NaN in any model column
data = data.dropna(subset=columns_to_check)

# Set the (country, t) MultiIndex
data = data.set_index(['country', 't'])

# One summary record per regressor of interest
results_data = []

# The control part of the formula is the same for every regression
control_terms = ' + '.join(controls)

for var in variables:
    # The explicit "1" requests an intercept
    formula = f'{outcome} ~ 1 + {var} + {control_terms}'
    model_ols = LinearModelsOLS.from_formula(formula, data).fit()

    # Keep only the estimate for the regressor of interest
    results_data.append({
        'Variable': var,
        'Coefficient': model_ols.params[var],
        'Std Error': model_ols.std_errors[var],
        'P-value': model_ols.pvalues[var]
    })

# Convert the records to a DataFrame indexed by variable name
results_df = pd.DataFrame(results_data)
results_df = results_df.set_index('Variable')

# Output to file (columns: Variable, Coefficient, Std Error, P-value)
results_df.to_csv('mix_results.csv')

NO Fixed effects, YES stationary controls

In [None]:
# Pooled OLS of milexp_pergdpSIPRI on s6 with the time-varying controls plus
# logmountain, the three fractionalization measures, leg_british and opec.
# The result row is appended (without header) to 'mix_results.csv'.

# Bind the linearmodels estimator under its own name. A later cell rebinds the
# bare name `OLS` to the statsmodels class, whose results expose `bse` rather
# than `std_errors`, so relying on `OLS` here breaks when cells are re-run out
# of order.
from linearmodels import OLS as LinearModelsOLS

# Load the data
data = pd.read_csv('gvcomix_data.csv')

# Outcome, regressors of interest (one regression per entry) and controls.
# The missing-value filter and the formula are both derived from these lists,
# so the estimation sample always matches the columns the model actually uses.
outcome = 'milexp_pergdpSIPRI'
variables = ['s6']
controls = [
    'decade', 'logpopdens_diff', 'ecgrowth_demeaned', 'democracy_diff',
    'logmountain', 'ethnic_fractionalization',
    'religion_fractionalization', 'language_fractionalization',
    'leg_british', 'opec',
]
columns_to_check = [*variables, outcome, *controls]

# Treat +/-inf as missing so the dropna below removes those rows as well
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop rows with NaN in any model column
data = data.dropna(subset=columns_to_check)

# Set the (country, t) MultiIndex
data = data.set_index(['country', 't'])

# One summary record per regressor of interest
results_data = []

# The control part of the formula is the same for every regression
control_terms = ' + '.join(controls)

for var in variables:
    # The explicit "1" requests an intercept
    formula = f'{outcome} ~ 1 + {var} + {control_terms}'
    model_ols = LinearModelsOLS.from_formula(formula, data).fit()

    # Keep only the estimate for the regressor of interest
    results_data.append({
        'Variable': var,
        'Coefficient': model_ols.params[var],
        'Std Error': model_ols.std_errors[var],
        'P-value': model_ols.pvalues[var]
    })

results_df = pd.DataFrame(results_data)

# Append below the rows already in the file. Column order (Variable, Coefficient,
# Std Error, P-value) follows the record keys above.
# NOTE: append mode means re-running this cell adds a duplicate row; re-run the
# cell that creates 'mix_results.csv' first to start from a clean file.
results_df.to_csv('mix_results.csv', mode='a', header=False, index=False)

YES Fixed effects, NO stationary controls

In [None]:
import pandas as pd
import numpy as np
# NOTE(review): this rebinds the notebook-wide name `OLS` (imported from
# linearmodels in the first cell) to the statsmodels class. Cells that expect
# the linearmodels estimator will misbehave if re-run after this one.
from statsmodels.regression.linear_model import OLS

# Fixed-effects specification: OLS of milexp_pergdpSIPRI on s6 and the
# time-varying controls, with country and period dummies, appended as one
# header-less row to 'mix_results.csv'.

# Load the data
data = pd.read_csv('gvcomix_data.csv')

# Columns whose missing / infinite values remove a row from the sample.
# NOTE(review): 'decade' is filtered on here but does not appear in the formula
# below, and this cell uses 'logpop_M_diff' where the other specifications use
# 'logpopdens_diff' - confirm both are intended.
columns_to_check = ['milexp_pergdpSIPRI', 's6','decade', 'logpop_M_diff', 'ecgrowth_demeaned', 'democracy_diff']
finite_or_nan = data[columns_to_check].replace([np.inf, -np.inf], np.nan)
data[columns_to_check] = finite_or_nan
data = data.dropna(subset=columns_to_check)

# Index by (country, t) so the codes can be derived from the index levels
data = data.set_index(['country', 't'])

# 1-based integer codes for each distinct country and each distinct t (sorted)
country_codes, _ = pd.factorize(data.index.get_level_values('country'), sort=True)
period_codes, _ = pd.factorize(data.index.get_level_values('t'), sort=True)
data['Ccode'] = country_codes + 1
data['Ycode'] = period_codes + 1

# Cluster label "<Ccode>_<Ycode>", i.e. one label per (country, t) pair.
# NOTE(review): if each (country, t) pair occurs only once in the data, every
# observation is its own cluster - confirm this is the intended clustering
# level (as opposed to clustering by country, or two-way by country and t).
data['ClusterID'] = data['Ccode'].astype(str) + '_' + data['Ycode'].astype(str)

# Back to a flat frame: 'country' and 't' become ordinary columns again
data = data.reset_index()

# Regressors of interest; one regression is run per entry
variables = ['s6']

# One summary record per regressor of interest
results_data = []

for var in variables:
    # C(...) expands the integer codes into country and period dummies
    formula = f'milexp_pergdpSIPRI ~ 1 + {var} + logpop_M_diff + ecgrowth_demeaned + democracy_diff + C(Ccode) + C(Ycode)'

    # Cluster-robust covariance, grouped on the ClusterID labels
    model_ols = OLS.from_formula(formula, data).fit(
        cov_type='cluster',
        cov_kwds={'groups': data['ClusterID']},
    )

    # Keep only the estimate for the regressor of interest
    # (statsmodels names the standard errors `bse`)
    record = {
        'Variable': var,
        'Coefficient': model_ols.params[var],
        'Std Error': model_ols.bse[var],
        'P-value': model_ols.pvalues[var],
    }
    results_data.append(record)

# Append the row(s) to the shared results file, without header or index
results_df = pd.DataFrame(results_data)
results_df.to_csv('mix_results.csv', mode='a', header=False, index=False)