In [1]:
import pandas as pd
import numpy as np
from linearmodels import OLS
from linearmodels.iv.results import compare
import os

No fixed affects, No stationary contrls

In [2]:
# Load the data
#data = pd.read_csv('gvcobp_transformed1.csv')
data = pd.read_csv('gvcobp_data.csv')

# Drop rows with missing values in any of the columns used in the regression
columns_to_check = ['onset2COWCS','decade','logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff']

# Replace infinite values with NaN
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop rows with NaN in these columns
data = data.dropna(subset=columns_to_check)

# Set the MultiIndex
data = data.set_index(['country', 't'])

# Define the dependent variable
dependent = data['onset2COWCS']

# List of variables to regress on
variables = ['oilreserves']

# DataFrame to store results
results_data = []

# Loop through each variable and its corresponding instrument and run the regression for both instruments
for var in variables:
    formula = f'onset2COWCS ~ 1 + {var} + decade + logpop_M_diff + logpopdens_diff + logoutreg_diff + ecgrowth_demeaned + democracy_diff'
    model_ols = OLS.from_formula(formula, data).fit()
    #print(results_iv)
    # Add the results to the list
    results_data.append({
        'Variable': var,
        'Coefficient': model_ols.params[var],
        'Std Error': model_ols.std_errors[var],
        'P-value': model_ols.pvalues[var]
    })

# Convert list to DataFrame and set the MultiIndex
results_df = pd.DataFrame(results_data)
results_df = results_df.set_index('Variable')

# Output to file
results_df.to_csv('bp_results.csv')

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(


NO Fixed effects, YES stationary controls

In [3]:
# Load the data
#data = pd.read_csv('gvcobp_transformed1.csv')
data = pd.read_csv('gvcobp_data.csv')

# Drop rows with missing values in any of the columns used in the regression
columns_to_check = ['onset2COWCS','decade',
                         'logmountain', 'ethnic_fractionalization',
                         'religion_fractionalization', 'language_fractionalization',
                         'leg_british', 'opec',
                         'logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff']

# Replace infinite values with NaN
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop rows with NaN in these columns
data = data.dropna(subset=columns_to_check)

# Set the MultiIndex
data = data.set_index(['country', 't'])

# Define the dependent variable
dependent = data['onset2COWCS']

# List of variables to regress on
variables = ['oilreserves']

# DataFrame to store results
results_data = []

# Loop through each variable and its corresponding instrument and run the regression for both instruments
for var in variables:
    formula = f'onset2COWCS ~ 1 + {var} + decade + logpop_M_diff + logpopdens_diff + logoutreg_diff + ecgrowth_demeaned + democracy_diff + logmountain + ethnic_fractionalization + religion_fractionalization + language_fractionalization + leg_british + opec'
    model_ols = OLS.from_formula(formula, data).fit()
       
    # Add the results to the list
    results_data.append({
        'Variable': var,
        'Coefficient': model_ols.params[var],
        'Std Error': model_ols.std_errors[var],
        'P-value': model_ols.pvalues[var]
    })

results_df = pd.DataFrame(results_data)

results_df.to_csv('bp_results.csv', mode='a', header=False, index=False)

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(


YES Fixed effects, NO stationary controls

In [4]:
# Load the data
#data = pd.read_csv('gvcobp_transformed1.csv')
data = pd.read_csv('gvcobp_data.csv')

# Drop rows with missing values in any of the columns used in the regression
columns_to_check = ['onset2COWCS','decade','logpop_M_diff', 'logpopdens_diff', 'logoutreg_diff', 'ecgrowth_demeaned', 'democracy_diff']

# Replace infinite values with NaN
data[columns_to_check] = data[columns_to_check].replace([np.inf, -np.inf], np.nan)

# Drop rows with NaN in these columns
data = data.dropna(subset=columns_to_check)

# Set the MultiIndex
data = data.set_index(['country', 't'])

# Now you can create your dummies
country_dummies = pd.get_dummies(data.index.get_level_values('country'), drop_first=True, prefix='C').astype(str)
year_dummies = pd.get_dummies(data.index.get_level_values('t'), drop_first=True, prefix='Y').astype(str)
dummies = pd.concat([country_dummies, year_dummies], axis=1)

# Reset the index for 'data' and 'dummies', and then concatenate along axis=1
data = data.reset_index()
data_with_dummies = pd.concat([data, dummies], axis=1)

# Restore the original MultiIndex
data_with_dummies.set_index(['country', 't'], inplace=True)

# List of variables to regress on
variables = ['oilreserves']

# DataFrame to store results
results_data = []

# Loop through each variable and its corresponding instrument and run the regression for both instruments
for var in variables:
        fixed_effects = ' + '.join(dummies.columns)
        formula = f'onset2COWCS ~ 1 + {var} + logpop_M_diff + logpopdens_diff + logoutreg_diff + ecgrowth_demeaned + democracy_diff + {fixed_effects}'
        model_ols = OLS.from_formula(formula, data_with_dummies).fit()
        
        # Add the results to the list
        results_data.append({
            'Variable': var,
            'Coefficient': model_ols.params[var],
            'Std Error': model_ols.std_errors[var],
            'P-value': model_ols.pvalues[var]
        })

# add to results
results_df = pd.DataFrame(results_data)
 
results_df.to_csv('mix_results.csv', mode='a', header=False, index=False)

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(


ValueError: regressors [exog endog] do not have full column rank

In [5]:
databp = pd.read_csv('bp_results.csv')
datafp = pd.read_csv('fp_results.csv')
datamix = pd.read_csv('mix_results.csv')

databp = databp[databp['Variable'] == 'oilreserves']
datafp = datafp[datafp['Variable'] == 'oilreserves']
datamix = datamix[datamix['Variable'] == 'oilreserves']

fuel_data = pd.concat([databp, datafp, datamix], ignore_index=True)
fuel_data.to_csv('fuel_results.csv')

