## Fixed Effect Model (i really pray it works)

In [53]:
import pandas as pd
import numpy as np
from linearmodels.panel import PanelOLS

In [54]:
#import data first
training_data=pd.read_csv('../data/final/training_model_data.csv',header=0)
test_data=pd.read_csv('../data/final/incompl_test_model_data.csv',header=0)
test_data=test_data[training_data.columns]
combined=pd.concat([training_data,test_data],axis=0)


In [55]:
un_data=pd.read_csv('../data/final/FBIC_sentiment_comtrade_un.csv',header=0)
un_data=un_data[['iso3a','iso3b','IdealPointDistance','year']]
un_data=un_data.rename(columns={'iso3a':'country_a','iso3b':'country_b','IdealPointDistance':'un_dist'})
combined=combined.merge(un_data,how='left',on=['country_a','country_b','year'])

In [56]:
df = combined.melt(
    id_vars=['year','country_a','country_b','tradeagreementindex','sentiment_index','un_dist'],
    value_vars=['bec_1','bec_2','bec_3','bec_4','bec_5','bec_6','bec_7','bec_8'],
    var_name='sector',
    value_name='value'
)

In [57]:
test_df = pd.merge(
    df, df,
    left_on=['country_a','country_b','year','sector'],
    right_on=['country_b','country_a','year','sector'],
    how='outer',
    suffixes=('_export', '_import')
)

In [6]:
test_df.head()

Unnamed: 0,year,country_a_export,country_b_export,tradeagreementindex_export,sentiment_index_export,un_dist_export,sector,value_export,country_a_import,country_b_import,tradeagreementindex_import,sentiment_index_import,un_dist_import,value_import
0,2006,ARE,AUS,0,0.548505,2.36814,bec_1,0.0,AUS,ARE,0,0.716185,2.36814,190512600.0
1,2006,ARE,AUS,0,0.548505,2.36814,bec_2,0.0,AUS,ARE,0,0.716185,2.36814,770182600.0
2,2006,ARE,AUS,0,0.548505,2.36814,bec_3,0.0,AUS,ARE,0,0.716185,2.36814,124810200.0
3,2006,ARE,AUS,0,0.548505,2.36814,bec_4,0.0,AUS,ARE,0,0.716185,2.36814,149286400.0
4,2006,ARE,AUS,0,0.548505,2.36814,bec_5,0.0,AUS,ARE,0,0.716185,2.36814,249071400.0


In [58]:
test_df['sentiment_index'] = 0.5 * (test_df['sentiment_index_export'] + test_df['sentiment_index_import'])
test_df=test_df.drop(['country_a_import','country_b_import','sentiment_index_export','sentiment_index_import','tradeagreementindex_import','un_dist_import'],axis=1)

In [59]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
test_df['tradeagreementindex_export']=scaler.fit_transform(test_df['tradeagreementindex_export'].values.reshape(-1,1))
test_df['sentiment_index']=scaler.fit_transform(test_df['sentiment_index'].values.reshape(-1,1))
test_df['D']=1+(-1)*0.5*test_df['tradeagreementindex_export']+(-1)*0.5*test_df['sentiment_index']
test_df=test_df.drop(columns=['sentiment_index','tradeagreementindex_export'],axis=1)

In [60]:
test_df.head()

Unnamed: 0,year,country_a_export,country_b_export,un_dist_export,sector,value_export,value_import,D
0,2006,ARE,AUS,2.36814,bec_1,0.0,190512600.0,0.666298
1,2006,ARE,AUS,2.36814,bec_2,0.0,770182600.0,0.666298
2,2006,ARE,AUS,2.36814,bec_3,0.0,124810200.0,0.666298
3,2006,ARE,AUS,2.36814,bec_4,0.0,149286400.0,0.666298
4,2006,ARE,AUS,2.36814,bec_5,0.0,249071400.0,0.666298


In [61]:
#split up the datasets from here (this is with un records)
original=test_df[['year','country_a_export','country_b_export','un_dist_export','sector','value_export']]
original=original.rename(columns={'un_dist_export':'D','country_a_export':'country_a','country_b_export':'country_b','value_export':'value'})
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
original['D']=scaler.fit_transform(original['D'].values.reshape(-1,1))
df=original


In [65]:
#this is from composite D
new=test_df[['year','country_a_export','country_b_export','D','sector','value_export']]
new=new.rename(columns={'country_a_export':'country_a','country_b_export':'country_b','value_export':'value'})
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
new['D']=scaler.fit_transform(new['D'].values.reshape(-1,1))
df=new

## NOTE

United Arab Empire was removed because most of the trade volumes here are 0..

In [66]:
df = df[(df['country_a'] != 'ARE') & (df['country_b'] != 'ARE')]
df=df[(df['year']>=2017) & (df['year']<=2023)]

In [67]:
# Prepare the data
df = df.sort_values(['country_a', 'country_b', 'sector', 'year'])
df['log_export'] = np.log(df['value'])
df['delta_log_export'] = df.groupby(['country_a', 'country_b', 'sector'])['log_export'].diff()

# Create entity ID for panel structure
df['entity_id'] = df['country_a'] + '_' + df['country_b'] + '_' + df['sector']



## pushing into fixed effect model

In [68]:
from linearmodels.iv import AbsorbingLS

# check for data completeness
df_reg = df.dropna(subset=['delta_log_export'])

# Create the sector-time interaction with D
df_reg['sector_year'] = df_reg['sector'] + '_' + df_reg['year'].astype(str)
for sy in df_reg['sector_year'].unique():
    df_reg[f'D_{sy}'] = np.where(df_reg['sector_year'] == sy, df_reg['D'], 0)

# Get all D interaction columns
D_cols = [col for col in df_reg.columns if col.startswith('D_')]
df_reg['importer_fe'] = df_reg['country_b'].astype(str) + '_' + df_reg['sector'].astype(str) + '_' + df_reg['year'].astype(str)
df_reg['exporter_fe'] = df_reg['country_a'].astype(str) + '_' + df_reg['sector'].astype(str) + '_' + df_reg['year'].astype(str)
df_reg['importer_fe'] = df_reg['importer_fe'].astype('category')
df_reg['exporter_fe'] = df_reg['exporter_fe'].astype('category')

from linearmodels.iv import AbsorbingLS

X = df_reg[D_cols]                          # D interaction terms
absorbs = df_reg[['importer_fe', 'exporter_fe']]  

mod = AbsorbingLS(
    df_reg['delta_log_export'],
    X,
    absorb=absorbs
)

res = mod.fit(
    cov_type='clustered',
    clusters=df_reg['country_a'] + '_' + df_reg['country_b']   # (i,j) pair
)
print(res.summary)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reg['sector_year'] = df_reg['sector'] + '_' + df_reg['year'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reg[f'D_{sy}'] = np.where(df_reg['sector_year'] == sy, df_reg['D'], 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reg[f'D_{sy}'] = np.where(df_reg['sector_year'] == s

                         Absorbing LS Estimation Summary                          
Dep. Variable:       delta_log_export   R-squared:                          0.7175
Estimator:               Absorbing LS   Adj. R-squared:                     0.6758
No. Observations:               13056   F-statistic:                        248.46
Date:                Fri, Apr 11 2025   P-value (F-stat):                   0.0000
Time:                        23:37:31   Distribution:                     chi2(48)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0116
                                        Variables Absorbed:                 1631.0
                              Parameter Estimates                               
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
D_bec_1_2018    -0.1140     0.0736    -1.5493     0.1213     -0.2581      0.0302
D_bec_1_2019