In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from statsmodels.formula.api import logit

# Propensity-score matching script: loads two Stata files, merges them on
# 'year', defines a binary treatment from IPTOT_o, estimates a logit
# propensity score from cpicss, and pairs every treated row with its
# nearest control row on that score (1-nearest-neighbour, with replacement).

# Load the datasets
tradhist_df = pd.read_stata('/content/TRADHIST_WP.dta')
inflation_css_df = pd.read_stata('/content/Inflation_CSS.dta')

# Merge the datasets on 'year'; the inner join keeps only years present in both.
# NOTE(review): if both files hold several rows per year this is a
# many-to-many merge and multiplies rows -- confirm 'year' alone is the
# intended key.
merged_df = pd.merge(tradhist_df, inflation_css_df, on='year', how='inner')

# Treatment threshold: median of IPTOT_o over the merged rows (median() skips
# NaN), computed before any rows are dropped so the cut-off is unchanged.
treatment_threshold = merged_df['IPTOT_o'].median()

# Drop rows with missing values in the variables the analysis relies on:
#   * a NaN IPTOT_o compares False against the threshold and would otherwise
#     be silently labelled as a control unit;
#   * the formula interface drops rows with a NaN cpicss when fitting, which
#     would leave fewer predictions than rows in merged_df.
merged_df = merged_df.dropna(subset=['IPTOT_o', 'cpicss'])

# Create the binary treatment variable: 1 when IPTOT_o is above the median.
merged_df['treatment'] = (merged_df['IPTOT_o'] > treatment_threshold).astype(int)

# Both groups are needed to fit the logit and to perform the matching.
if merged_df['treatment'].nunique() < 2:
    raise ValueError(
        "Propensity-score matching needs both treated and control rows; "
        "after removing missing values only one group (or no data) remains."
    )

# Calculate propensity scores. Passing merged_df to predict() returns a Series
# indexed like merged_df, so the scores align row by row on assignment.
model = logit("treatment ~ cpicss", merged_df).fit()
merged_df['propensity_score'] = model.predict(merged_df)

# Split the data into treated and control groups
treated = merged_df[merged_df['treatment'] == 1]
control = merged_df[merged_df['treatment'] == 0]

# Use NearestNeighbors from sklearn for matching on the one-dimensional score
nn = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(control[['propensity_score']])

# For each treated instance, find its nearest neighbor in the control group.
# Each query is independent, so one control row may be matched several times.
_, indices = nn.kneighbors(treated[['propensity_score']])

# Extract matched control instances; kneighbors returns positions within
# `control`, hence positional .iloc rather than label-based .loc.
matched_control_indices = indices.flatten()
matched_control = control.iloc[matched_control_indices]

# Concatenate matched treated and control instances for analysis
matched_df = pd.concat([treated, matched_control])

# matched_df now contains the matched treatment and control units for further analysis


Optimization terminated successfully.
         Current function value: 0.689349
         Iterations 3
