# Forest Loss & Trade Policy Impact Analysis (DiD Model)
Prepared on 2025-07-22

In [5]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# NOTE(review): the recorded ImportError (`_lazywhere` missing from scipy._lib._util)
# points to a statsmodels/SciPy version mismatch in the environment — align the
# installed versions before re-running.
import statsmodels.formula.api as smf


ImportError: cannot import name '_lazywhere' from 'scipy._lib._util' (/Users/mustakahmad/Library/CloudStorage/OneDrive-purdue.edu/FACAI LAB/Project3_tariff/proj3env/lib/python3.12/site-packages/scipy/_lib/_util.py)

## Load Cleaned Data

In [None]:
# Location of the cleaned Brazil forest-loss CSV. Set the FOREST_LOSS_CSV
# environment variable to run the notebook on another machine; the fallback is
# the original author-specific path, so existing runs are unchanged.
forest_loss_csv = os.environ.get(
    "FOREST_LOSS_CSV",
    "/Users/mustakahmad/Library/CloudStorage/OneDrive-purdue.edu/FACAI LAB/Project3_tariff/tariff_war/raw_data/2025_22july_clean_forest_loss_brazil.csv",
)
df_loss_brazil = pd.read_csv(forest_loss_csv)

# Fail early, with a readable message, if the columns the later cells use
# (state, year, forest_loss_ha) are not present in the file.
required_columns = {"state", "year", "forest_loss_ha"}
missing_columns = required_columns - set(df_loss_brazil.columns)
if missing_columns:
    raise ValueError(
        f"{forest_loss_csv} is missing required columns: {sorted(missing_columns)}"
    )

df_loss_brazil.head()

In [None]:
# Distinct values of the `state` column, in order of first appearance.
df_loss_brazil['state'].unique()

## Difference-in-Differences Regression

In [None]:
# Treatment group: the five states designated as most impacted.
# NOTE(review): the selection criterion is not shown in this notebook — confirm.
top5 = ["Pará", "Mato Grosso", "Rondônia", "Amazonas", "Acre"]

# Fail loudly if a treated state name does not appear in the data (e.g. the CSV
# uses unaccented or differently normalised spellings). Without this check the
# state would silently be assigned to the control group.
missing_states = sorted(set(top5) - set(df_loss_brazil['state'].dropna()))
if missing_states:
    raise ValueError(f"Treated states not found in 'state' column: {missing_states}")

# Vectorised 0/1 indicators: treated state, post-period (2019 onward), and
# their interaction, whose coefficient is the DiD estimate.
df_loss_brazil['Treated'] = df_loss_brazil['state'].isin(top5).astype(int)
df_loss_brazil['Post'] = (df_loss_brazil['year'] >= 2019).astype(int)
df_loss_brazil['DiD'] = df_loss_brazil['Treated'] * df_loss_brazil['Post']

# The formula interface drops rows with a missing outcome; drop them up front
# so the cluster labels line up one-to-one with the estimation sample.
did_sample = df_loss_brazil.dropna(subset=['forest_loss_ha'])

# Integer cluster id per state. Casting to str first keeps any row with a
# missing state label in the sample (as its own cluster) instead of producing
# a negative code.
state_codes, _ = pd.factorize(did_sample['state'].astype(str))

# Run DiD model. Observations from the same state are repeated over years, so
# standard errors are clustered by state; point estimates are identical to
# plain OLS.
model = smf.ols('forest_loss_ha ~ Treated + Post + DiD', data=did_sample).fit(
    cov_type='cluster',
    cov_kwds={'groups': state_codes},
)
print(model.summary())


## Forest Loss Over Time by Group

In [None]:
# Mean forest loss per (year, treatment arm), with a readable label for the legend.
group_labels = {1: 'Treated (Top 5 States)', 0: 'Control (Other States)'}
avg_loss_by_group = (
    df_loss_brazil
    .groupby(['year', 'Treated'], as_index=False)['forest_loss_ha']
    .mean()
)
avg_loss_by_group['Group'] = avg_loss_by_group['Treated'].map(group_labels)

# One line per group; the dashed vertical rule sits between 2018 and 2019.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(
    data=avg_loss_by_group,
    x='year',
    y='forest_loss_ha',
    hue='Group',
    marker='o',
    ax=ax,
)
ax.axvline(2018.5, color='black', linestyle='--', label='Tariff Introduced (2019)')
ax.set_title('Forest Loss Trends: Treated vs. Control (2001–2023)')
ax.set_ylabel('Avg Forest Loss (ha)')
ax.set_xlabel('Year')
ax.grid(True)
ax.legend()
fig.tight_layout()
plt.show()


## DiD Actual vs Counterfactual Trend

In [None]:
# Pre-period level difference between the treated and control groups.
pre = df_loss_brazil.loc[df_loss_brazil['Post'].eq(0)]
treated_pre_avg = pre.loc[pre['Treated'].eq(1), 'forest_loss_ha'].mean()
control_pre_avg = pre.loc[pre['Treated'].eq(0), 'forest_loss_ha'].mean()
gap = treated_pre_avg - control_pre_avg

# Yearly control mean shifted by the pre-period gap serves as the
# counterfactual path for the treated group.
control_trend = (
    df_loss_brazil.loc[df_loss_brazil['Treated'].eq(0)]
    .groupby('year', as_index=False)['forest_loss_ha']
    .mean()
)
control_trend['counterfactual_treated'] = control_trend['forest_loss_ha'] + gap
treated_trend = (
    df_loss_brazil.loc[df_loss_brazil['Treated'].eq(1)]
    .groupby('year', as_index=False)['forest_loss_ha']
    .mean()
)

# Align actual and counterfactual series on year (inner join).
cf_merge = (
    control_trend[['year', 'counterfactual_treated']]
    .merge(treated_trend, on='year')
    .rename(columns={'forest_loss_ha': 'actual_treated'})
)

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(cf_merge['year'], cf_merge['actual_treated'], marker='o', label='Actual (Treated)')
ax.plot(
    cf_merge['year'],
    cf_merge['counterfactual_treated'],
    linestyle='--',
    marker='o',
    label='Counterfactual (No Tariff)',
)
ax.axvline(2018.5, color='black', linestyle='--', label='Tariff Introduced')
ax.set_title('Difference-in-Differences: Actual vs. Counterfactual')
ax.set_xlabel('Year')
ax.set_ylabel('Avg Forest Loss (ha)')
ax.grid(True)
ax.legend()
fig.tight_layout()
plt.show()
