# Model â€” Naive TWFE DiD (Displacement)

Load `data/processed/panel_displacement.csv`, convert grouping/time to categorical, run a naive TWFE DiD (attack_count ~ treated_post + C(group_name) + C(time_period)), and save results to `reports/tables/naive_displacement.txt`.

In [1]:
from pathlib import Path
import os
import pandas as pd

def find_repo_root(start=Path.cwd()):
    p = start
    while True:
        if (p / 'notebooks').exists() or (p / '.git').exists() or (p / 'README.md').exists():
            return p
        if p == p.parent:
            raise FileNotFoundError('Could not find repository root from cwd')
        p = p.parent

ROOT = find_repo_root()
print('Repository root:', ROOT)

# ensure reports/tables exists when saving results
(ROOT / 'reports' / 'tables').mkdir(parents=True, exist_ok=True)

Repository root: /Users/kanishkraghavendra/Documents/Project/data-minds-causal-analysis


In [2]:
# Load the processed panel for displacement
data_path = ROOT / 'data' / 'processed' / 'panel_displacement.csv'
print('Loading:', data_path)
if not data_path.exists():
    raise FileNotFoundError(f'Expected dataset not found at {data_path!s}. Run notebooks/01_Data_Cleaning_and_Aggregation.ipynb first to produce it.')
df = pd.read_csv(data_path.as_posix(), parse_dates=['date'])

# Harmonize grouping column: prefer 'group_name' but fall back to 'target_type' if present
if 'group_name' not in df.columns:
    if 'target_type' in df.columns:
        df['group_name'] = df['target_type']
    else:
        raise KeyError('Neither group_name nor target_type found in displacement panel')

# Ensure attack_count column exists (some panels name it attack_count_monthly)
if 'attack_count' not in df.columns:
    if 'attack_count_monthly' in df.columns:
        df['attack_count'] = df['attack_count_monthly']
    else:
        raise KeyError('attack_count not found in displacement panel')

# Convert group_name to categorical
df['group_name'] = df['group_name'].astype('category')

# Convert date to a monthly time_period categorical variable
df['time_period'] = pd.to_datetime(df['date'], errors='coerce').dt.to_period('M').astype(str)
df['time_period'] = df['time_period'].astype('category')

print('Loaded dataframe shape:', df.shape)
df.head()

Loading: /Users/kanishkraghavendra/Documents/Project/data-minds-causal-analysis/data/processed/panel_displacement.csv
Loaded dataframe shape: (648, 11)


Unnamed: 0,target_type,date,attack_count_monthly,nkill_monthly,nwound_monthly,post,treated,treated_post,group_name,attack_count,time_period
0,Hard,1990-01-01,122,165.0,232.0,0,1,0,Hard,122,1990-01
1,Hard,1990-02-01,102,183.0,126.0,0,1,0,Hard,102,1990-02
2,Hard,1990-03-01,161,254.0,255.0,0,1,0,Hard,161,1990-03
3,Hard,1990-04-01,133,205.0,262.0,0,1,0,Hard,133,1990-04
4,Hard,1990-05-01,149,159.0,238.0,0,1,0,Hard,149,1990-05


In [3]:
# Run naive TWFE DiD using statsmodels OLS with factor (categorical) fixed effects
import statsmodels.formula.api as smf

formula = 'attack_count ~ treated_post + C(group_name) + C(time_period)'
print('Formula:', formula)

model = smf.ols(formula=formula, data=df).fit()
print(model.summary())

# Save the regression summary to reports/tables/naive_displacement.txt
out_path = ROOT / 'reports' / 'tables' / 'naive_displacement.txt'
with open(out_path.as_posix(), 'w') as f:
    f.write(model.summary().as_text())

print('Saved regression summary to', out_path)

Formula: attack_count ~ treated_post + C(group_name) + C(time_period)
                            OLS Regression Results                            
Dep. Variable:           attack_count   R-squared:                       0.968
Model:                            OLS   Adj. R-squared:                  0.935
Method:                 Least Squares   F-statistic:                     29.63
Date:                Wed, 12 Nov 2025   Prob (F-statistic):          3.47e-147
Time:                        22:15:39   Log-Likelihood:                -3224.0
No. Observations:                 648   AIC:                             7100.
Df Residuals:                     322   BIC:                             8559.
Df Model:                         325                                         
Covariance Type:            nonrobust                                         
                                coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

In [None]:
# Quick check: show where file was written and a few key stats
out_path = ROOT / 'reports' / 'tables' / 'naive_displacement.txt'
print('Result file exists:', out_path.exists())
print('\nRegression coefficients (head):')
print(model.params.head())

SyntaxError: unterminated string literal (detected at line 4) (3690999078.py, line 4)