# Reinhart-Rogoff replication

* Replication of Reinhart-Rogoff "Growth in a Time of Debt."
* Python port of R code by Thomas Herndon | Michael Ash | Robert Pollin
* https://scholar.harvard.edu/files/rogoff/files/growth_in_time_debt_aer.pdf
* Author: Vincent Arel-Bundock varel@umich.edu
* Data: https://gist.github.com/vincentarelbundock/5409893/raw/a623f2f3bae027a0e51dd01ac5b70d44d909a7b9/RR-processed.csv

In [None]:
import statsmodels.api as sm
import patsy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the processed dataset into the frame every later cell works on.
# The path is relative, so 'RR-processed.csv' must sit in the working directory.
RR = pd.read_csv('RR-processed.csv')

## Number of observations per country

In [None]:
# Row count for each distinct value of the Country column.
RR.groupby(RR['Country']).size()

## Bins

In [None]:
# Label each row with its debt/GDP bracket. np.digitize returns the 1-based
# position of the half-open interval [edge_i, edge_{i+1}) that contains the
# value, so subtracting 1 gives a 0-based index into the label list.
# NOTE(review): a negative debtgdp would give index -1 (silently the last
# label) and a missing value would presumably land past the end of the list;
# this assumes debtgdp is non-negative and non-missing -- confirm.

# Four brackets: 0-30, 30-60, 60-90, 90 and above.
bins = ["0-30%","30-60%","60-90%","Above 90%"]
RR['dgcat'] = [
    bins[code]
    for code in np.digitize(RR['debtgdp'], [0, 30, 60, 90, np.inf]) - 1
]

# Five brackets: the top bracket is split at 120.
bins = ["0-30%","30-60%","60-90%","90-120%","Above 120%"]
RR['dgcat2'] = [
    bins[code]
    for code in np.digitize(RR['debtgdp'], [0, 30, 60, 90, 120, np.inf]) - 1
]

## Regression analysis

In [None]:
# OLS of dRGDP on the four-bracket debt category. Rows with a missing value in
# either column are dropped before the design matrices are built.
y, X = patsy.dmatrices(
    'dRGDP ~ dgcat',
    data=RR[['dRGDP', 'dgcat']].dropna(),
)
print(sm.OLS(y, X).fit().summary())

In [None]:
# Same regression with the five-bracket debt category (dgcat2).
y2, X2 = patsy.dmatrices(
    'dRGDP ~ dgcat2',
    data=RR[['dRGDP', 'dgcat2']].dropna(),
)
print(sm.OLS(y2, X2).fit().summary())

## Table 3 Corrected

In [None]:
## Country-Year average by debtgdp ("correct weights"):
## mean of dRGDP over all rows in each dgcat bracket, one row = one weight.
RR['dRGDP'].groupby(RR['dgcat']).mean()

In [None]:
## Country averages by debtgdp (the inputs to the "equal weights" figure):
## mean dRGDP per (Country, dgcat) pair, pivoted so brackets become columns.
## This cell shows the per-country table; it does not average across countries.
RR['dRGDP'].groupby([RR['Country'], RR['dgcat']]).mean().unstack()

In [None]:
## Country-Year average by debtgdp ("correct weights"), expanded categories:
## mean of dRGDP over all rows in each dgcat2 bracket.
RR['dRGDP'].groupby(RR['dgcat2']).mean()

In [None]:
## Country averages by debtgdp, expanded categories (inputs to "equal weights"):
## mean dRGDP per (Country, dgcat2) pair, pivoted so brackets become columns.
RR['dRGDP'].groupby([RR['Country'], RR['dgcat2']]).mean().unstack()

## Selective treatment of early years

In [None]:
# Rows to exclude: early years for three countries (New Zealand before 1950,
# Australia and Canada before 1951). Each country/year pair is parenthesised
# explicitly; `&` already binds tighter than `|`, so the grouping is unchanged.
idx = (
    ((RR['Country'] == 'New Zealand') & (RR['Year'] < 1950))
    | ((RR['Country'] == 'Australia') & (RR['Year'] < 1951))
    | ((RR['Country'] == 'Canada') & (RR['Year'] < 1951))
)
# Keep everything that is not flagged.
RR_selective = RR[~idx]
# Pooled mean of dRGDP per bracket on the reduced sample.
RR_selective['dRGDP'].groupby(RR_selective['dgcat']).mean()

## Equal weights
## Table 3 Weights,Exclusion

In [None]:
# Column-wise mean of every numeric column in the selective sample.
# NOTE(review): this averages all numeric columns rather than dRGDP by debt
# bracket -- confirm it is the intended figure for this section.
RR_selective.select_dtypes(
    include=np.number,
).mean()

## Correct weights
## Table 3 Selective years exclusion

In [None]:
# Mean dRGDP per (Country, dgcat) pair on the selective sample, with the
# brackets pivoted into columns.
RR_selective['dRGDP'].groupby(
    [RR_selective['Country'], RR_selective['dgcat']]
).mean().unstack()

## Additionally dropping the countries excluded by the spreadsheet error

In [None]:
# Countries removed from the selective sample in this step.
drop = ["Australia","Austria","Belgium","Canada","Denmark"]
# Boolean mask: True for rows whose country is NOT in the drop list.
idx = ~RR_selective['Country'].isin(drop)
RR_selective_spreadsheet = RR_selective[idx]
# Group by the filtered frame's own bracket column. The earlier form passed
# RR.dgcat (the unfiltered frame) and only worked through implicit index
# alignment; using the same frame for values and keys matches the other cells.
RR_selective_spreadsheet['dRGDP'].groupby(RR_selective_spreadsheet['dgcat']).mean()

## New Zealand transcription error

In [None]:
# NOTE(review): no New Zealand-specific adjustment is applied in this cell; it
# recomputes the pooled per-bracket mean on RR_selective_spreadsheet, so the
# output matches the previous cell. `a` is assigned but not used here.
a = RR_selective_spreadsheet['Country']
b = RR_selective_spreadsheet['dgcat']
RR_selective_spreadsheet['dRGDP'].groupby(b).mean()

## Medians

In [None]:
# Median dRGDP per dgcat bracket over the full sample
# (labelled "Correct, equal weight" in the original).
RR['dRGDP'].groupby(RR['dgcat']).median()

In [None]:
# Median dRGDP per dgcat2 bracket over the full sample
# (labelled "Correct, expanded categories, equal weight" in the original).
RR['dRGDP'].groupby(RR['dgcat2']).median()

## Counts of years

In [None]:
# Rows per (Country, dgcat) pair, pivoted to one column per bracket and then
# summed down the countries: the number of country-years in each bracket.
RR['Country'].groupby([RR['Country'], RR['dgcat']]).size().unstack().sum()

In [None]:
# Country-years per bracket in the selective sample. The grouping keys come
# from RR_selective itself rather than from the unfiltered RR, so the result
# does not depend on implicit index alignment between two frames.
RR_selective['Country'].groupby(
    [RR_selective['Country'], RR_selective['dgcat']]
).size().unstack().sum()

In [None]:
# Country-years per bracket after both exclusions. The grouping keys come from
# RR_selective_spreadsheet itself rather than from the unfiltered RR, so the
# result does not depend on implicit index alignment between two frames.
RR_selective_spreadsheet['Country'].groupby(
    [RR_selective_spreadsheet['Country'], RR_selective_spreadsheet['dgcat']]
).size().unstack().sum()

## Categorical scatterplot

In [None]:
# One array of dRGDP values per four-bracket label, in label order, drawn as
# side-by-side violins. The return value of violinplot is printed as-is.
labels = ["0-30%","30-60%","60-90%","Above 90%"]
dat = [
    np.array(RR.loc[RR['dgcat'] == label, 'dRGDP'])
    for label in labels
]
print(sm.graphics.violinplot(dat, labels=labels))

In [None]:
# Same plot for the five-bracket categorisation (dgcat2).
labels = ["0-30%","30-60%","60-90%","90-120%","Above 120%"]
dat = [
    np.array(RR.loc[RR['dgcat2'] == label, 'dRGDP'])
    for label in labels
]
print(sm.graphics.violinplot(dat, labels=labels))

## Country-Year average by debtgdp for more recent samples


In [None]:
# Start years for the sub-samples: 1950, 1960, ..., 2000.
years = range(1950, 2001, 10)


def f(x):
    """Return (x, mean dRGDP per dgcat bracket) using only rows with Year >= x."""
    recent = RR[RR['Year'] >= x]
    return (x, recent['dRGDP'].groupby(recent['dgcat']).mean())


[f(x) for x in years]

## Lagged dependent variable

In [None]:
def new_func():
    """Fit and print an OLS of dRGDP on dgcat plus the lagged dRGDP.

    The regression needs a 'dRGDP_lag' column. When RR does not have one yet,
    the lag is built on a local copy (previous row's dRGDP within each Country,
    after ordering by Country and Year), so this cell runs on its own and does
    not modify the module-level RR. Rows with a missing value in any of the
    three columns are dropped before fitting.
    """
    frame = RR
    if 'dRGDP_lag' not in frame.columns:
        # sort_values returns a new frame, so RR itself is left untouched.
        frame = frame.sort_values(['Country', 'Year'])
        frame = frame.assign(
            dRGDP_lag=frame.groupby('Country')['dRGDP'].shift(1)
        )
    y, X = patsy.dmatrices(
        'dRGDP ~ dgcat + dRGDP_lag',
        data=frame[['dRGDP', 'dgcat', 'dRGDP_lag']].dropna(),
    )
    print(sm.OLS(y, X).fit().summary())

new_func()


## Fixed effects

In [None]:
# Add the one-row lag of dRGDP unless the column is already present.
if 'dRGDP_lag' not in RR.columns:
    # Order rows by country and then year so that, within each country,
    # shift(1) picks up the dRGDP of the preceding row.
    RR = RR.sort_values(by=['Country', 'Year'])
    RR['dRGDP_lag'] = RR.groupby('Country')['dRGDP'].shift(periods=1)

# OLS of dRGDP on debt bracket, lagged dRGDP, and Country as an extra term in
# the formula. Rows with a missing value in any of the four columns are dropped.
y, X = patsy.dmatrices(
    'dRGDP ~ dgcat + dRGDP_lag + Country',
    data=RR[['dRGDP', 'dgcat', 'dRGDP_lag', 'Country']].dropna(),
)
print(sm.OLS(y, X).fit().summary())