# Setup

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
    
import time
import copy
import numpy as np
from sklearn import tree
from io import StringIO
from inspect import getmembers

import matplotlib.pyplot as plt
from cycler import cycler
# Shared plotting palette: ten hex colours (the same values as matplotlib's
# default "tab10" cycle) and a short list of marker shapes. Both lists are
# indexed by position when styling the individual models further down.
color_list = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
    '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
]
marker_list = ['o', 'x', '*', 'v', '^', 'd']

# make every new axes cycle through color_list
plt.rc('axes', prop_cycle=cycler('color', color_list))

from LRT import LRT
from LRT import moments
from LRT import figfuns

**Parameter struct**

In [None]:
# suffix inserted into the data file names (data/logY{SAMPLE}.csv etc.)
SAMPLE = '_p100'

# parameter container with the package defaults; selected fields are
# overwritten after the data has been loaded
par = LRT.ParStruct()

## Load data

In [None]:
# a. load
data = LRT.dataStruct()

# the CSV is transposed on load, so the array shape unpacks as (T, N)
logY_raw = np.genfromtxt(f'data/logY{SAMPLE}.csv', delimiter=',')
data.logY = logY_raw.T
par.T, par.N = data.logY.shape

# birth years, also transposed; the very same array is referenced once per period
birthyears_raw = np.genfromtxt(f'data/birthyear{SAMPLE}.csv', delimiter=',')
data.birthyears = birthyears_raw.T
data.exogenous_features = [data.birthyears] * par.T

print(f'(T,N) = ({par.T},{par.N})')

# b. rescale
# Y_lev keeps the un-normalised levels; Y is divided by the average of the
# first row (avg. income at age 0) before going back to logs
Y_lev = np.exp(data.logY)
Y = Y_lev / Y_lev[0, :].mean()
data.logY = np.log(Y)

# c. update par
par.simN = max(par.N, par.simN)  # simulate at least as many units as observed
par.k = 5
par.k_lead = 30
par.depth = 10

# second parameter set: identical copy, but with exogenous features switched on
par_dums = copy.deepcopy(par)
par_dums.use_exogenous_features = True

# Estimation

In [None]:
# baseline model: estimated with the plain parameter set `par`
model_base = LRT.estimate(
    par,
    data,
    name='LRT (baseline)',
    color=color_list[0],
)

# extra attributes attached to the returned model (short name and marker)
model_base.name_short = 'LRT_baseline'
model_base.marker = marker_list[0]

In [None]:
# cohort model: estimated with `par_dums`, i.e. exogenous features enabled
model_dums = LRT.estimate(
    par_dums,
    data,
    name='LRT (cohort)',
    color=color_list[1],
)

# extra attributes attached to the returned model (short name and marker)
model_dums.name_short = 'LRT_cohort'
model_dums.marker = marker_list[1]

# set the flag on the model's own parameter object as well
model_dums.par.use_exogenous_features = True

# Feature importance

In [None]:
# Average feature importance of the cohort model, taken over the years whose
# tree was fitted on the full feature set.

# a. years
# Number of input features per yearly tree. Prefer `n_features_in_` and fall
# back to the legacy `n_features_` attribute only when the newer one is
# missing (recent scikit-learn releases no longer provide `n_features_`).
# NOTE(review): assumes model_dums.tree[t] are scikit-learn estimators - confirm.
n_feats = [
    tr.n_features_in_ if hasattr(tr, 'n_features_in_') else tr.n_features_
    for tr in (model_dums.tree[t] for t in range(par.T))
]
max_feat = max(n_feats)
ts = [t for t, n in enumerate(n_feats) if n == max_feat]

# b. feature importance
# one row per selected year, one column per feature; the importances are read
# once per tree instead of once per (tree, feature) pair
fis = np.array(
    [model_dums.tree[t].feature_importances_[:max_feat] for t in ts],
    dtype=float,
)

# c. print
# The labels treat the last column as the cohort feature and the one before it
# as current income; all earlier columns are labelled as income lags, counted
# down to lag 1. The lag count is derived from max_feat instead of being
# hard-coded to 5, so the labels stay consistent if par.k changes.
# NOTE(review): the column order is inferred from the labels used here -
# confirm against how LRT builds its feature matrix.
n_lags = max_feat - 2
for j in range(max_feat):
    if j == max_feat - 1:
        featname = 'cohorts:'
    elif j == max_feat - 2:
        featname = 'income, current:'
    else:
        featname = f'income, lag {n_lags - j}:'
    print(f'{featname:16} {np.mean(fis[:, j]):.4f}')

# Simulate

In [None]:
# simulate from each estimated model (baseline first, then cohort), using the
# model's own parameters, and store the result on the model as `.data`
for mdl in (model_base, model_dums):
    mdl.data = LRT.simulate(mdl.par, mdl, data)

# Moments

In [None]:
# models to compare; this list is reused by the figure calls below
models = [model_base, model_dums]

# moments of the observed data first ...
moments.calc_all(par, data, printprogress=True)

# ... then of each model's simulated data, under a header line with the model name
for model in models:
    print(model.name)
    moments.calc_all(model.par, model.data, printprogress=True)

## Figures 

In [None]:
prefix = 'cohorts'

# Age profiles: (integer positional argument, variable name), in plotting order.
# The integer is handed to figfuns.age_profile unchanged.
age_profile_specs = [
    (0, 'logY'),
    (1, 'dlogY'),
    (5, 'dlogY'),
]
for order, varname in age_profile_specs:
    figfuns.age_profile(par, data, models, order,
                        varname_str=varname, prefix=prefix)

# Heterogeneity figures: (variable name, integer positional argument,
# label text, bounds), in plotting order.
heterogenous_specs = [
    ('dlogY', 0, 'life-time growth (demeaned)', (-1.5,1.5)),
    ('autocorr', 1, 'first-order autocorr.', (-1,1)),
    ('autocorr_level', 1, 'first-order autocorr. (levels)', (-.25,1)),
    ('std_dlogY', 1, 'std. of income growth', (0,0.5)),
]
for varname, order, label, bounds in heterogenous_specs:
    figfuns.heterogenous(par, data, models,
                         varname, order, label,
                         prefix=prefix, bounds=bounds)