# Setup

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
    
import time
import copy
from itertools import product
import numpy as np

import matplotlib.pyplot as plt
from cycler import cycler
# hex colors; installed below as the default color cycle for new axes
color_list  = ['#1f77b4','#ff7f0e','#2ca02c','#d62728','#9467bd','#8c564b','#e377c2','#7f7f7f','#bcbd22','#17becf']
# matplotlib marker symbols
marker_list = ['o','x','d','v','^','<','>']
# make every axes cycle through color_list
plt.rc('axes', prop_cycle=(cycler('color',color_list)))

from LRT import LRT
from LRT import moments
from LRT import figfuns
from LRT import PT
from LRT import PCTBIN

**Set up the parameter struct:**

In [None]:
# suffix that selects which income file is read (f'data/logY{SAMPLE}.csv')
SAMPLE = '_p100'

# parameter container used throughout the notebook
par = LRT.ParStruct()

## Load data

In [None]:
# a. load
# the file is transposed on read: the first axis of logY is the one indexed by
# t in the moment calculations below
data = LRT.dataStruct()
raw_logY = np.genfromtxt(f'data/logY{SAMPLE}.csv', delimiter=',')
data.logY = np.transpose(raw_logY)
# NOTE(review): the row count is unpacked into a local T, whereas everything
# below uses par.T -- presumably the two agree; confirm against LRT.ParStruct
T, par.N = data.logY.shape

# b. rescale (levels are divided by the mean of the first row)
Y_lev = np.exp(data.logY)
Y = Y_lev / np.mean(Y_lev[0,:])
data.logY = np.log(Y)

# c. save simple data moments (one value per t)
periods = range(par.T)
mean_Y = np.array([np.mean(Y[t,:]) for t in periods])
mean_Y_lev = np.array([np.mean(Y_lev[t,:]) for t in periods])
mean_logY = np.array([np.mean(data.logY[t,:]) for t in periods])
var_logY = np.array([np.var(data.logY[t,:]) for t in periods])

txt_opts = dict(delimiter=',', fmt='%f', newline='\r\n')
np.savetxt("data/mean_Y.txt", mean_Y, **txt_opts)
np.savetxt("data/mean_Y_lev.txt", mean_Y_lev, **txt_opts)
np.savetxt("data/mean_logY.txt", mean_logY, **txt_opts)
np.savetxt("data/var_logY.txt", var_logY, **txt_opts)

# d. upgrade simulation for large sample (never simulate fewer than par.N)
par.simN = max(par.N, par.simN)

## Histograms

In [None]:
# ages from par.agemin through par.agemax (inclusive), in steps of one
age_lo, age_hi = par.agemin, par.agemax
ages = np.arange(age_lo, age_hi + 1)

**Histograms** of income **levels**.

In [None]:
# common x-range for all panels: 0.1th and 99.9th percentile of the pooled log incomes
pcts = np.percentile(data.logY.flatten(), q=[0.1, 99.9])

for t in (0, 15, 29):

    fig, ax = figfuns.new()
    ax.hist(data.logY[t,:], bins=300)

    ax.set_xlabel('log income ($y_{it}$)')
    ax.set_ylabel('observations')
    ax.set_xlim(pcts)

    # one file per selected t, named after the matching entry of ages
    figfuns.save(fig, ax, name=f'hist_inc_level_age{ages[t]}')
    plt.close(fig)

**Histograms** of income **growth**.

In [None]:
# first differences of log income along the first axis (row t+1 minus row t)
growth = np.diff(data.logY, axis=0)

# common x-range for all panels: 0.1th and 99.9th percentile of the pooled differences
pcts = np.percentile(growth.flatten(), q=[0.1, 99.9])

for t in (0, 14, 28):

    fig, ax = figfuns.new()
    ax.hist(growth[t,:], bins=500)

    ax.set_xlabel(f'log income differences ($y_{{i{t+1}}} - y_{{i{t}}}$)')
    ax.set_ylabel('observations')
    ax.set_xlim(pcts)

    # growth[t] runs from t to t+1, so the file is named after ages[t+1]
    figfuns.save(fig, ax, name=f'hist_inc_growth_age{ages[t+1]}')
    plt.close(fig)

## Load ABB

In [None]:
# a. load abb (transposed on read, like the main data)
data_abbsim = LRT.dataStruct()
data_abbsim.logY = np.genfromtxt('data/abb_sim.csv', delimiter=',').T

# b. setup
par_abb = LRT.ParStruct()
par_abb.T, par_abb.N = data_abbsim.logY.shape

# c. normalize (levels are divided by the mean of the first row)
Y = np.exp(data_abbsim.logY)
Y /= np.mean(Y[0,:])
data_abbsim.logY = np.log(Y)

# d. add model details
model_abb = LRT.modelStruct()
model_abb.data = data_abbsim
for attr in ('type', 'name', 'name_short'):
    setattr(model_abb, attr, 'ABB')
# NOTE(review): par_abb is built above but not used here; the model gets a copy
# of par instead -- confirm which parameter struct is intended
model_abb.par = copy.deepcopy(par)

# Estimate and simulate

## LRT

In [None]:
models_LRT = []

# one estimation per tree depth
for depth in (7, 10):

    # a. settings
    par.depth = depth
    par.k = 5
    par.k_lead = 30

    # b. estimate
    model = LRT.estimate(par, data, f'LRT (depth {depth})')
    model.name_short = f'LRT_depth_{depth}'

    models_LRT.append(model)

# the last model estimated (depth 10) is the one saved in the next cell
prefmodel = models_LRT[-1]

**Save:**

In [None]:
# a. number of leafs
np.savetxt("data/num_leafs.txt",prefmodel.num_leafs,delimiter=',',fmt='%d',newline='\r\n')

# b. initial grouping: for each unique group label at t=0, the count of matching
#    entries in G[0] divided by par.N
num_groups_ini = prefmodel.num_leafs[0]
prob_G_ini = np.ones(num_groups_ini)
for i in range(num_groups_ini):
    in_group = (prefmodel.G[0] == prefmodel.uniqueG[0][i])
    prob_G_ini[i] = in_group.sum()/par.N
np.savetxt("data/prob_G_ini.txt",prob_G_ini,delimiter=',',fmt='%f',newline='\r\n')

# c. prediction and transitions
for t in range(0,par.T):

    # i. save ypred
    filename = "data/ypred_G_t{:d}.txt".format(t)
    np.savetxt(filename, prefmodel.ypred_G[t],delimiter=',',fmt='%f',newline='\r\n')

    # ii. distribution over groups (size of each group present at t);
    #     these summary numbers are computed but not written to disk in this cell
    maxG = np.amax(prefmodel.uniqueG[t])
    count = np.bincount(prefmodel.G[t], minlength=maxG+1)
    group_sizes = count[prefmodel.uniqueG[t]]
    minnum = np.min(group_sizes)
    # built-in int: the np.int alias was removed from NumPy
    p50num = int(np.median(group_sizes))
    maxnum = np.max(group_sizes)

    # nothing more is saved for t=0 (the trans file is skipped)
    if t == 0:
        continue

    # iii. save trans
    trans = np.copy(prefmodel.trans_obs[t])
    filename = "data/trans_t{:d}.txt".format(t)
    np.savetxt(filename,trans,delimiter=',',fmt='%d',newline='\r\n')

**Simulate:**

In [None]:
# replace each model's data by a simulation from that model
for model in models_LRT:
    simulated = LRT.simulate(model.par, model, data)
    model.data = simulated

## PT

In [None]:
# a. estimates
model_PT = PT.estimate(par, data)

# b. simulate
model_PT.data = PT.simulate(model_PT.par, model_PT, data)

# c. save (order in the file: sigma_psi, sigma_xi, P_ini_std)
PT_estimates = np.array([
    model_PT.sigma_psi,
    model_PT.sigma_xi,
    model_PT.P_ini_std,
])
np.savetxt("output/PT_estimates.txt", PT_estimates,
           delimiter=',', fmt='%f', newline='\r\n')

## PCTBIN

In [None]:
# a. estimate (with 20 bins)
model_PCTBIN = PCTBIN.estimate(
    par,
    data,
    num_bins=20,
)

# b. simulate
model_PCTBIN.data = PCTBIN.simulate(
    model_PCTBIN,
    data,
)

## List of models

In [None]:
# all models compared in the figures, in plotting order
models = [
    models_LRT[0],
    models_LRT[1],
    model_PT,
    model_PCTBIN,
    model_abb,
]

# colors and markers: the i'th model gets the i'th color and the i'th marker
for model, color, marker in zip(models, color_list, marker_list):
    model.color = color
    model.marker = marker

## Group means 

In [None]:
# One scatter figure per model: predicted group values (ypred_G) against age.
ages = np.arange(par.agemin, par.agemax+1)

for model in models_LRT + [model_PCTBIN]:

    fig, ax = figfuns.new()

    for t in range(par.T):

        y = model.ypred_G[t]
        # scale=0.0 means no horizontal jitter: every point sits exactly at ages[t]
        age = ages[t]*np.random.normal(loc=1.0,scale=0.0,size=y.shape)
        ax.plot(age, y, linestyle='', marker='o',markersize=2,color=model.color)

    # label, save and close once per model, after all ages have been drawn
    # (raw string: '\m' is not a valid escape sequence in a normal string)
    ax.set_ylabel(r'log income ($\mu_{gt}$)')
    ax.set_xlabel('age')

    figfuns.save(fig,ax,f'group_means_of_income_{model.name_short}')
    plt.close(fig)

## Transition rates

In [None]:
def censor_trans_mat(trans_mat, trans_obs):
    """Return a rescaled copy of ``trans_mat`` with every entry raised to the power 0.75.

    ``trans_mat`` itself is left untouched; the result is a new object.
    ``trans_obs`` is only used to build a mask that is currently unused.
    """

    # NOTE(review): this mask (cells with 1 to 5 observations) is built but never
    # applied, so despite the function name nothing is censored -- confirm intent
    _few_obs = (trans_obs > 0) & (trans_obs <= 5)

    rescaled = copy.deepcopy(trans_mat)
    return rescaled**0.75

def plot_trans_mat(model,t):
    """Draw ``model.trans[t]`` (after ``censor_trans_mat``) as a heat map and save it.

    The file name is built from ``model.name_short`` only, so calling this for
    several ``t`` with the same model writes to the same name.
    """

    fig, ax = figfuns.new()
    ax.grid(False)

    # heat map of the rescaled transition matrix
    heat = censor_trans_mat(model.trans[t], model.trans_obs[t])
    ax.imshow(heat, cmap='hot_r', interpolation='nearest')

    # axis titles
    ax.set_xlabel(f'groups $t={t+1}$')
    ax.set_ylabel(f'groups $t={t}$')

    # no ticks on either axis
    ax.set_yticks([])
    ax.set_xticks([])
    ax.set_yticklabels([])

    fig.tight_layout()
    figfuns.save(fig, ax, name=f'trans_mat_{model.name_short}')
    plt.close()
    

In [None]:
# index t of the transition matrix that is drawn for every model
t = 10
for model in (models_LRT[0], models_LRT[1], model_PCTBIN):
    plot_trans_mat(model, t)

# Moments

In [None]:
# moments of the observed data
moments.calc_all(
    par,
    data,
    printprogress=True,
)

In [None]:
# same moments for every model, using the model's own parameters and data
for model in models:

    print(model.name)

    moments.calc_all(
        model.par,
        model.data,
        printprogress=True,
    )

# Figures 

## Levels (life-cycle profile)  

In [None]:
# life-cycle profile of log-income levels
figfuns.age_profile(
    par, data, models, 0,
    varname_str='logY',
    prefix='compare',
)

## Changes (life-cycle profile)  

In [None]:
# life-cycle profile of 1-year log-income changes
figfuns.age_profile(
    par, data, models, 1,
    varname_str='dlogY',
    prefix='compare',
)

##  5-year changes (life-cycle profile)  

In [None]:
# life-cycle profile of 5-year log-income changes
figfuns.age_profile(
    par, data, models, 5,
    varname_str='dlogY',
    prefix='compare',
)

##  10-year changes (life-cycle profile)  

In [None]:
# life-cycle profile of 10-year log-income changes
figfuns.age_profile(
    par, data, models, 10,
    varname_str='dlogY',
    prefix='compare',
)

## Changes (over recent income and age-groups)  

In [None]:
# every (model, moment) combination for 1-year changes; models vary slowest
k = 1
for model, mom_str in product(models, ['mean','var','skew','kurt']):

    figfuns.REpercs_profile(par, data, model, k, mom_str,
                            varname_str='dlogY',
                            age_grp_str='detail',
                            perc_grp_str='RE',
                            modelname=model.name_short)


## 5-year changes (over RE and age-groups)

In [None]:
# every (model, moment) combination for 5-year changes; models vary slowest
k = 5
for model, mom_str in product(models, ['mean','var','skew','kurt']):

    figfuns.REpercs_profile(par, data, model, k, mom_str,
                            varname_str='dlogY',
                            age_grp_str='detail',
                            perc_grp_str='RE',
                            modelname=model.name_short)


## 10-year changes (over RE and age-groups)

In [None]:
# every (model, moment) combination for 10-year changes; models vary slowest
k = 10
for model, mom_str in product(models, ['mean','var','skew','kurt']):

    figfuns.REpercs_profile(par, data, model, k, mom_str,
                            varname_str='dlogY',
                            age_grp_str='detail',
                            perc_grp_str='RE',
                            modelname=model.name_short)

## Covariances over age 

In [None]:
# one covariance figure per model, prefixed with the model's short name
for model in models:
    figfuns.cov(par, data, model, prefix=model.name_short)

## Autocorrelations  

In [None]:
# autocorrelations of 'dlogY' for k = 1, 2 and 5, one call per model
for model in models:
    figfuns.autocorr(
        par, data, model,
        k_list=[1,2,5],
        varname_str='dlogY',
        modelname=model.name_short,
    )

In [None]:
# autocorrelations of 'logY' for k = 1, 2 and 5, one call per model
for model in models:
    figfuns.autocorr(
        par, data, model,
        k_list=[1,2,5],
        varname_str='logY',
        modelname=model.name_short,
    )

## Covariances of levels and future growth 

In [None]:
# one figure per model, prefixed with the model's short name
for model in models:
    short_name = model.name_short
    figfuns.cov_YdY(par, data, model, prefix=short_name)

## Heterogeneity

In [None]:
# 'dlogY' with the axis label 'life-time growth (demeaned)'
figfuns.heterogenous(
    par, data, models,
    'dlogY', 0, 'life-time growth (demeaned)',
    prefix='compare',
    bounds=(-1.5,1.5),
)

In [None]:
# 'autocorr' with the axis label 'first-order autocorr.'
figfuns.heterogenous(
    par, data, models,
    'autocorr', 1, 'first-order autocorr.',
    prefix='compare',
    bounds=(-1,1),
)

In [None]:
# 'autocorr_level' with the axis label 'first-order autocorr. (levels)'
figfuns.heterogenous(
    par, data, models,
    'autocorr_level', 1, 'first-order autocorr. (levels)',
    prefix='compare',
    bounds=(-.25,1),
)

In [None]:
# 'std_dlogY' with the axis label 'std. of income growth'
figfuns.heterogenous(
    par, data, models,
    'std_dlogY', 1, 'std. of income growth',
    prefix='compare',
    bounds=(0,0.5),
)

## Compare fit across models for a single age group over RE percentiles

In [None]:
# mean changes, for k in {1, 5} and age-group index in {0, 4}; k varies slowest
mom_str = 'mean'
for k in [1,5]:
    for iage in [0,4]:
        figfuns.REpercs_profile_compare_models(
            par, data, models, k, mom_str,
            varname_str='dlogY',
            age_grp_str='detail',
            perc_grp_str='RE',
            iage=iage,
            prefix='compare',
        )

## Within vs. between

In [None]:
# second, third and fourth central moment for the second model in the list
within_model = models[1]
for central_moment in (2, 3, 4):
    figfuns.moments_within_groups(
        data, within_model, marker_list, color_list,
        central_moment=central_moment,
        kk=[1,5,10],
        prefix=within_model.name_short,
    )