1. Loads data
2. Estimates LRT 
3. Censors all transition matrices
4. Writes to disk 

This notebook relies on the functions in `censoring.py`, which can also read the data back in and re-normalize the censored transition matrices.

# Setup

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
    
import time
import copy
from itertools import product
import numpy as np

import matplotlib.pyplot as plt
from cycler import cycler
# Plot styling: ten hex colors installed as the default axes color cycle,
# plus a list of marker symbols.
color_list = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
    '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
]
marker_list = ['o', 'x', 'd', 'v', '^', '<', '>']

color_cycle = cycler('color', color_list)
plt.rc('axes', prop_cycle=color_cycle)

from LRT import LRT
from LRT import moments
from LRT import figfuns
from LRT import PT
from LRT import PCTBIN

from LRT import censoring

**Setup parameter struct:**

In [None]:
# Parameter struct passed to the estimation, simulation and plotting calls below.
par = LRT.ParStruct()
# Suffix inserted into the input file name (`data/logY{SAMPLE}.csv`).
SAMPLE = '_p100'

## Load data

In [None]:
# a. load (the CSV is transposed on read; the second axis has length par.N)
data = LRT.dataStruct()
data.logY = np.genfromtxt(f'data/logY{SAMPLE}.csv', delimiter=',').T
T, par.N = data.logY.shape
# NOTE(review): T is not used below -- the moment loop runs over par.T
# instead. Confirm that par.T agrees with the number of rows in the data.

# b. rescale so that the mean of the first row of Y equals one
Y_lev = np.exp(data.logY)  # levels before rescaling
Y = Y_lev / Y_lev[0].mean()
data.logY = np.log(Y)

# c. save simple data moments (one entry per row t)
mean_Y, mean_Y_lev, mean_logY, var_logY = (np.ones(par.T) for _ in range(4))

for t in range(par.T):
    mean_Y[t] = Y[t].mean()
    mean_Y_lev[t] = Y_lev[t].mean()
    mean_logY[t] = data.logY[t].mean()
    var_logY[t] = data.logY[t].var()

# d. make the simulation sample at least as large as the data sample
par.simN = max(par.N, par.simN)

# Estimate 

## LRT

The preferred specification is depth 6. The amount of censoring (observations and cells in the transition matrices, averaged over all ages) for various depths is:
* depth 5: 0.15% of observations; 15.07% of cells, 
* depth 6: 0.56% of observations; 16.56% of cells,
* depth 7: 1.68% of observations; 14.53% of cells. 

In [None]:
# Each specification is (tree depth, short name, display name).
specifications = [
    (6, 'LRT_depth_6', 'LRT (depth 6)'),
]

models_LRT = []
for depth, name_short, name in specifications:

    # a. settings
    par.k, par.k_lead, par.depth = 5, 30, depth

    # b. estimate and tag the result with its short name
    model = LRT.estimate(par, data, name)
    model.name_short = name_short

    # c. collect
    models_LRT.append(model)

In [None]:
# The first (and, with the single specification above, only) estimated model.
prefmodel = models_LRT[0] 

## Write the model to disk

In [None]:
# Settings for writing the censored estimates.
DOSAVE = True  # set to False to skip writing to disk
min_obs_per_cell = 5  # passed to write_model_to_disk -- presumably the censoring threshold; verify in censoring.py
model = prefmodel
out_dir = 'censored_estimates'

# Honor the DOSAVE switch, which was previously defined but never consulted.
if DOSAVE:
    censoring.write_model_to_disk(out_dir, model, min_obs_per_cell, DOCENSOR=True)

# Testing: reading back, simulating and plotting

## Read back in

In [None]:
# Read the estimates back from `out_dir`, passing the same T and N as in `par`.
m_load = censoring.read_censored_data(in_dir=out_dir, T=par.T, N=par.N)

## Simulate and compute

This verifies that the censoring has not drastically changed what the simulations look like.

In [None]:
# simulate data from the exact estimated model (LRT.simulate) and from the
# model read back from disk (censoring.simulate); each result is stored on
# the model's `.data` attribute
prefmodel.data = LRT.simulate(par, prefmodel, data)
m_load.data = censoring.simulate(m_load)

In [None]:
# compute moments for the observed data and for both simulated data sets
for x in [data, m_load.data, prefmodel.data]: 
    moments.calc_all(par,x,printprogress=True) 

## Plot

In [None]:
# Names distinguishing the two models -- presumably used as plot labels by figfuns; verify.
prefmodel.name = 'LRT, exact'
m_load.name = 'LRT, censored'

In [None]:
# Marker symbol for the exact model -- presumably read by the figfuns plots; verify.
prefmodel.marker = 'x'

In [None]:
# Second entry of color_list for the exact model -- presumably read by the figfuns plots; verify.
prefmodel.color = color_list[1]

In [None]:
# Age profile of `logY` for the data and both models.
# NOTE(review): the meaning of the positional argument 0 is not visible here -- see figfuns.age_profile.
figfuns.age_profile(par, data, [prefmodel, m_load], 0, varname_str='logY', prefix='censoring')

In [None]:
# Age profile of `dlogY` for the data and both models.
# NOTE(review): the meaning of the positional argument 5 is not visible here -- see figfuns.age_profile.
figfuns.age_profile(par, data, [prefmodel, m_load], 5, 
                    varname_str='dlogY', prefix='censoring')