In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler

In [2]:
data_dir = r'~/rawdata/fannie_data/'
static_data_fname = r'fannie_acquisition.csv'
dynamic_data_fname = r'fannie_performance.csv'

static_data = pd.read_csv(data_dir + static_data_fname)
dynamic_data = pd.read_csv(data_dir + dynamic_data_fname)
print(static_data.shape)
print(dynamic_data.shape)

(200000, 27)
(5246529, 36)


In [3]:
assert set(dynamic_data['loan_id'].unique()) ^ set(static_data['loan_id'].unique()) == set(), \
        "Different Loan IDs in static and dynamic file"

In [4]:
loan_id_sample = pd.Series(dynamic_data['loan_id'].unique()).sample(n=10000, random_state=0)

In [5]:
dynamic_data_sample = dynamic_data[dynamic_data['loan_id'].isin(loan_id_sample)].copy()
static_data_sample = static_data[static_data['loan_id'].isin(loan_id_sample)].copy()

In [7]:
import pandas_profiling
dynamic_data_sample.profile_report()

HBox(children=(HTML(value='Summarize dataset'), FloatProgress(value=0.0, max=50.0), HTML(value='')))




HBox(children=(HTML(value='Generate report structure'), FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(HTML(value='Render HTML'), FloatProgress(value=0.0, max=1.0), HTML(value='')))






In [6]:
dynamic_data_sample['reporting_period'] = pd.to_datetime(dynamic_data_sample['reporting_period'])

dynamic_data_sample['DQ_STATUS'] = dynamic_data_sample['current_delinquency_status'].replace('X', -1).astype(float).clip(upper=3)
dynamic_data_sample['NEXT_DQ_STATUS'] = dynamic_data_sample.groupby('loan_id')['DQ_STATUS'].shift(-1)

In [7]:
tm = dynamic_data_sample.pivot_table(index = 'DQ_STATUS', columns = 'NEXT_DQ_STATUS', values = 'current_actual_upb',  aggfunc = 'sum')
tm.apply(lambda x: x/x.sum(), axis = 1) * 100

NEXT_DQ_STATUS,-1.0,0.0,1.0,2.0,3.0
DQ_STATUS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,0.803426,98.801534,0.393958,0.000756,0.000326
1.0,2.360727,51.925987,34.167633,11.50522,0.040433
2.0,3.693141,15.185924,19.305705,26.354134,35.461096
3.0,0.449299,7.044059,3.507152,0.794581,88.204908


In [8]:
dynamic_data_sample['orig_date'] = pd.to_datetime(dynamic_data_sample['first_payment_date']) - pd.DateOffset(months = 1)

In [9]:
dyn_cols = ['loan_id', 'reporting_period', 'orig_date', 'loan_age', 'current_actual_upb', 'current_interest_rate',
            'property_state', 'DQ_STATUS', 'NEXT_DQ_STATUS']

In [10]:
dynamic_model_data = dynamic_data_sample[dyn_cols]

In [11]:
latest_reporting_period = dynamic_model_data['loan_id'].map(dynamic_model_data.groupby('loan_id')['reporting_period'].max())
dynamic_model_data = dynamic_model_data[dynamic_model_data['reporting_period'] < latest_reporting_period]

dynamic_model_data['current_actual_upb'] = dynamic_model_data['current_actual_upb'].fillna(method = 'bfill')

dynamic_model_data = dynamic_model_data.dropna()

In [12]:
dynamic_model_data.isna().sum()

loan_id                  0
reporting_period         0
orig_date                0
loan_age                 0
current_actual_upb       0
current_interest_rate    0
property_state           0
DQ_STATUS                0
NEXT_DQ_STATUS           0
dtype: int64

In [13]:
ten_rate = pandas_datareader.fred.FredReader('GS10', start='20100101', end='20200731').read()

In [14]:
dynamic_model_data['ten_rate'] = dynamic_model_data['reporting_period'].map(ten_rate['GS10'])
dynamic_model_data['coupon_differential'] = dynamic_model_data['current_interest_rate'] - dynamic_model_data['ten_rate']
dynamic_model_data.head(5)

Unnamed: 0,loan_id,reporting_period,orig_date,loan_age,current_actual_upb,current_interest_rate,property_state,DQ_STATUS,NEXT_DQ_STATUS,ten_rate,coupon_differential
733,111752245193,2016-07-01,2016-06-01,1,147764.02,4.125,AL,0.0,0.0,1.5,2.625
734,111752245193,2016-08-01,2016-06-01,2,147764.02,4.125,AL,0.0,0.0,1.56,2.565
735,111752245193,2016-09-01,2016-06-01,3,147764.02,4.125,AL,0.0,0.0,1.63,2.495
736,111752245193,2016-10-01,2016-06-01,4,147764.02,4.125,AL,0.0,0.0,1.76,2.365
737,111752245193,2016-11-01,2016-06-01,5,147764.02,4.125,AL,0.0,0.0,2.14,1.985


In [15]:
static_cols = ['loan_id', 'original_upb', 'original_loan_term', 'original_ltv', 'debt_to_income_ratio', 'credit_score', 'occupancy_status', 'loan_purpose']
model_data = pd.merge(dynamic_model_data, static_data_sample.loc[:, static_cols], how = 'left', on = 'loan_id')

In [16]:
occupancy_dict = {'P':'Owner Occupied', 'I':'Investment Property', 'S':'Vacation'}
loan_purpose_dict = {'P':'PR', 'C':'EQ', 'R':'RE'}
model_data['occupancy_status'] = model_data[ 'occupancy_status'].map(occupancy_dict)
model_data['loan_purpose'] = model_data['loan_purpose'].map(loan_purpose_dict)

In [17]:
model_data.isna().sum()

loan_id                    0
reporting_period           0
orig_date                  0
loan_age                   0
current_actual_upb         0
current_interest_rate      0
property_state             0
DQ_STATUS                  0
NEXT_DQ_STATUS             0
ten_rate                   0
coupon_differential        0
original_upb               0
original_loan_term         0
original_ltv               0
debt_to_income_ratio      27
credit_score             180
occupancy_status           0
loan_purpose               0
dtype: int64

In [18]:
model_data = model_data.dropna()

In [19]:
model_data['summer'] = model_data['reporting_period'].dt.month.between(4, 7).astype(int)

In [20]:
zillow = pd.read_csv("~/mfe230m/housing/zillow_data.csv"
                         ).set_index('RegionName')
zillow.columns.name = 'Column'
top_cols = ['United States', 'California', 'New York', 'Florida', 'Texas']
zillow = zillow.loc[top_cols, :].iloc[:,4:].T
zillow = zillow.dropna(how = 'any', axis = 1).loc["2000-01-31":,:]
zillow.index = pd.to_datetime(zillow.index) +  pd.DateOffset(days = 1)
zillow = zillow.reset_index().melt(id_vars = 'Column').rename(columns = {'Column' : 'Date'})
zillow.set_index(zillow['Date'].astype('str') + zillow['RegionName'], inplace = True)

zillow.head()

Unnamed: 0,Date,RegionName,value
2000-02-01United States,2000-02-01,United States,126205.0
2000-03-01United States,2000-03-01,United States,126925.0
2000-04-01United States,2000-04-01,United States,127649.0
2000-05-01United States,2000-05-01,United States,128388.0
2000-06-01United States,2000-06-01,United States,129127.0


In [21]:
model_data['state_hpi'] = model_data['property_state'].map((lambda s: {'CA': 'California', 'NY': 'New York', 'TX': 'Texas', 'FL': 'Florida'}.get(s, 'United States')))

In [22]:
model_data['zillow_index'] = (model_data['reporting_period'].astype('str') + 
                                     model_data['state_hpi']).map(zillow['value'])
model_data['orig_zillow_index'] = (model_data['orig_date'].astype('str') + 
                                     model_data['state_hpi']).map(zillow['value'])

In [23]:
model_data['curr_ltv'] = (model_data['current_actual_upb'] / model_data['original_upb'] * 
                                 model_data['original_ltv'] * model_data['orig_zillow_index'] / 
                                 model_data['zillow_index'])        

In [24]:
model_data['loan_age_square'] = model_data['loan_age']**2

In [25]:
model_dummy_data = pd.get_dummies(model_data.loc[:, ['occupancy_status', 'loan_purpose', 'state_hpi']]).loc[
    :, ['occupancy_status_Investment Property', 'occupancy_status_Owner Occupied', 
        'loan_purpose_EQ', 'loan_purpose_PR', 
        'state_hpi_California', 'state_hpi_Florida', 'state_hpi_New York', 'state_hpi_Texas']]
model_cols = ['loan_id', 'reporting_period', 'loan_age', 'loan_age_square', 'original_upb', 'current_actual_upb', 'original_loan_term', 'current_interest_rate', 'coupon_differential',
              'debt_to_income_ratio', 'credit_score', 'summer', 'original_ltv', 'curr_ltv', 'DQ_STATUS', 'NEXT_DQ_STATUS']
final_model_data = pd.concat([model_data[model_cols], model_dummy_data], axis = 1)
final_model_data.head()

Unnamed: 0,loan_id,reporting_period,loan_age,loan_age_square,original_upb,current_actual_upb,original_loan_term,current_interest_rate,coupon_differential,debt_to_income_ratio,...,DQ_STATUS,NEXT_DQ_STATUS,occupancy_status_Investment Property,occupancy_status_Owner Occupied,loan_purpose_EQ,loan_purpose_PR,state_hpi_California,state_hpi_Florida,state_hpi_New York,state_hpi_Texas
0,111752245193,2016-07-01,1,1,149000,147764.02,360,4.125,2.625,27.0,...,0.0,0.0,0,1,1,0,0,0,0,0
1,111752245193,2016-08-01,2,4,149000,147764.02,360,4.125,2.565,27.0,...,0.0,0.0,0,1,1,0,0,0,0,0
2,111752245193,2016-09-01,3,9,149000,147764.02,360,4.125,2.495,27.0,...,0.0,0.0,0,1,1,0,0,0,0,0
3,111752245193,2016-10-01,4,16,149000,147764.02,360,4.125,2.365,27.0,...,0.0,0.0,0,1,1,0,0,0,0,0
4,111752245193,2016-11-01,5,25,149000,147764.02,360,4.125,1.985,27.0,...,0.0,0.0,0,1,1,0,0,0,0,0


In [2]:
# final_model_data.to_csv('final_model_data.csv')
final_model_data = pd.read_csv('final_model_data.csv')

In [3]:
def transition_upb():
    tm = final_model_data.pivot_table(index = 'DQ_STATUS', columns = 'NEXT_DQ_STATUS', values = 'current_actual_upb',  aggfunc = 'sum')
    return (tm.apply(lambda x: x/x.sum(), axis = 1) * 100).fillna(0)

In [4]:
def fix_transition(from_status, wrong_to_status_list, correct_to_status):
    mask = (final_model_data['DQ_STATUS'] == from_status) & (final_model_data['NEXT_DQ_STATUS'].isin(wrong_to_status_list))
    final_model_data.loc[mask, 'NEXT_DQ_STATUS'] = correct_to_status

In [5]:
transition_upb()

NEXT_DQ_STATUS,-1.0,0.0,1.0,2.0,3.0
DQ_STATUS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,0.689374,98.92811,0.380915,0.001354,0.000247
1.0,2.113105,54.874663,31.22929,11.748357,0.034586
2.0,3.421665,15.267859,16.19488,29.797997,35.317599
3.0,0.436357,7.634179,3.789786,0.900236,87.239441


In [6]:
fix_transition(0, [2, 3], 1)
fix_transition(1, [3], 2)
fix_transition(1, [-1], 0)
fix_transition(2, [-1, 0], 1)
fix_transition(3, [-1, 0, 1, 2], 3) # absorbing


In [7]:
transition_upb()

NEXT_DQ_STATUS,-1.0,0.0,1.0,2.0,3.0
DQ_STATUS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,0.689374,98.92811,0.382516,0.0,0.0
1.0,0.0,56.987767,31.22929,11.782943,0.0
2.0,0.0,0.0,34.884404,29.797997,35.317599
3.0,0.0,0.0,0.0,0.0,100.0


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.metric import roc_auc_score
train_loan_id, test_loan_id = train_test_split(loan_id_sample, test_size=0.2, random_state=0)

ModuleNotFoundError: No module named 'sklearn.metric'

In [None]:
def standardize(X, scaler = None):
    if scaler is None:
        scaler = StandardScaler()
        scaler.fit(X)
        return scaler.transform(X), scaler
    else:
        return scaler.transform(X), scaler
    
def standardize_data(X, scaler = None):
    dummy_cols = ['occupancy_status_Investment Property', 'occupancy_status_Owner Occupied', 'loan_purpose_EQ', 'loan_purpose_PR',
                 'state_hpi_California', 'state_hpi_Florida', 'state_hpi_New York', 'state_hpi_Texas']
    nondummy_cols = list(set(X.columns) - set(dummy_cols))
    nondummy_cols.sort()
    dummy_X = X.loc[:, dummy_cols]
    nondummy_X = X.loc[:, nondummy_cols]
    nondummy_X_stand, scaler = standardize(nondummy_X, scaler)
    nondummy_X_stand = pd.DataFrame(nondummy_X_stand, columns = nondummy_cols)
    X_stand = pd.concat([nondummy_X_stand.reset_index(drop = True),
                         dummy_X.reset_index(drop = True)], axis = 1)
    return X_stand, scaler

def fit_logit(train_data, max_iter=100):
    models = {}
    for current_status in [0, 1, 2]:
        model_data = train_data.loc[train_data['DQ_STATUS'] == current_status, :]
        X = model_data.loc[:, set(model_data.columns) - 
                       set(['DQ_STATUS', 'NEXT_DQ_STATUS', 'loan_id', 'reporting_period', 'current_actual_upb'])]
        y = model_data.loc[:, ['NEXT_DQ_STATUS']].values.reshape(-1)
        sample_weight = model_data.loc[:, 'current_actual_upb'].values.reshape(-1)
        sample_weight = sample_weight/sample_weight.mean()

        X_stand, standardizer = standardize_data(X)
    
        model = LogisticRegression(penalty = 'elasticnet', l1_ratio = 0.2, class_weight = None, solver = 'saga', 
                            random_state=0, warm_start = True, max_iter = max_iter, n_jobs = -1).fit(
            X_stand, y, sample_weight = sample_weight)
        models[current_status] = (standardizer, model)
    return models


def evaluate_logit(test_data, models):
    results = {}
    for current_status, (standardizer, model) in models.items():
        model_data = test_data.loc[test_data['DQ_STATUS'] == current_status, :]
        X = model_data.loc[:, set(model_data.columns) - 
                       set(['DQ_STATUS', 'NEXT_DQ_STATUS', 'loan_id', 'reporting_period', 'current_actual_upb'])]
        y = model_data.loc[:, ['NEXT_DQ_STATUS']].values.reshape(-1)

        X_stand = standardize_data(X, scaler)
        y_predict = model.predict_proba(X_stand)

        for next_move in [-1, 0, 1]:
            next_status = current_status + next_move
            results[(current_status, next_status)] = roc_auc_score(y == next_status, y_predict[:, {-1: 0, 0:1, 1:2, 2:3, 3:4}[next_status]])
    return results


In [None]:
models = fit_logit(final_model_data[final_model_data['loan_id'].isin(train_loan_id)])
results = evaluate_logit(final_model_data[final_model_data['loan_id'].isin(test_loan_id)], models)

In [33]:
def run_logistic_model(final_model_data, current_status = 0, max_iter = 100):
    model_data = final_model_data.loc[final_model_data['DQ_STATUS'] == current_status,:]
    X = model_data.loc[:, set(model_data.columns) - 
                       set(['DQ_STATUS', 'NEXT_DQ_STATUS', 'loan_id', 'reporting_period', 'current_actual_upb'])]
    y = model_data.loc[:, ['NEXT_DQ_STATUS']].values.reshape(-1)
    sample_weight = model_data.loc[:, 'current_actual_upb'].values.reshape(-1)
    sample_weight = sample_weight/sample_weight.mean()
#     sample_weight = None
    X_stand, standardizer = standardize_data(X)
    
    model = LogisticRegression(penalty = 'elasticnet', l1_ratio = 0.2, class_weight = None, solver = 'saga', 
                         random_state=0, warm_start = True, max_iter = max_iter, n_jobs = -1).fit(
        X_stand, y, sample_weight = sample_weight)
    y_predict = model.predict(X_stand)
    cols = list(map(lambda x: str(x), [current_status - 1, current_status, current_status + 1]))
    cols4 = list(map(lambda x: str(current_status)+'->'+x, cols))
    cols2 = list(map(lambda x: 'Predicted ' + x, cols4))
    cols3 = list(map(lambda x: 'Actual ' + x, cols4))
    
    
    print("Confusion Matrix")
    display(pd.DataFrame(confusion_matrix(y, y_predict), 
                         index = cols3, columns = cols2))
    print("\n\nModel Score is: ", model.score(X_stand, y))
    
    print("\n\nMean probability of transitions")
    display(pd.DataFrame(
        np.vstack((
            model.predict_proba(X_stand[y == current_status - 1]).mean(axis = 0)*100,
            model.predict_proba(X_stand[y == current_status]).mean(axis = 0)*100,
            model.predict_proba(X_stand[y == current_status + 1]).mean(axis = 0)*100
        )),
        index = cols3, columns = cols2
    ))
    
    print("\n\nHistorical transition rates")
    tm = model_data.pivot_table(index = 'DQ_STATUS', columns = 'NEXT_DQ_STATUS', 
                                values = 'current_actual_upb',  aggfunc = 'sum')
    display(tm.apply(lambda x: x/x.sum(), axis = 1) * 100)
    
    print("\n\nModel coefficients")
    model_coeff = pd.DataFrame(model.coef_, columns = X_stand.columns, index = cols4)
    model_coeff.index.name = 'Transition Model'
    display(model_coeff)
    return model, standardizer

In [34]:
current_model, current_standardizer = run_logistic_model(final_model_data, current_status = 0, max_iter = 1000)

Confusion Matrix


Unnamed: 0,Predicted 0->-1,Predicted 0->0,Predicted 0->1
Actual 0->-1,1,1652,0
Actual 0->0,5,246588,0
Actual 0->1,0,1015,0




Model Score is:  0.9892803126040576


Mean probability of transitions


Unnamed: 0,Predicted 0->-1,Predicted 0->0,Predicted 0->1
Actual 0->-1,1.19659,98.302597,0.500813
Actual 0->0,0.643859,98.95206,0.404081
Actual 0->1,0.817536,98.320279,0.862185




Historical transition rates


NEXT_DQ_STATUS,-1.0,0.0,1.0
DQ_STATUS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,0.689374,98.92811,0.382516




Model coefficients


Unnamed: 0_level_0,coupon_differential,credit_score,curr_ltv,current_interest_rate,debt_to_income_ratio,loan_age,loan_age_square,original_loan_term,original_ltv,original_upb,summer,occupancy_status_Investment Property,occupancy_status_Owner Occupied,loan_purpose_EQ,loan_purpose_PR,state_hpi_California,state_hpi_Florida,state_hpi_New York,state_hpi_Texas
Transition Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0->-1,0.436379,0.200964,0.292931,0.0,-0.005393,0.888337,-0.627995,-0.204469,-0.251461,0.103373,0.069305,-0.479439,0.0,0.0,-0.077949,0.27494,-0.211674,-0.840759,-0.161853
0->0,-0.132817,0.148997,-0.094756,-0.172235,0.0,-0.435886,0.21166,0.071334,0.12102,-0.037407,0.0,0.12241,0.199303,-0.116209,0.0,0.0,0.0,0.481266,0.0
0->1,-0.116049,-0.537473,-0.010787,0.05146,0.006519,-0.265192,0.229238,0.0,0.0,0.0,-0.058519,0.173896,-0.072575,0.040926,0.057337,-0.185087,0.266893,0.173446,0.058594


In [35]:
d30_model, d30_standardizer = run_logistic_model(final_model_data, current_status = 1, max_iter = 1000)

Confusion Matrix


Unnamed: 0,Predicted 1->0,Predicted 1->1,Predicted 1->2
Actual 1->0,653,173,1
Actual 1->1,278,220,1
Actual 1->2,123,67,2




Model Score is:  0.5764163372859025


Mean probability of transitions


Unnamed: 0,Predicted 1->0,Predicted 1->1,Predicted 1->2
Actual 1->0,59.812051,28.612314,11.575635
Actual 1->1,47.031386,40.498571,12.470044
Actual 1->2,50.718898,34.532667,14.748435




Historical transition rates


NEXT_DQ_STATUS,0.0,1.0,2.0
DQ_STATUS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,56.987767,31.22929,11.782943




Model coefficients


Unnamed: 0_level_0,coupon_differential,credit_score,curr_ltv,current_interest_rate,debt_to_income_ratio,loan_age,loan_age_square,original_loan_term,original_ltv,original_upb,summer,occupancy_status_Investment Property,occupancy_status_Owner Occupied,loan_purpose_EQ,loan_purpose_PR,state_hpi_California,state_hpi_Florida,state_hpi_New York,state_hpi_Texas
Transition Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1->0,-0.031116,0.36974,0.031035,-0.052451,-0.038114,-0.259632,0.067994,0.0,-0.157226,0.191031,0.035706,0.45631,0.0,0.0,0.0,-0.286606,-0.033613,0.050162,0.0
1->1,0.0,-0.13331,-0.410944,0.318848,0.0,0.873097,-0.667393,-0.006115,0.391869,0.0,0.0,-0.707384,-0.212193,-0.124033,0.004135,0.050713,-0.47598,-0.15205,-0.10723
1->2,0.182351,-0.010163,0.153872,-0.040222,0.079782,-0.387398,0.373279,0.112299,-0.009533,-0.000382,-0.11861,0.041845,0.138568,0.0,-0.131852,0.010251,0.734942,0.0,0.295002


In [36]:
d60_model, d60_standardizer = run_logistic_model(final_model_data, current_status = 2, max_iter = 1000)

Confusion Matrix


Unnamed: 0,Predicted 2->1,Predicted 2->2,Predicted 2->3
Actual 2->1,55,18,22
Actual 2->2,23,24,28
Actual 2->3,30,12,52




Model Score is:  0.4962121212121212


Mean probability of transitions


Unnamed: 0,Predicted 2->1,Predicted 2->2,Predicted 2->3
Actual 2->1,43.080962,25.284847,31.634191
Actual 2->2,30.253571,36.437664,33.308765
Actual 2->3,31.741961,27.031444,41.226595




Historical transition rates


NEXT_DQ_STATUS,1.0,2.0,3.0
DQ_STATUS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2.0,34.884404,29.797997,35.317599




Model coefficients


Unnamed: 0_level_0,coupon_differential,credit_score,curr_ltv,current_interest_rate,debt_to_income_ratio,loan_age,loan_age_square,original_loan_term,original_ltv,original_upb,summer,occupancy_status_Investment Property,occupancy_status_Owner Occupied,loan_purpose_EQ,loan_purpose_PR,state_hpi_California,state_hpi_Florida,state_hpi_New York,state_hpi_Texas
Transition Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2->1,0.109117,0.296316,0.344682,-0.305555,-0.052637,0.511749,-0.331958,0.220575,-0.128407,0.015814,0.05556,0.0,-0.419989,0.0,-0.149378,-0.676476,-0.751997,-0.57575,0.0
2->2,-0.462516,0.0,-0.237521,0.843055,0.0,0.0,0.0,-0.539696,0.163788,0.0,0.0,-0.006325,0.897072,0.230051,0.592726,0.099873,0.202313,0.971037,-0.122021
2->3,0.105447,-0.097016,0.0,-0.289415,0.107362,-0.51337,0.350534,0.071035,0.0,-0.162225,-0.052008,0.0,-0.229039,-0.07544,-0.195383,0.328522,0.301659,-0.147197,0.059247
