In [1]:
%load_ext sparkmagic.magics
from dsx_core_utils import proxy_util,dsxhi_util
proxy_util.configure_proxy_livy()
dsxhi_util.list_livy_endpoints()

success configuring sparkmagic livy.
['https://qlawsbidlhe02a.ad.datalake.foc.zone:8445/gateway/dsx/livy2/v1']


In [2]:
%%spark config
{"executorCores": 4, "numExecutors": 5, "executorMemory": "10g", 
 "driverMemory": "8g", "proxyUser": "jchen-", "driverCores": 1, 
 "conf": {"spark.yarn.appMasterEnv.THEANO_FLAGS": "base_compiledir=${PWD}/.theano"}}

In [3]:
%spark add -s performance -k -l python -u https://qlawsbidlhe02a.ad.datalake.foc.zone:8445/gateway/dsx/livy2/v1

Starting Spark application


ID,YARN Application ID,Kind,State,Spark UI,Driver log,Current session?
13497,application_1566930137025_7094,pyspark,idle,Link,Link,✔


SparkSession available as 'spark'.


In [4]:
%%spark

print(spark.version)

2.3.0.2.6.5.0-292

In [5]:
%%spark

import pyspark
import os, sys

from pyspark.sql import SparkSession
from pyspark.sql import Window
import pyspark.sql.functions as F
from pyspark.sql.functions import col, when, lit, lower

from pyspark.ml.feature import QuantileDiscretizer
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml import Pipeline, PipelineModel
from pyspark.ml.feature import VectorIndexer, VectorAssembler, VectorSlicer, StringIndexer


from os.path import expanduser, join, abspath
import time
import pandas as pd

spark = SparkSession.builder.getOrCreate()

In [6]:
%%spark

csv_path = '/dev/projects/retention_models/csv_data/'
actual_path = '/dev/projects/retention_models/actual_value/'
monthly_path = '/dev/projects/retention_models/monthly_snapshot/'

result_path_refi = '/dev/projects/retention_models/refi_payoff/training/results/'
result_path_pur = '/dev/projects/retention_models/purchase_payoff/training/results/movermodel/'
result_path_pur_6 = '/dev/projects/retention_models/purchase_payoff/training/results/mover_6/'
result_path_pur_12 = '/dev/projects/retention_models/purchase_payoff/training/results/mover_12/'

## 1. Actual Payoff

#### (1). Prepare

In [7]:
%%spark

df_actual = spark.read.csv(csv_path + 'Payoff_segments_summary_20190905.csv', header=True)\
                        .withColumn('newloanpurpose', lower(col('NewLoanPurpose')))\
                        .withColumn('retentiontype', lower(col('PayoffRetentionType')))\
                        .select('ServicedLoanNumber', 'PaymentInFullDate', 'newloanpurpose', 'retentiontype')
    
df_actual.show(1)

+------------------+-----------------+--------------+-------------+
|ServicedLoanNumber|PaymentInFullDate|newloanpurpose|retentiontype|
+------------------+-----------------+--------------+-------------+
|        3357203106|       2018-09-20|      purchase|         lost|
+------------------+-----------------+--------------+-------------+
only showing top 1 row

In [8]:
%%spark

df_list1 = spark.sql('''
select distinct servicecalendardate, ln_no
from data_science_sandbox.servicing_dbo_payoff_source_purchase_client_crdtrs 
where leftportfoliodate is null
and servicecalendardate between '2016-06-30' and '2018-11-30'
''')

df_201812 = spark.read.parquet(monthly_path+'servicing_df_all_dec18_20190131.parquet').select('servicecalendardate', 'ln_no')

df_201901 = spark.read.parquet(monthly_path+'servicing_df_all_jan19_20190215.parquet').select('servicecalendardate', 'ln_no')
df_201902 = spark.read.parquet(monthly_path+'servicing_df_all_feb19_20190305.parquet').select('servicecalendardate', 'ln_no')
df_201903 = spark.read.parquet(monthly_path+'servicing_df_all_mar19_20190409.parquet').select('servicecalendardate', 'ln_no')
df_201904 = spark.read.parquet(monthly_path+'servicing_df_all_apr19_20190507.parquet').select('servicecalendardate', 'ln_no')
df_201905 = spark.read.parquet(monthly_path+'servicing_df_all_may19_20190610.parquet').select('servicecalendardate', 'ln_no')
df_201906 = spark.read.parquet(monthly_path+'servicing_df_all_june19_20190708.parquet').select('servicecalendardate', 'ln_no')
df_201907 = spark.read.parquet(monthly_path+'servicing_df_all_jul19_20190805.parquet').select('servicecalendardate', 'ln_no')
df_201908 = spark.read.parquet(monthly_path+'servicing_df_all_aug19_20190904.parquet').select('servicecalendardate', 'ln_no')

In [9]:
%%spark

df_list = df_list1.union(df_201812)\
                    .union(df_201901).union(df_201902).union(df_201903)\
                    .union(df_201904).union(df_201905).union(df_201906)\
                    .union(df_201907).union(df_201908)

df_list.show(1)

+-------------------+----------+
|servicecalendardate|     ln_no|
+-------------------+----------+
|         2017-12-31|3308904211|
+-------------------+----------+
only showing top 1 row

In [10]:
%%spark

df_payoff = df_list.join(df_actual, df_actual.ServicedLoanNumber == df_list.ln_no, how='left')

df_payoff.show(1)

+-------------------+----------+------------------+-----------------+--------------+-------------+
|servicecalendardate|     ln_no|ServicedLoanNumber|PaymentInFullDate|newloanpurpose|retentiontype|
+-------------------+----------+------------------+-----------------+--------------+-------------+
|         2017-08-31|3221138985|        3221138985|       2017-11-13|     refinance|     retained|
+-------------------+----------+------------------+-----------------+--------------+-------------+
only showing top 1 row

In [11]:
%%spark

def build_targets(df):
    
    # Purchase Payoff
    expr_purpayoff = when((col('PaymentInFullDate')>col('startdt')) & (col('PaymentInFullDate')<=col('enddt_3')) & (col('newloanpurpose')=='purchase'), 1.)\
                        .otherwise(0.)
    expr_purpayoff_3_6 = when((col('PaymentInFullDate')>col('enddt_3')) & (col('PaymentInFullDate')<=col('enddt_6')) & (col('newloanpurpose')=='purchase'), 1.)\
                            .otherwise(0.)
    expr_purpayoff_6_12 = when((col('PaymentInFullDate')>col('enddt_6')) & (col('PaymentInFullDate')<=col('enddt_12')) & (col('newloanpurpose')=='purchase'), 1.)\
                            .otherwise(0.)
    expr_purpayoff_all = when((col('PaymentInFullDate')>col('servicecalendardate')) & (col('newloanpurpose')=='purchase'), 1.)\
                            .otherwise(0.)
    
    # Refi Payoff
    expr_refipayoff = when((col('PaymentInFullDate')>col('startdt')) & (col('PaymentInFullDate')<=col('enddt_3')) & (col('newloanpurpose')!='purchase'), 1.)\
                        .otherwise(0.)
    expr_refipayoff_all = when((col('PaymentInFullDate')>col('servicecalendardate')) & (col('newloanpurpose')!='purchase'), 1.)\
                            .otherwise(0.)
    
    # Purchase/Refi Retained
    expr_purretain_24 = when((col('PaymentInFullDate')>col('startdt')) & (col('PaymentInFullDate')<=col('enddt_24')) 
                             & (col('newloanpurpose')=='purchase') & (col('retentiontype')=='retained'), 1.)\
                            .otherwise(0.)
    
    expr_refiretain_24 = when((col('PaymentInFullDate')>col('startdt')) & (col('PaymentInFullDate')<=col('enddt_24')) 
                             & (col('newloanpurpose')!='purchase') & (col('retentiontype')=='retained'), 1.)\
                            .otherwise(0.)
    
    # Payoff
    expr_payoff = when((col('PaymentInFullDate')>col('startdt')) & (col('PaymentInFullDate')<=col('enddt_3')), 1.)\
                        .otherwise(0.)
    expr_payoff_24 = when((col('PaymentInFullDate')>col('startdt')) & (col('PaymentInFullDate')<=col('enddt_24')), 1.)\
                        .otherwise(0.)
    expr_payoff_all = when(col('PaymentInFullDate')>col('servicecalendardate'), 1.)\
                        .otherwise(0.)
    
    # Retained
    expr_retention = when((col('PaymentInFullDate')>col('startdt')) & (col('PaymentInFullDate')<=col('enddt_3')) & (col('retentiontype')=='retained'), 1.)\
                        .otherwise(0.)
    expr_retention_3_6 = when((col('PaymentInFullDate')>col('enddt_3')) & (col('PaymentInFullDate')<=col('enddt_6')) & (col('retentiontype')=='retained'), 1.)\
                            .otherwise(0.)
    expr_retention_6_12 = when((col('PaymentInFullDate')>col('enddt_6')) & (col('PaymentInFullDate')<=col('enddt_12')) & (col('retentiontype')=='retained'), 1.)\
                            .otherwise(0.)
    expr_retention_24 = when((col('PaymentInFullDate')>col('startdt')) & (col('PaymentInFullDate')<=col('enddt_24')) & (col('retentiontype')=='retained'), 1.)\
                        .otherwise(0.)
    expr_retention_all = when((col('PaymentInFullDate')>col('servicecalendardate')) & (col('retentiontype')=='retained'), 1.)\
                        .otherwise(0.)

    df_final = df.withColumn('startdt', F.add_months(col('servicecalendardate'), 1))\
                    .withColumn('enddt_3', F.add_months(col('servicecalendardate'), 4))\
                    .withColumn('enddt_6', F.add_months(col('servicecalendardate'), 7))\
                    .withColumn('enddt_12', F.add_months(col('servicecalendardate'), 13))\
                    .withColumn('enddt_24', F.add_months(col('servicecalendardate'), 25))\
                    .withColumn('purchasepayoff', expr_purpayoff)\
                    .withColumn('purchasepayoff_3_6', expr_purpayoff_3_6)\
                    .withColumn('purchasepayoff_6_12', expr_purpayoff_6_12)\
                    .withColumn('purchasepayoff_all', expr_purpayoff_all)\
                    .withColumn('refipayoff', expr_refipayoff)\
                    .withColumn('refipayoff_all', expr_refipayoff_all)\
                    .withColumn('purretain_24', expr_purretain_24)\
                    .withColumn('refiretain_24', expr_refiretain_24)\
                    .withColumn('payoff', expr_payoff)\
                    .withColumn('payoff_24', expr_payoff_24)\
                    .withColumn('payoff_all', expr_payoff_all)\
                    .withColumn('retained', expr_retention)\
                    .withColumn('retained_3_6', expr_retention_3_6)\
                    .withColumn('retained_6_12', expr_retention_6_12)\
                    .withColumn('retained_24', expr_retention_24)\
                    .withColumn('retained_all', expr_retention_all)\
                    .select('servicecalendardate', 'ln_no', 'PaymentInFullDate', 'newloanpurpose', 'retentiontype',
                            'purchasepayoff', 'purchasepayoff_3_6', 'purchasepayoff_6_12', 'purchasepayoff_all',
                            'refipayoff', 'refipayoff_all', 
                            'purretain_24', 'refiretain_24',
                            'payoff', 'payoff_24', 'payoff_all', 
                            'retained', 'retained_3_6', 'retained_6_12', 'retained_24', 'retained_all')
                        
    return df_final

In [12]:
%%spark

df_final = build_targets(df_payoff)
df_final.show(1)

+-------------------+----------+-----------------+--------------+-------------+--------------+------------------+-------------------+------------------+----------+--------------+------------+-------------+------+---------+----------+--------+------------+-------------+-----------+------------+
|servicecalendardate|     ln_no|PaymentInFullDate|newloanpurpose|retentiontype|purchasepayoff|purchasepayoff_3_6|purchasepayoff_6_12|purchasepayoff_all|refipayoff|refipayoff_all|purretain_24|refiretain_24|payoff|payoff_24|payoff_all|retained|retained_3_6|retained_6_12|retained_24|retained_all|
+-------------------+----------+-----------------+--------------+-------------+--------------+------------------+-------------------+------------------+----------+--------------+------------+-------------+------+---------+----------+--------+------------+-------------+-----------+------------+
|         2017-08-31|3221138985|       2017-11-13|     refinance|     retained|           0.0|               0.0|  

In [13]:
%%spark

df_final.write.parquet(actual_path + 'Actual_payoff_20190905.parquet', mode='overwrite')

#### (2). Load

In [7]:
%%spark

actual = spark.read.parquet(actual_path + 'Actual_payoff_20190905.parquet')

actual.show(1)

+-------------------+----------+-----------------+--------------+-------------+--------------+------------------+-------------------+------------------+----------+--------------+------------+-------------+------+---------+----------+--------+------------+-------------+-----------+------------+
|servicecalendardate|     ln_no|PaymentInFullDate|newloanpurpose|retentiontype|purchasepayoff|purchasepayoff_3_6|purchasepayoff_6_12|purchasepayoff_all|refipayoff|refipayoff_all|purretain_24|refiretain_24|payoff|payoff_24|payoff_all|retained|retained_3_6|retained_6_12|retained_24|retained_all|
+-------------------+----------+-----------------+--------------+-------------+--------------+------------------+-------------------+------------------+----------+--------------+------------+-------------+------+---------+----------+--------+------------+-------------+-----------+------------+
|         2016-09-30|3222133893|       2016-12-12|     refinance|     retained|           0.0|               0.0|  

## 2. Actual Decile Performance

In [8]:
%%spark

def join_actual(score_file, servicecalendardate, df_actual, result_path):
    
    df_score = spark.read.csv(result_path + score_file, header=True)\
                            .withColumn('servicecalendardate', F.lit(servicecalendardate))\
                            .select('servicecalendardate', 'ln_no', 'logRegProb', 'randForProb', 'gbtProb')
        
    df_all = df_score.join(df_actual, on =['servicecalendardate', 'ln_no'], how='inner')
    
    return df_all

In [9]:
%%spark

def ntile_performance(n, df, ls_prob, nrow, loanpurpose):
    
    if loanpurpose == 'purchase':
        col_target = 'purchasepayoff'
        col_retained = 'retained'
        
    elif loanpurpose == 'purchase_6':
        col_target = 'purchasepayoff_6'
        col_retained = 'retained_6'
        
    elif loanpurpose == 'purchase_12':
        col_target = 'purchasepayoff_12'
        col_retained = 'retained_12'
        
    elif loanpurpose == 'refi':
        col_target = 'refipayoff'
        col_retained = 'retained'
        
    elif loanpurpose == 'refi_all':
        col_target = 'refipayoff_all'
        col_retained = 'retained_all'
        
    elif loanpurpose == 'all':
        col_target = 'payoff_all'
        col_retained = 'retained_all'
    
    expr_retained = when((col(col_target)==1.) & (col(col_retained)==1.), 1.).otherwise(0.)
    
    df = df.fillna({col: 0 for col in ls_prob})\
            .withColumn('retained_new', expr_retained)
    
    for prob_name in ls_prob:
        
        df1 = df.withColumn(prob_name, col(prob_name).cast('double'))
        
        df1 = QuantileDiscretizer(numBuckets=n, inputCol=prob_name, outputCol='ntile', relativeError=0.0001, handleInvalid='error')\
                    .fit(df1).transform(df1)
        df1 = df1.withColumn('ntile', (n - col('ntile')).cast('int'))
        
        windown_cumsum = Window.orderBy('ntile').rangeBetween(Window.unboundedPreceding, 0)
        total_target = df.select(F.sum(col_target)).collect()[0][0]
        
        df_out = df1.groupBy('ntile')\
                    .agg(F.sum('retained_new').alias('retained_new'), 
                         F.sum(col_target).alias(col_target), 
                         F.count('ln_no').alias('count'))\
                    .withColumn('retention_rate', col('retained_new')/col(col_target))\
                    .withColumn('percentage', col(col_target) / total_target)\
                    .withColumn('cum_sum', F.sum(col_target).over(windown_cumsum) / total_target)\
                    .sort('ntile').limit(nrow)
        
        print(prob_name + ':')
        print(df_out.show())

In [10]:
%%spark

def monthly_performance(loanpurpose, score_file, servicecalendardate, df_actual, result_path, n, ls_prob, nrow):
    
    df_all = join_actual(score_file, servicecalendardate, df_actual, result_path)
    
    ntile_performance(n, df_all, ls_prob, nrow, loanpurpose)

### (1). Mover (Purchase)

In [12]:
%%spark

#2017-06
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_jun17_20190718.csv', 
    servicecalendardate = '2017-06-30', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+--------------+------+-------------------+-------------------+-------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|         percentage|            cum_sum|
+-----+------------+--------------+------+-------------------+-------------------+-------------------+
|    1|       579.0|        3850.0|142250|0.15038961038961038|0.25176562908710437|0.25176562908710437|
|    2|       522.0|        2177.0|142324|0.23977951309141018| 0.1423620193565263|0.39412764844363063|
|    3|       451.0|        1756.0|142099|0.25683371298405466|0.11483128433167669| 0.5089589327753073|
|    4|       389.0|        1519.0|142671|0.25608953258722844| 0.0993329845670939| 0.6082919173424013|
|    5|       304.0|        1324.0|142075|  0.229607250755287| 0.0865812189380068|  0.694873136280408|
|    6|       274.0|        1137.0|142489|0.24098504837291118|0.07435260266806173| 0.7692257389484698|
|    7|       234.0|        1108.0|142144| 0.2111913357400722

In [13]:
%%spark

#2017-09
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_sep17_20190718.csv', 
    servicecalendardate = '2017-09-30', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+--------------+------+-------------------+--------------------+------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|          percentage|           cum_sum|
+-----+------------+--------------+------+-------------------+--------------------+------------------+
|    1|       475.0|        2934.0|146593|0.16189502385821405|  0.2510266940451745|0.2510266940451745|
|    2|       405.0|        1704.0|146756|0.23767605633802816|  0.1457905544147844|0.3968172484599589|
|    3|       377.0|        1455.0|147006|  0.259106529209622| 0.12448665297741274|0.5213039014373717|
|    4|       333.0|        1259.0|146787|0.26449563145353455| 0.10771731690622861|0.6290212183436003|
|    5|       242.0|         951.0|146831| 0.2544689800210305| 0.08136550308008214|0.7103867214236824|
|    6|       211.0|         876.0|146805| 0.2408675799086758| 0.07494866529774127|0.7853353867214237|
|    7|       207.0|         802.0|146905| 0.2581047381546135

In [14]:
%%spark

#2017-12
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_dec17_20190718.csv', 
    servicecalendardate = '2017-12-31', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+--------------+------+-------------------+--------------------+------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|          percentage|           cum_sum|
+-----+------------+--------------+------+-------------------+--------------------+------------------+
|    1|       526.0|        2962.0|152537| 0.1775827143821742|  0.2324960753532182|0.2324960753532182|
|    2|       447.0|        1881.0|153454|0.23763955342902712| 0.14764521193092622|0.3801412872841444|
|    3|       406.0|        1581.0|152615| 0.2567994939911448| 0.12409733124018839|0.5042386185243328|
|    4|       325.0|        1327.0|153056| 0.2449133383571967| 0.10416012558869701|0.6083987441130299|
|    5|       259.0|        1101.0|152843|0.23524069028156222| 0.08642072213500784|0.6948194662480377|
|    6|       235.0|        1035.0|153301|0.22705314009661837| 0.08124018838304553|0.7760596546310832|
|    7|       192.0|         918.0|153085|0.20915032679738563

In [15]:
%%spark

#2018-3
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_mar18_20190718.csv', 
    servicecalendardate = '2018-03-31', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+--------------+------+-------------------+-------------------+-------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|         percentage|            cum_sum|
+-----+------------+--------------+------+-------------------+-------------------+-------------------+
|    1|       818.0|        4809.0|157521|0.17009773341651072|0.24348134271682448|0.24348134271682448|
|    2|       726.0|        2863.0|157830| 0.2535801606706252| 0.1449546858386917| 0.3884360285555162|
|    3|       613.0|        2393.0|157713|0.25616381111575426| 0.1211584223583616| 0.5095944509138778|
|    4|       560.0|        2094.0|157601|0.26743075453677173|0.10601994835704522|  0.615614399270923|
|    5|       464.0|        1808.0|157851|0.25663716814159293|0.09153966887752518| 0.7071540681484482|
|    6|       377.0|        1537.0|157768|0.24528301886792453|0.07781884461546251| 0.7849729127639107|
|    7|       346.0|        1433.0|157797|0.24145150034891835

In [16]:
%%spark

#2018-6
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_jun18_20190718.csv', 
    servicecalendardate = '2018-06-30', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+--------------+------+-------------------+-------------------+------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|         percentage|           cum_sum|
+-----+------------+--------------+------+-------------------+-------------------+------------------+
|    1|       668.0|        3599.0|161678| 0.1856071130869686| 0.2102956643683534|0.2102956643683534|
|    2|       612.0|        2348.0|162100| 0.2606473594548552| 0.1371976159869113|0.3474932803552647|
|    3|       555.0|        2108.0|161782|0.26328273244781786|0.12317400958279771|0.4706672899380624|
|    4|       516.0|        1860.0|162231|0.27741935483870966|0.10868294963188033|0.5793502395699427|
|    5|       401.0|        1595.0|161959|0.25141065830721004| 0.0931985508940049|0.6725487904639477|
|    6|       353.0|        1480.0|161869|0.23851351351351352|0.08647890615870048|0.7590276966226481|
|    7|       293.0|        1281.0|161897|0.22872755659640906| 0.07485

In [11]:
%%spark

#2018-07
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_jul18_20190722.csv', 
    servicecalendardate = '2018-07-31', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+--------------+------+-------------------+--------------------+------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|          percentage|           cum_sum|
+-----+------------+--------------+------+-------------------+--------------------+------------------+
|    1|       715.0|        3296.0|165141|0.21692961165048544|  0.2181481236349196|0.2181481236349196|
|    2|       605.0|        2384.0|165231| 0.2537751677852349| 0.15778674961943212|0.3759348732543517|
|    3|       491.0|        2109.0|164806| 0.2328117591275486| 0.13958567741081473|0.5155205506651664|
|    4|       456.0|        1738.0|165554|0.26237054085155354| 0.11503077635846184|0.6305513270236283|
|    5|       352.0|        1495.0|165375| 0.2354515050167224|  0.0989476470977563|0.7294989741213846|
|    6|       299.0|        1300.0|165305|               0.23| 0.08604143225891853|0.8155404063803031|
|    7|       238.0|        1080.0|165103|0.22037037037037038| 0

In [12]:
%%spark

#2018-08
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_aug18_20190722.csv', 
    servicecalendardate = '2018-08-31', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+--------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+--------------+------+-------------------+--------------------+-------------------+
|    1|       670.0|        2879.0|166664| 0.2327196943383119|  0.1992939221929946| 0.1992939221929946|
|    2|       585.0|        2313.0|166998| 0.2529182879377432| 0.16011352623563616|0.35940744842863076|
|    3|       444.0|        2009.0|166822|0.22100547536087606| 0.13906963865429878|0.49847708708292954|
|    4|       378.0|        1647.0|166896|0.22950819672131148| 0.11401079883704832| 0.6124878859199778|
|    5|       323.0|        1487.0|166958| 0.2172158708809684| 0.10293506853108127| 0.7154229544510591|
|    6|       292.0|        1281.0|166978|0.22794691647150664| 0.08867506576214869| 0.8040980202132078|
|    7|       241.0|        1101.0|166879|0.21889191643

In [13]:
%%spark

#2018-09
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_sep18_20190718.csv', 
    servicecalendardate = '2018-09-30', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+--------------+------+-------------------+-------------------+-------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|         percentage|            cum_sum|
+-----+------------+--------------+------+-------------------+-------------------+-------------------+
|    1|       590.0|        2970.0|168055|0.19865319865319866| 0.2421327246045981| 0.2421327246045981|
|    2|       446.0|        1806.0|168093| 0.2469545957918051|0.14723626284037175|0.38936898744496984|
|    3|       372.0|        1507.0|168594| 0.2468480424684804|0.12285993804011087| 0.5122289254850807|
|    4|       350.0|        1368.0|168132|0.25584795321637427|0.11152780042393608| 0.6237567259090168|
|    5|       285.0|        1167.0|168748| 0.2442159383033419|0.09514104027392793| 0.7188977661829448|
|    6|       233.0|        1061.0|168240|0.21960414703110273| 0.0864992662644709| 0.8053970324474157|
|    7|       196.0|         917.0|168383|0.21374045801526717|0.

In [13]:
%%spark

#2018-10
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_oct18_20190722.csv', 
    servicecalendardate = '2018-10-31', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+--------------+------+-------------------+--------------------+------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|          percentage|           cum_sum|
+-----+------------+--------------+------+-------------------+--------------------+------------------+
|    1|       519.0|        2742.0|169709|0.18927789934354486|  0.2507544581618656|0.2507544581618656|
|    2|       372.0|        1612.0|169740|0.23076923076923078| 0.14741655235482395|0.3981710105166895|
|    3|       388.0|        1432.0|169999| 0.2709497206703911|  0.1309556470050297|0.5291266575217193|
|    4|       278.0|        1141.0|169975|0.24364592462751972| 0.10434385002286237|0.6334705075445817|
|    5|       237.0|        1046.0|170128|0.22657743785850862| 0.09565614997713763|0.7291266575217192|
|    6|       196.0|         888.0|170073|0.22072072072072071| 0.08120713305898491|0.8103337905807042|
|    7|       172.0|         788.0|169748| 0.2182741116751269| 0

In [14]:
%%spark

#2018-11
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_nov18_20190722.csv', 
    servicecalendardate = '2018-11-30', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+--------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+--------------+------+-------------------+--------------------+-------------------+
|    1|       535.0|        2656.0|170979|0.20143072289156627| 0.23627791121786318|0.23627791121786318|
|    2|       434.0|        1735.0|171448| 0.2501440922190202|  0.1543456987812472| 0.3906236099991104|
|    3|       330.0|        1411.0|171561|0.23387668320340185| 0.12552264033448982| 0.5161462503336002|
|    4|       289.0|        1256.0|171132| 0.2300955414012739| 0.11173383150965216| 0.6278800818432524|
|    5|       216.0|        1067.0|171354|0.20243673851921273| 0.09492038074904367|  0.722800462592296|
|    6|       191.0|         972.0|171252|0.19650205761316872| 0.08646917534027222| 0.8092696379325682|
|    7|       177.0|         799.0|171336|0.22152690863

In [11]:
%%spark

#2018-12
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_dec18_20190718.csv', 
    servicecalendardate = '2018-12-31', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+--------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+--------------+------+-------------------+--------------------+-------------------+
|    1|       632.0|        3196.0|172551|0.19774718397997496| 0.23206505954109788|0.23206505954109788|
|    2|       389.0|        1769.0|172439|0.21989824759751272| 0.12844902701132732|0.36051408655242523|
|    3|       389.0|        1640.0|172449| 0.2371951219512195| 0.11908219575951205|0.47959628231193724|
|    4|       337.0|        1583.0|172732|0.21288692356285535| 0.11494336334591926| 0.5945396456578566|
|    5|       324.0|        1443.0|172562|0.22453222453222454| 0.10477781004937554|  0.699317455707232|
|    6|       276.0|        1274.0|172760|0.21664050235478807| 0.09250653499854777| 0.7918239907057798|
|    7|       265.0|        1106.0|172568|0.23960216998

In [14]:
%%spark

#2019-01
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_jan19_20190722.csv', 
    servicecalendardate = '2019-01-31', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+--------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+--------------+------+-------------------+--------------------+-------------------+
|    1|       622.0|        2903.0|173601|  0.214261109197382| 0.16938966040378106|0.16938966040378106|
|    2|       531.0|        2339.0|173649|0.22702009405728943| 0.13648033609522697|0.30586999649900803|
|    3|       504.0|        2215.0|174294| 0.2275395033860045| 0.12924495273660871| 0.4351149492356168|
|    4|       439.0|        2127.0|173689| 0.2063939821344617| 0.12411016454662155| 0.5592251137822383|
|    5|       440.0|        1947.0|174023|0.22598870056497175| 0.11360718870346598| 0.6728323024857042|
|    6|       389.0|        1758.0|173821|0.22127417519908987| 0.10257906406815265| 0.7754113665538569|
|    7|       321.0|        1468.0|174023| 0.2186648501

In [15]:
%%spark

#2019-02
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_feb19_20190722.csv', 
    servicecalendardate = '2019-02-28', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+--------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+--------------+------+-------------------+--------------------+-------------------+
|    1|       716.0|        3253.0|175002|0.22010451890562557| 0.16977193257136894|0.16977193257136894|
|    2|       629.0|        2750.0|175194|0.22872727272727272| 0.14352069307447418| 0.3132926256458431|
|    3|       555.0|        2424.0|175342|0.22896039603960397| 0.12650696727728197| 0.4397995929231251|
|    4|       519.0|        2357.0|175179| 0.2201951633432329| 0.12301028130055842| 0.5628098742236836|
|    5|       479.0|        2227.0|175131|0.21508756174225416| 0.11622566671885601| 0.6790355409425395|
|    6|       443.0|        1998.0|175382|0.22172172172172172| 0.10427430718647253|  0.783309848129012|
|    7|       357.0|        1559.0|175323| 0.2289929441

In [12]:
%%spark

#2019-03
monthly_performance(
    loanpurpose = 'purchase',
    score_file = 'pred_MoverV3_2_mar19_20190722.csv', 
    servicecalendardate = '2019-03-31', 
    df_actual = actual, 
    result_path = result_path_pur, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+--------------+------+-------------------+-------------------+-------------------+
|ntile|retained_new|purchasepayoff| count|     retention_rate|         percentage|            cum_sum|
+-----+------------+--------------+------+-------------------+-------------------+-------------------+
|    1|       891.0|        5025.0|176704| 0.1773134328358209|0.24510999463440808|0.24510999463440808|
|    2|       663.0|        2842.0|177189|0.23328641801548206|0.13862738403004732| 0.3837373786644554|
|    3|       601.0|        2452.0|176706|0.24510603588907015|0.11960392175991415| 0.5033413004243695|
|    4|       562.0|        2225.0|176841|0.25258426966292136|0.10853129115652894| 0.6118725915808985|
|    5|       520.0|        2086.0|177305|   0.24928092042186|0.10175113409101995| 0.7136237256719185|
|    6|       411.0|        1808.0|177221|0.22732300884955753|0.08819081996000196| 0.8018145456319203|
|    7|       363.0|        1579.0|176727| 0.2298923369221026|0.

### (2). Refi Retention

In [20]:
%%spark

#2016-12
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_dec16_20190716.csv', 
    servicecalendardate = '2016-12-31', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|refipayoff| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+----------+------+-------------------+--------------------+-------------------+
|    1|      3030.0|    5463.0|132551| 0.5546403075233388| 0.21024476600985223|0.21024476600985223|
|    2|      2646.0|    4114.0|132629| 0.6431696645600389| 0.15832820197044334|0.36857296798029554|
|    3|      2571.0|    3732.0|132529| 0.6889067524115756|  0.1436268472906404| 0.5121998152709359|
|    4|      2018.0|    2992.0|132636| 0.6744652406417112| 0.11514778325123153| 0.6273475985221675|
|    5|      1729.0|    2608.0|132637| 0.6629601226993865| 0.10036945812807882| 0.7277170566502463|
|    6|      1524.0|    2235.0|132692| 0.6818791946308724| 0.08601447044334976| 0.8137315270935961|
|    7|      1167.0|    1844.0|132674| 0.6328633405639913|  0.0709667487684729|  0.88469

In [25]:
%%spark

#2017-03
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_mar17_20190716.csv', 
    servicecalendardate = '2017-03-31', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|refipayoff| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+----------+------+-------------------+--------------------+-------------------+
|    1|      3452.0|    6234.0|137694| 0.5537375681745268| 0.19777291329589797|0.19777291329589797|
|    2|      2791.0|    4352.0|137872| 0.6413143382352942| 0.13806668570159575| 0.3358395989974937|
|    3|      2800.0|    4159.0|137955| 0.6732387593171435| 0.13194378350940641|0.46778338250690016|
|    4|      2497.0|    3659.0|137850| 0.6824268925936048| 0.11608134259699883|  0.583864725103899|
|    5|      2172.0|    3284.0|137994| 0.6613885505481121| 0.10418451191269312| 0.6880492370165922|
|    6|      1883.0|    2875.0|137904| 0.6549565217391304|  0.0912090352463437| 0.7792582722629359|
|    7|      1560.0|    2384.0|137728| 0.6543624161073825| 0.07563211827035944| 0.854890

In [22]:
%%spark

#2017-06
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_jun17_20190716.csv', 
    servicecalendardate = '2017-06-30', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------+------+------------------+--------------------+-------------------+
|ntile|retained_new|refipayoff| count|    retention_rate|          percentage|            cum_sum|
+-----+------------+----------+------+------------------+--------------------+-------------------+
|    1|      4306.0|    7474.0|142276|0.5761305860315761|  0.1766401966345245| 0.1766401966345245|
|    2|      3577.0|    5274.0|142309|0.6782328403488813| 0.12464549064095293| 0.3012856872754774|
|    3|      3768.0|    5255.0|142254|0.7170313986679353| 0.12419644545282663|0.42548213272830404|
|    4|      3680.0|    4990.0|142497|0.7374749498997996| 0.11793344677632824| 0.5434155795046323|
|    5|      3395.0|    4591.0|142138|0.7394903071226312| 0.10850349782567593| 0.6519190773303082|
|    6|      3023.0|    4074.0|142458|0.7420225822287678| 0.09628474191718661| 0.7482038192474948|
|    7|      2693.0|    3681.0|142384|0.7315946753599565| 0.08699659671015315| 0.8352004159576479

In [23]:
%%spark

#2017-09
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_sep17_20190716.csv', 
    servicecalendardate = '2017-09-30', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------+------+------------------+--------------------+-------------------+
|ntile|retained_new|refipayoff| count|    retention_rate|          percentage|            cum_sum|
+-----+------------+----------+------+------------------+--------------------+-------------------+
|    1|      4190.0|    7291.0|146657|0.5746811137018242| 0.19032578051581914|0.19032578051581914|
|    2|      3274.0|    4946.0|146764|0.6619490497371613| 0.12911141275973687|0.31943719327555603|
|    3|      3491.0|    4895.0|147006|  0.71317671092952| 0.12778009815182206| 0.4472172914273781|
|    4|      3209.0|    4446.0|146679|0.7217723796671165|  0.1160593087605722| 0.5632766001879503|
|    5|      2938.0|    3962.0|146705|0.7415446744068652| 0.10342487208938081| 0.6667014722773311|
|    6|      2642.0|    3573.0|146947| 0.739434648754548|  0.0932703351780307| 0.7599718074553619|
|    7|      2326.0|    3161.0|147076|0.7358430876304967| 0.08251540148271902| 0.8424872089380808

In [24]:
%%spark

#2017-12
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_dec17_20190716.csv', 
    servicecalendardate = '2017-12-31', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------+------+------------------+-------------------+-------------------+
|ntile|retained_new|refipayoff| count|    retention_rate|         percentage|            cum_sum|
+-----+------------+----------+------+------------------+-------------------+-------------------+
|    1|      2898.0|    5334.0|152894|0.5433070866141733|  0.182953181272509|  0.182953181272509|
|    2|      2478.0|    4064.0|152762| 0.609744094488189| 0.1393929000171497|0.32234608128965875|
|    3|      2456.0|    3800.0|153159|0.6463157894736842| 0.1303378494254845| 0.4526839307151432|
|    4|      2282.0|    3434.0|152998|0.6645311589982528|0.11778425655976676|   0.57046818727491|
|    5|      2062.0|    3067.0|152854|0.6723182262797522|0.10519636425998971| 0.6756645515348997|
|    6|      1790.0|    2744.0|153092| 0.652332361516035|0.09411764705882353| 0.7697821985937232|
|    7|      1538.0|    2358.0|153003|0.6522476675148431|0.08087806551191905| 0.8506602641056422|
|    8| 

In [26]:
%%spark

#2018-3
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_mar18_20190716.csv', 
    servicecalendardate = '2018-01-31', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------+------+------------------+-------------------+-------------------+
|ntile|retained_new|refipayoff| count|    retention_rate|         percentage|            cum_sum|
+-----+------------+----------+------+------------------+-------------------+-------------------+
|    1|      1058.0|    2202.0|151309|  0.48047229791099|0.22785596026490065|0.22785596026490065|
|    2|       777.0|    1302.0|151356|0.5967741935483871|0.13472682119205298|0.36258278145695366|
|    3|       739.0|    1208.0|151264|0.6117549668874173|              0.125|0.48758278145695366|
|    4|       690.0|    1122.0|151414|0.6149732620320856|0.11610099337748345| 0.6036837748344371|
|    5|       603.0|     940.0|151366|0.6414893617021277| 0.0972682119205298| 0.7009519867549668|
|    6|       508.0|     834.0|151447|0.6091127098321343|0.08629966887417219| 0.7872516556291391|
|    7|       438.0|     686.0|151329|0.6384839650145773|0.07098509933774834| 0.8582367549668874|
|    8| 

In [27]:
%%spark

#2018-6
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_jun18_20190716.csv', 
    servicecalendardate = '2018-06-30', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|refipayoff| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+----------+------+-------------------+--------------------+-------------------+
|    1|      2818.0|    4927.0|161774| 0.5719504769636696|  0.1555092636429631| 0.1555092636429631|
|    2|      2769.0|    4381.0|161880| 0.6320474777448071| 0.13827604709150018| 0.2937853107344633|
|    3|      2699.0|    4116.0|162066| 0.6557337220602527| 0.12991194015718208|0.42369725089164534|
|    4|      2539.0|    3853.0|161940| 0.6589670386711654| 0.12161095855821734| 0.5453082094498627|
|    5|      2406.0|    3537.0|161856| 0.6802374893977947| 0.11163715557238899| 0.6569453650222516|
|    6|      2005.0|    3045.0|161917| 0.6584564860426929| 0.09610832307546634|  0.753053688097718|
|    7|      1780.0|    2741.0|162079|  0.649398029916089| 0.08651327210175803| 0.839566

In [31]:
%%spark

#2018-7
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_jul18_20190716.csv', 
    servicecalendardate = '2018-07-31', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+----------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|refipayoff| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+----------+------+-------------------+--------------------+-------------------+
|    1|      4406.0|    7136.0|165100|  0.617432735426009|  0.2423748386658515| 0.2423748386658515|
|    2|      3392.0|    4966.0|164936| 0.6830447039871124| 0.16867060661639835|0.41104544528224984|
|    3|      2679.0|    4003.0|165631|  0.669248063952036| 0.13596223082671013|   0.54700767610896|
|    4|      2203.0|    3370.0|165153| 0.6537091988130563| 0.11446233272196182| 0.6614700088309218|
|    5|      1908.0|    2961.0|165063| 0.6443768996960486| 0.10057061340941512|  0.762040622240337|
|    6|      1518.0|    2413.0|165286|  0.629092416079569| 0.08195774743563616| 0.8439983696759731|
|    7|      1126.0|    1886.0|165155| 0.5970307529162248| 0.06405814822362611| 0.908056517

In [32]:
%%spark

#2018-8
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_aug18_20190716.csv', 
    servicecalendardate = '2018-08-31', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+----------+------+-------------------+-------------------+-------------------+
|ntile|retained_new|refipayoff| count|     retention_rate|         percentage|            cum_sum|
+-----+------------+----------+------+-------------------+-------------------+-------------------+
|    1|      4322.0|    7085.0|166680| 0.6100211714890614|0.24121612419991828|0.24121612419991828|
|    2|      3319.0|    4845.0|166895| 0.6850361197110423|0.16495301647827862| 0.4061691406781969|
|    3|      2733.0|    4085.0|167058| 0.6690330477356181|0.13907803350129375| 0.5452471741794906|
|    4|      2246.0|    3363.0|166853| 0.6678560808801666|0.11449679967315811| 0.6597439738526488|
|    5|      1937.0|    2945.0|166764| 0.6577249575551782|0.10026555903581642| 0.7600095328884652|
|    6|      1514.0|    2458.0|166647| 0.6159479251423922|0.08368514231240637| 0.8436946752008716|
|    7|      1111.0|    1875.0|167360| 0.5925333333333334|0.06383630668664034| 0.9075309818875119|
|

In [28]:
%%spark

#2018-09
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_sep18_20190716.csv', 
    servicecalendardate = '2018-09-30', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------+------+------------------+-------------------+-------------------+
|ntile|retained_new|refipayoff| count|    retention_rate|         percentage|            cum_sum|
+-----+------------+----------+------+------------------+-------------------+-------------------+
|    1|      2404.0|    3827.0|168191|0.6281682780245623|0.13714879587155962|0.13714879587155962|
|    2|      2290.0|    3561.0|168186|0.6430777871384442| 0.1276161123853211|0.26476490825688076|
|    3|      2004.0|    3260.0|168427|0.6147239263803681|0.11682912844036697|0.38159403669724773|
|    4|      2050.0|    3258.0|168237| 0.629220380601596|0.11675745412844037| 0.4983514908256881|
|    5|      2124.0|    3224.0|168413|0.6588089330024814|0.11553899082568807| 0.6138904816513762|
|    6|      1932.0|    2987.0|168475|0.6468028121861399|0.10704558486238532| 0.7209360665137615|
|    7|      1763.0|    2713.0|168179|0.6498341319572429|0.09722620412844037| 0.8181622706422018|
|    8| 

In [33]:
%%spark

#2018-10
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_oct18_20190716.csv', 
    servicecalendardate = '2018-10-31', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+----------+------+------------------+--------------------+-------------------+
|ntile|retained_new|refipayoff| count|    retention_rate|          percentage|            cum_sum|
+-----+------------+----------+------+------------------+--------------------+-------------------+
|    1|      4472.0|    7338.0|169714|0.6094303624965931| 0.24949000407996735|0.24949000407996735|
|    2|      3217.0|    4579.0|170052|0.7025551430443329| 0.15568475452196381| 0.4051747586019312|
|    3|      2751.0|    3903.0|169853|0.7048424289008455| 0.13270093839249286| 0.5378756969944241|
|    4|      2272.0|    3338.0|169902|0.6806470940683044| 0.11349109207126343| 0.6513667890656875|
|    5|      1924.0|    2914.0|170099|0.6602608098833219| 0.09907520739834082| 0.7504419964640283|
|    6|      1457.0|    2302.0|169901|0.6329278887923545| 0.07826737386100911| 0.8287093703250374|
|    7|      1184.0|    1893.0|169729|0.6254622292657158| 0.06436148510811913| 0.8930708554331566|
|

In [34]:
%%spark

#2018-11
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_nov18_20190716.csv', 
    servicecalendardate = '2018-11-30', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+----------+------+------------------+--------------------+-------------------+
|ntile|retained_new|refipayoff| count|    retention_rate|          percentage|            cum_sum|
+-----+------------+----------+------+------------------+--------------------+-------------------+
|    1|      4939.0|    7800.0|170861|0.6332051282051282| 0.23701722932936278|0.23701722932936278|
|    2|      3716.0|    5126.0|171564|0.7249317206398751| 0.15576286122337354| 0.3927800905527363|
|    3|      2978.0|    4253.0|171434| 0.700211615330355|  0.1292351636330487| 0.5220152541857851|
|    4|      2474.0|    3647.0|171414|0.6783657800932273|  0.1108207481236136| 0.6328360023093986|
|    5|      2168.0|    3178.0|171318|0.6821900566393958| 0.09656932753957884| 0.7294053298489774|
|    6|      1682.0|    2626.0|171456|0.6405178979436406| 0.07979580054088548|  0.809201130389863|
|    7|      1491.0|    2353.0|171224|0.6336591585210369| 0.07150019751435778| 0.8807013279042207|
|

In [29]:
%%spark

#2018-12
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_dec18_20190716.csv', 
    servicecalendardate = '2018-12-31', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------+------+------------------+-------------------+-------------------+
|ntile|retained_new|refipayoff| count|    retention_rate|         percentage|            cum_sum|
+-----+------------+----------+------+------------------+-------------------+-------------------+
|    1|      3600.0|    5689.0|172196|0.6328001406222534|0.14656704882133195|0.14656704882133195|
|    2|      3303.0|    4914.0|172633|0.6721611721611722|0.12660054102795312| 0.2731675898492851|
|    3|      3225.0|    4746.0|172906| 0.679519595448799|0.12227231740306582| 0.3954399072523509|
|    4|      3091.0|    4486.0|172597|0.6890325456977262| 0.1155738760788355| 0.5110137833311864|
|    5|      2881.0|    4225.0|172571|0.6818934911242603|0.10884967151874275| 0.6198634548499291|
|    6|      2663.0|    3848.0|172353| 0.692047817047817|0.09913693159860879| 0.7190003864485379|
|    7|      2271.0|    3330.0|172438| 0.681981981981982|0.08579157542187299| 0.8047919618704109|
|    8| 

In [35]:
%%spark

#2019-1
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_jan19_20190716.csv', 
    servicecalendardate = '2019-01-31', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+----------+------+------------------+-------------------+-------------------+
|ntile|retained_new|refipayoff| count|    retention_rate|         percentage|            cum_sum|
+-----+------------+----------+------+------------------+-------------------+-------------------+
|    1|      6371.0|    9949.0|173461|0.6403658659161725|0.22517200796668477|0.22517200796668477|
|    2|      4365.0|    6349.0|173957|0.6875098440699323|0.14369455006337137|0.36886655803005614|
|    3|      3777.0|    5541.0|173925|0.6816459122902003|0.12540738728951656| 0.4942739453195727|
|    4|      3179.0|    4839.0|173909|0.6569539160983674|0.10951928299837045| 0.6037932283179431|
|    5|      2839.0|    4408.0|173853|0.6440562613430127|0.09976462067716821| 0.7035578489951113|
|    6|      2484.0|    3834.0|173659| 0.647887323943662|0.08677349266702879| 0.7903313416621401|
|    7|      2099.0|    3356.0|173829|0.6254469606674613| 0.0759550968676444| 0.8662864385297845|
|    8|    

In [36]:
%%spark

#2019-2
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_feb19_20190716.csv', 
    servicecalendardate = '2019-02-28', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+----------+------+------------------+--------------------+-------------------+
|ntile|retained_new|refipayoff| count|    retention_rate|          percentage|            cum_sum|
+-----+------------+----------+------+------------------+--------------------+-------------------+
|    1|      6545.0|   10606.0|174952|0.6171035263058646| 0.22050812923614288|0.22050812923614288|
|    2|      4473.0|    6818.0|175026|0.6560574948665298| 0.14175225581105244| 0.3622603850471953|
|    3|      3774.0|    5901.0|175657|0.6395526182003051| 0.12268701401305668|0.48494739906025197|
|    4|      3292.0|    5180.0|174898|0.6355212355212355| 0.10769678572913635| 0.5926441847893883|
|    5|      2923.0|    4799.0|175425|0.6090852260887685| 0.09977545843902033| 0.6924196432284087|
|    6|      2576.0|    4260.0|175420|0.6046948356807512|  0.0885691712753129| 0.7809888145037216|
|    7|      2147.0|    3684.0|175185|0.5827904451682954| 0.07659362135639736|  0.857582435860119|
|

In [37]:
%%spark

#2019-3
monthly_performance(
    loanpurpose = 'refi',
    score_file = 'pred_refiV1_3_mar19_20190716.csv', 
    servicecalendardate = '2019-03-31', 
    df_actual = actual, 
    result_path = result_path_refi, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+----------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|refipayoff| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+----------+------+-------------------+--------------------+-------------------+
|    1|      6076.0|   10605.0|176667|  0.572937293729373| 0.23926629515150147|0.23926629515150147|
|    2|      4070.0|    6560.0|177148| 0.6204268292682927| 0.14800442208334275| 0.3872707172348442|
|    3|      3307.0|    5458.0|176818| 0.6058995969219494| 0.12314148410531778|  0.510412201340162|
|    4|      2853.0|    4706.0|177039| 0.6062473438164045| 0.10617512352503215| 0.6165873248651942|
|    5|      2517.0|    4298.0|176868| 0.5856212191717077| 0.09696997044423888|  0.713557295309433|
|    6|      2044.0|    3635.0|177128| 0.5623108665749657| 0.08201159668794983| 0.7955688919973829|
|    7|      1789.0|    3270.0|177153| 0.5470948012232416|  0.0737765945445931|  0.86934548

### (3). Mover_6 (Purchase 3-6 Months)

In [13]:
%%spark

#2017-03
monthly_performance(
    loanpurpose = 'purchase_6',
    score_file = 'pred_Mover_3to6_V1_mar17_20190719.csv', 
    servicecalendardate = '2017-03-31', 
    df_actual = actual, 
    result_path = result_path_pur_6, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff_6| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|    1|       548.0|          2746.0|137573| 0.1995630007283321| 0.18290814627322988|0.18290814627322988|
|    2|       515.0|          2160.0|138125|0.23842592592592593|  0.1438753080663425|0.32678345433957234|
|    3|       395.0|          1825.0|137692|0.21643835616438356|  0.1215613135282755| 0.4483447678678479|
|    4|       367.0|          1649.0|137914|0.22255912674348088| 0.10983814027842537| 0.5581829081462732|
|    5|       361.0|          1502.0|137961| 0.2403462050599201| 0.10004662625724373|  0.658229534403517|
|    6|       280.0|          1297.0|137778| 0.2158828064764842|  0.0863917937787251|  0.744621328182242|
|    7|       239.0|          1208

In [14]:
%%spark

#2017-06
monthly_performance(
    loanpurpose = 'purchase_6',
    score_file = 'pred_Mover_3to6_V1_jun17_20190719.csv', 
    servicecalendardate = '2017-06-30', 
    df_actual = actual, 
    result_path = result_path_pur_6, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------------+------+-------------------+--------------------+------------------+
|ntile|retained_new|purchasepayoff_6| count|     retention_rate|          percentage|           cum_sum|
+-----+------------+----------------+------+-------------------+--------------------+------------------+
|    1|       421.0|          2073.0|142175|0.20308731307284128|  0.1812062937062937|0.1812062937062937|
|    2|       395.0|          1631.0|142106| 0.2421827099938688| 0.14256993006993007|0.3237762237762238|
|    3|       338.0|          1403.0|142624|0.24091233071988596| 0.12263986013986014|0.4464160839160839|
|    4|       313.0|          1246.0|142189|0.25120385232744785| 0.10891608391608391|0.5553321678321679|
|    5|       231.0|          1092.0|142661|0.21153846153846154| 0.09545454545454546|0.6507867132867133|
|    6|       266.0|          1049.0|142263|0.25357483317445184|  0.0916958041958042|0.7424825174825175|
|    7|       190.0|           931.0|142346

In [15]:
%%spark

#2017-09
monthly_performance(
    loanpurpose = 'purchase_6',
    score_file = 'pred_Mover_3to6_V1_sep17_20190719.csv', 
    servicecalendardate = '2017-09-30', 
    df_actual = actual, 
    result_path = result_path_pur_6, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff_6| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|    1|       445.0|          2324.0|146558| 0.1914802065404475| 0.18650188588395794|0.18650188588395794|
|    2|       434.0|          1869.0|147008|0.23220973782771537| 0.14998796244282162|0.33648984832677953|
|    3|       353.0|          1607.0|146548| 0.2196639701306783|  0.1289623625712222|0.46545221089800176|
|    4|       344.0|          1497.0|146998|0.22979291917167669| 0.12013482064039804| 0.5855870315383999|
|    5|       289.0|          1207.0|146948|0.23943661971830985| 0.09686221009549795| 0.6824492416338978|
|    6|       244.0|          1052.0|146876|0.23193916349809887| 0.08442340101115481| 0.7668726426450526|
|    7|       218.0|           972

In [16]:
%%spark

#2017-12
monthly_performance(
    loanpurpose = 'purchase_6',
    score_file = 'pred_Mover_3to6_V1_dec17_20190719.csv', 
    servicecalendardate = '2017-12-31', 
    df_actual = actual, 
    result_path = result_path_pur_6, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff_6| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|    1|       740.0|          3391.0|152795| 0.2182247124741964| 0.17499225926308185|0.17499225926308185|
|    2|       752.0|          2920.0|152999|0.25753424657534246| 0.15068634534007638|0.32567860460315823|
|    3|       573.0|          2485.0|152970|0.23058350100603622| 0.12823820827742802| 0.4539168128805862|
|    4|       502.0|          2112.0|153168|0.23768939393939395| 0.10898957580761688| 0.5629063886882031|
|    5|       447.0|          1894.0|152805|0.23600844772967264| 0.09773970481989885|  0.660646093508102|
|    6|       398.0|          1746.0|153113| 0.2279495990836197| 0.09010217772731964| 0.7507482712354216|
|    7|       356.0|          1625

In [17]:
%%spark

#2018-03
monthly_performance(
    loanpurpose = 'purchase_6',
    score_file = 'pred_Mover_3to6_V1_mar18_20190719.csv', 
    servicecalendardate = '2018-03-31', 
    df_actual = actual, 
    result_path = result_path_pur_6, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff_6| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|    1|       652.0|          2963.0|157605| 0.2200472494093824| 0.17610698365527488|0.17610698365527488|
|    2|       610.0|          2343.0|157364|0.26034997865983783| 0.13925705794947993|0.31536404160475484|
|    3|       570.0|          2087.0|158310| 0.2731193100143747| 0.12404160475482913|0.43940564635958396|
|    4|       471.0|          1839.0|157725| 0.2561174551386623| 0.10930163447251115| 0.5487072808320951|
|    5|       420.0|          1723.0|157555| 0.2437608821822403| 0.10240713224368499| 0.6511144130757801|
|    6|       372.0|          1613.0|157790| 0.2306261624302542| 0.09586924219910847| 0.7469836552748885|
|    7|       302.0|          1443

In [18]:
%%spark

#2018-06
monthly_performance(
    loanpurpose = 'purchase_6',
    score_file = 'pred_Mover_3to6_V1_jun18_20190719.csv', 
    servicecalendardate = '2018-06-30', 
    df_actual = actual, 
    result_path = result_path_pur_6, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------------+------+-------------------+-------------------+------------------+
|ntile|retained_new|purchasepayoff_6| count|     retention_rate|         percentage|           cum_sum|
+-----+------------+----------------+------+-------------------+-------------------+------------------+
|    1|       480.0|          2111.0|161900| 0.2273803884414969|  0.175887352107982| 0.175887352107982|
|    2|       389.0|          1628.0|161782|0.23894348894348894| 0.1356440593234461|0.3115314114314281|
|    3|       339.0|          1386.0|161836|0.24458874458874458| 0.1154807532077987|0.4270121646392268|
|    4|       333.0|          1285.0|161981|0.25914396887159535|0.10706548908515247|0.5340776537243793|
|    5|       275.0|          1154.0|162060|0.23830155979202772|0.09615064155974004|0.6302282952841193|
|    6|       263.0|          1151.0|161987|0.22849695916594265|0.09590068321946342|0.7261289785035827|
|    7|       219.0|          1042.0|161912|0.210172

In [18]:
%%spark

#2018-07
monthly_performance(
    loanpurpose = 'purchase_6',
    score_file = 'pred_Mover_3to6_V1_jul18_20190722.csv', 
    servicecalendardate = '2018-07-31', 
    df_actual = actual, 
    result_path = result_path_pur_6, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+----------------+------+-------------------+-------------------+-------------------+
|ntile|retained_new|purchasepayoff_6| count|     retention_rate|         percentage|            cum_sum|
+-----+------------+----------------+------+-------------------+-------------------+-------------------+
|    1|       493.0|          2247.0|164913|0.21940364931019138|0.20849958244409392|0.20849958244409392|
|    2|       411.0|          1657.0|165160|  0.248038624019312| 0.1537533636447991|  0.362252946088893|
|    3|       335.0|          1390.0|165502|0.24100719424460432|0.12897837988308436| 0.4912313259719774|
|    4|       297.0|          1281.0|165055|0.23185011709601874|0.11886424793541801| 0.6100955739073953|
|    5|       212.0|          1048.0|165241|0.20229007633587787|0.09724413101976431| 0.7073397049271597|
|    6|       192.0|           936.0|165387|0.20512820512820512|0.08685162846803378| 0.7941913333951934|
|    7|       155.0|           817.0|165088|  

In [19]:
%%spark

#2018-08
monthly_performance(
    loanpurpose = 'purchase_6',
    score_file = 'pred_Mover_3to6_V1_aug18_20190722.csv', 
    servicecalendardate = '2018-08-31', 
    df_actual = actual, 
    result_path = result_path_pur_6, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff_6| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|    1|       497.0|          2238.0|166697|0.22207327971403037| 0.20229594142637622|0.20229594142637622|
|    2|       405.0|          1726.0|166950|0.23464658169177288| 0.15601554731989514|0.35831148874627133|
|    3|       324.0|          1421.0|166639|0.22800844475721324|  0.1284461719244328|0.48675766067070414|
|    4|       281.0|          1246.0|167081|0.22552166934189405| 0.11262767784506915| 0.5993853385157732|
|    5|       222.0|          1078.0|167032|0.20593692022263452| 0.09744192352888005| 0.6968272620446534|
|    6|       172.0|           961.0|166613|0.17898022892819979| 0.08686613034439121| 0.7836933923890446|
|    7|       165.0|           847.0|

In [19]:
%%spark

#2018-09
monthly_performance(
    loanpurpose = 'purchase_6',
    score_file = 'pred_Mover_3to6_V1_sep18_20190719.csv', 
    servicecalendardate = '2018-09-30', 
    df_actual = actual, 
    result_path = result_path_pur_6, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff_6| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|    1|       300.0|          1403.0|168192|0.21382751247327156| 0.10538571321264929|0.10538571321264929|
|    2|       352.0|          1545.0|168330|  0.227831715210356| 0.11605197926838429|0.22143769248103357|
|    3|       427.0|          1942.0|168395|0.21987641606591143| 0.14587245549462932|0.36731014797566286|
|    4|       363.0|          1874.0|168329| 0.1937033084311633| 0.14076466611582664| 0.5080748140914895|
|    5|       358.0|          1620.0|168313|0.22098765432098766| 0.12168557049500488| 0.6297603845864944|
|    6|       294.0|          1319.0|168436|0.22289613343442002| 0.09907609103883422| 0.7288364756253286|
|    7|       248.0|          1055

In [20]:
%%spark

#2018-10
monthly_performance(
    loanpurpose = 'purchase_6',
    score_file = 'pred_Mover_3to6_V1_oct18_20190722.csv', 
    servicecalendardate = '2018-10-31', 
    df_actual = actual, 
    result_path = result_path_pur_6, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+----------------+------+-------------------+-------------------+-------------------+
|ntile|retained_new|purchasepayoff_6| count|     retention_rate|         percentage|            cum_sum|
+-----+------------+----------------+------+-------------------+-------------------+-------------------+
|    1|       700.0|          3341.0|169804|0.20951810835079318|0.20489390408438612|0.20489390408438612|
|    2|       593.0|          2584.0|169624|0.22948916408668732|0.15846927511345518| 0.3633631791978413|
|    3|       451.0|          2127.0|170187|0.21203573107663376| 0.1304427817981111| 0.4938059609959524|
|    4|       406.0|          1847.0|169821| 0.2198159177043855|0.11327118851956335| 0.6070771495155157|
|    5|       368.0|          1652.0|170231|0.22276029055690072|0.10131240034343186| 0.7083895498589476|
|    6|       279.0|          1373.0|169764|0.20320466132556445|0.08420213418373605| 0.7925916840426837|
|    7|       273.0|          1215.0|169765|0.

In [21]:
%%spark

#2018-11
monthly_performance(
    loanpurpose = 'purchase_6',
    score_file = 'pred_Mover_3to6_V1_nov18_20190722.csv', 
    servicecalendardate = '2018-11-30', 
    df_actual = actual, 
    result_path = result_path_pur_6, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff_6| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+----------------+------+-------------------+--------------------+-------------------+
|    1|       732.0|          3407.0|171141| 0.2148517757557969| 0.20071874631789796|0.20071874631789796|
|    2|       678.0|          2742.0|171501|0.24726477024070023|  0.1615411806291976| 0.3622599269470956|
|    3|       513.0|          2278.0|171370|0.22519754170324846|  0.1342052550960292| 0.4964651820431248|
|    4|       452.0|          1950.0|171332| 0.2317948717948718| 0.11488158359844468| 0.6113467656415694|
|    5|       371.0|          1633.0|171350| 0.2271892222902633| 0.09620596205962059|   0.70755272770119|
|    6|       324.0|          1452.0|171297| 0.2231404958677686| 0.08554259455638034| 0.7930953222575704|
|    7|       278.0|          1288.0|

### (4). Mover_12 (Purchase 6-12 Months)

In [20]:
%%spark

#2016-09
monthly_performance(
    loanpurpose = 'purchase_12',
    score_file = 'pred_Mover_6to12_V1_sep16_20190722.csv', 
    servicecalendardate = '2016-09-30', 
    df_actual = actual, 
    result_path = result_path_pur_12, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+-----------------+------+-------------------+-------------------+------------------+
|ntile|retained_new|purchasepayoff_12| count|     retention_rate|         percentage|           cum_sum|
+-----+------------+-----------------+------+-------------------+-------------------+------------------+
|    1|      1077.0|           4813.0|123108|0.22376895906918762| 0.1669789064668332|0.1669789064668332|
|    2|       932.0|           3935.0|123687|0.23684879288437102| 0.1365181792950319|0.3034970857618651|
|    3|       840.0|           3703.0|123448|0.22684310018903592| 0.1284693311129614|0.4319664168748265|
|    4|       754.0|           3289.0|123360|0.22924901185770752|0.11410630030530114|0.5460727171801276|
|    5|       655.0|           3126.0|123559|  0.209532949456174|0.10845129059117402|0.6545240077713017|
|    6|       558.0|           2820.0|123388|0.19787234042553192|0.09783513738551207|0.7523591451568138|
|    7|       475.0|           2441.0|12353

In [21]:
%%spark

#2016-12
monthly_performance(
    loanpurpose = 'purchase_12',
    score_file = 'pred_Mover_6to12_V1_dec16_20190722.csv', 
    servicecalendardate = '2016-12-31', 
    df_actual = actual, 
    result_path = result_path_pur_12, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+-----------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff_12| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+-----------------+------+-------------------+--------------------+-------------------+
|    1|       956.0|           3905.0|132464| 0.2448143405889885| 0.15679582413170046|0.15679582413170046|
|    2|       790.0|           3380.0|132740|0.23372781065088757| 0.13571571973499297| 0.2925115438666934|
|    3|       737.0|           3107.0|132404|0.23720630833601544| 0.12475406544870508| 0.4172656093153985|
|    4|       629.0|           2849.0|132736|0.22077922077922077| 0.11439469985946597| 0.5316603091748645|
|    5|       542.0|           2721.0|132603|  0.199191473722896| 0.10925516964464967| 0.6409154788195142|
|    6|       467.0|           2467.0|132698|0.18929874341305228| 0.09905641437462356| 0.7399718931941377|
|    7|       420.0|     

In [22]:
%%spark

#2017-03
monthly_performance(
    loanpurpose = 'purchase_12',
    score_file = 'pred_Mover_6to12_V1_mar17_20190722.csv', 
    servicecalendardate = '2017-03-31', 
    df_actual = actual, 
    result_path = result_path_pur_12, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+-----------------+------+-------------------+-------------------+-------------------+
|ntile|retained_new|purchasepayoff_12| count|     retention_rate|         percentage|            cum_sum|
+-----+------------+-----------------+------+-------------------+-------------------+-------------------+
|    1|       867.0|           3678.0|137822|0.23572593800978792|0.16432113657686637|0.16432113657686637|
|    2|       717.0|           3071.0|137746| 0.2334744382937154|0.13720234106241344|0.30152347763927984|
|    3|       653.0|           2764.0|138001| 0.2362518089725036|0.12348657463253362|0.42501005227181343|
|    4|       576.0|           2554.0|137551| 0.2255285826155051|0.11410445427333243| 0.5391145065451459|
|    5|       538.0|           2375.0|138162| 0.2265263157894737|0.10610731358620382| 0.6452218201313497|
|    6|       430.0|           2163.0|137896|0.19879796578825706|0.09663583969977214| 0.7418576598311218|
|    7|       373.0|           192

In [23]:
%%spark

#2017-06
monthly_performance(
    loanpurpose = 'purchase_12',
    score_file = 'pred_Mover_6to12_V1_jun17_20190722.csv', 
    servicecalendardate = '2017-06-30', 
    df_actual = actual, 
    result_path = result_path_pur_12, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+-----------------+------+-------------------+-------------------+-------------------+
|ntile|retained_new|purchasepayoff_12| count|     retention_rate|         percentage|            cum_sum|
+-----+------------+-----------------+------+-------------------+-------------------+-------------------+
|    1|      1182.0|           4943.0|142187| 0.2391260368197451|0.16667790666306986|0.16667790666306986|
|    2|      1001.0|           4282.0|142306|0.23376926669780476|0.14438899379552197|0.31106690045859187|
|    3|       865.0|           3741.0|142136|0.23122159850307406|0.12614647963312653| 0.4372133800917184|
|    4|       779.0|           3389.0|142510| 0.2298613160224255| 0.1142770434313461| 0.5514904235230644|
|    5|       683.0|           3074.0|142327|0.22218607677293428|0.10365524683032101| 0.6551456703533854|
|    6|       639.0|           2897.0|142470| 0.2205730065585088| 0.0976868087402212| 0.7528324790936067|
|    7|       504.0|           249

In [24]:
%%spark

#2017-09
monthly_performance(
    loanpurpose = 'purchase_12',
    score_file = 'pred_Mover_6to12_V1_sep17_20190722.csv', 
    servicecalendardate = '2017-09-30', 
    df_actual = actual, 
    result_path = result_path_pur_12, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+-----------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff_12| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+-----------------+------+-------------------+--------------------+-------------------+
|    1|      1291.0|           5429.0|146552|0.23779701602505066| 0.15981748601707388|0.15981748601707388|
|    2|      1184.0|           4705.0|146880| 0.2516471838469713| 0.13850456284957316|0.29832204886664704|
|    3|      1061.0|           4308.0|146836|0.24628597957288764|  0.1268177803944657|0.42513982926111277|
|    4|       920.0|           3854.0|146766| 0.2387130254281266| 0.11345304680600529|  0.538592876067118|
|    5|       865.0|           3660.0|146992|0.23633879781420766| 0.10774212540476891| 0.6463350014718869|
|    6|       720.0|           3286.0|146855|0.21911138161898966| 0.09673241095083898| 0.7430674124227259|
|    7|       628.0|     

In [25]:
%%spark

#2017-12
monthly_performance(
    loanpurpose = 'purchase_12',
    score_file = 'pred_Mover_6to12_V1_dec17_20190722.csv', 
    servicecalendardate = '2017-12-31', 
    df_actual = actual, 
    result_path = result_path_pur_12, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+-----------------+------+-------------------+-------------------+-------------------+
|ntile|retained_new|purchasepayoff_12| count|     retention_rate|         percentage|            cum_sum|
+-----+------------+-----------------+------+-------------------+-------------------+-------------------+
|    1|      1157.0|           4326.0|152617| 0.2674526121128063| 0.1580447172292854| 0.1580447172292854|
|    2|       913.0|           3652.0|153093|               0.25| 0.1334210141750694|0.29146573140435483|
|    3|       835.0|           3269.0|152891| 0.2554297950443561|0.11942861318135321|  0.410894344585708|
|    4|       773.0|           3151.0|153100|0.24531894636623294|0.11511763846266257| 0.5260119830483706|
|    5|       669.0|           2995.0|153136|0.22337228714524207|0.10941838374981733|  0.635430366798188|
|    6|       548.0|           2658.0|152929|0.20617005267118135|0.09710653222270933| 0.7325368990208972|
|    7|       494.0|           242

In [22]:
%%spark

#2018-01
monthly_performance(
    loanpurpose = 'purchase_12',
    score_file = 'pred_Mover_6to12_V1_jan18_20190722.csv', 
    servicecalendardate = '2018-01-31', 
    df_actual = actual, 
    result_path = result_path_pur_12, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+-----------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff_12| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+-----------------+------+-------------------+--------------------+-------------------+
|    1|      1173.0|           4764.0|154468|0.24622166246851385|  0.1952939247355907| 0.1952939247355907|
|    2|       880.0|           3626.0|154462|0.24269167126309985| 0.14864310896121996|0.34393703369681067|
|    3|       757.0|           3303.0|154775|0.22918558885861337| 0.13540214806919734|0.47933918176600804|
|    4|       619.0|           2774.0|154565|0.22314347512617158| 0.11371648766090021| 0.5930556694269082|
|    5|       543.0|           2466.0|154608|0.22019464720194648| 0.10109043207346069| 0.6941461015003689|
|    6|       469.0|           2183.0|154474|0.21484196060467248|  0.0894892186603263| 0.7836353201606953|
|    7|       414.0|        

In [23]:
%%spark

#2018-02
monthly_performance(
    loanpurpose = 'purchase_12',
    score_file = 'pred_Mover_6to12_V1_feb18_20190722.csv', 
    servicecalendardate = '2018-02-28', 
    df_actual = actual, 
    result_path = result_path_pur_12, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+-----------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff_12| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+-----------------+------+-------------------+--------------------+-------------------+
|    1|      1066.0|           4509.0|155454|0.23641605677533822| 0.18757019842755523|0.18757019842755523|
|    2|       833.0|           3587.0|156103|0.23222748815165878| 0.14921585756479055| 0.3367860559923458|
|    3|       756.0|           3215.0|155845|0.23514774494556764|  0.1337410042015059|0.47052706019385165|
|    4|       608.0|           2732.0|155949| 0.2225475841874085| 0.11364865427014435|  0.584175714463996|
|    5|       516.0|           2410.0|155912|0.21410788381742737| 0.10025375431590332| 0.6844294687798993|
|    6|       476.0|           2230.0|155936|0.21345291479820627| 0.09276592204334623| 0.7771953908232455|
|    7|       401.0|        

In [26]:
%%spark

#2018-03
monthly_performance(
    loanpurpose = 'purchase_12',
    score_file = 'pred_Mover_6to12_V1_mar18_20190722.csv', 
    servicecalendardate = '2018-03-31', 
    df_actual = actual, 
    result_path = result_path_pur_12, 
    n = 10, 
    ls_prob = ['logRegProb', 'randForProb', 'gbtProb'],
    nrow = 10
)

logRegProb:
+-----+------------+-----------------+------+-------------------+--------------------+-------------------+
|ntile|retained_new|purchasepayoff_12| count|     retention_rate|          percentage|            cum_sum|
+-----+------------+-----------------+------+-------------------+--------------------+-------------------+
|    1|       934.0|           3844.0|157584|0.24297606659729448| 0.15994673989930513|0.15994673989930513|
|    2|       791.0|           3255.0|157660|0.24301075268817204| 0.13543877168892773|0.29538551158823284|
|    3|       744.0|           2920.0|157797| 0.2547945205479452| 0.12149960471019015|  0.416885116298423|
|    4|       659.0|           2825.0|157785|0.23327433628318583| 0.11754670661174219| 0.5344318229101652|
|    5|       554.0|           2588.0|157912|0.21406491499227204| 0.10768526609245621| 0.6421170890026214|
|    6|       421.0|           2314.0|157764|0.18193604148660328|  0.0962842757874589| 0.7384013647900803|
|    7|       348.0|     

In [24]:
%%spark

#2018-04
monthly_performance(
    loanpurpose = 'purchase_12',
    score_file = 'pred_Mover_6to12_V1_apr18_20190722.csv', 
    servicecalendardate = '2018-04-30', 
    df_actual = actual, 
    result_path = result_path_pur_12, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+-----------------+------+-------------------+-------------------+-------------------+
|ntile|retained_new|purchasepayoff_12| count|     retention_rate|         percentage|            cum_sum|
+-----+------------+-----------------+------+-------------------+-------------------+-------------------+
|    1|      1088.0|           4861.0|158949|0.22382225879448672|0.18953483838265683|0.18953483838265683|
|    2|       853.0|           3861.0|159209|0.22092722092722092|0.15054392326587904| 0.3400787616485359|
|    3|       728.0|           3293.0|158980|0.22107500759186152|0.12839708347954926|0.46847584512808516|
|    4|       700.0|           3036.0|159396|  0.230566534914361|0.11837641829453738| 0.5868522634226225|
|    5|       532.0|           2554.0|159236|0.20830070477682067|0.09958279720825047|  0.686435060630873|
|    6|       467.0|           2285.0|158952|0.20437636761487965|0.08909424104183725| 0.7755293016727103|
|    7|       417.0|           1963.0

In [25]:
%%spark

#2018-05
monthly_performance(
    loanpurpose = 'purchase_12',
    score_file = 'pred_Mover_6to12_V1_may18_20190722.csv', 
    servicecalendardate = '2018-05-31', 
    df_actual = actual, 
    result_path = result_path_pur_12, 
    n = 10, 
    ls_prob = ['gbtProb'],
    nrow = 10
)

gbtProb:
+-----+------------+-----------------+------+-------------------+-------------------+-------------------+
|ntile|retained_new|purchasepayoff_12| count|     retention_rate|         percentage|            cum_sum|
+-----+------------+-----------------+------+-------------------+-------------------+-------------------+
|    1|      1182.0|           5167.0|160430|0.22875943487516934|0.19432117337344867|0.19432117337344867|
|    2|       937.0|           4098.0|160203|0.22864812103465104|0.15411808950733358|0.34843926288078225|
|    3|       798.0|           3490.0|160707|0.22865329512893984|0.13125235050770967| 0.4796916133884919|
|    4|       695.0|           3003.0|160625|0.23143523143523143|0.11293719443399775| 0.5926288078224896|
|    5|       545.0|           2614.0|160472|0.20849273144605968|0.09830763444904099| 0.6909364422715306|
|    6|       474.0|           2330.0|160644| 0.2034334763948498|0.08762692741632193| 0.7785633696878526|
|    7|       432.0|           1967.0