In [None]:
# Standard library.
# `imp` was deprecated in Python 3.4 and removed in 3.12; `importlib.reload`
# is the supported replacement and works on every Python 3.4+ kernel.
import importlib

# Third-party.
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Project modules (original import order kept in case importing has side effects).
import LoanPortfolio
import DataLoad
import LoanPortfolioModeling

# Re-import the project modules that are being edited alongside this notebook
# so code changes are picked up without restarting the kernel.
importlib.reload(LoanPortfolio)
importlib.reload(LoanPortfolioModeling)


In [2]:
# Build the portfolio object from the raw loan tape and macro data exposed by
# DataLoad, using 2022-12-31 as the forecast date.
loanPortfolio = LoanPortfolio.LoanPortfolio(
    rawLoanTapeDf=DataLoad.loanTapeRaw,
    macroDataDf=DataLoad.macroDataDf,
    forecastDate="2022-12-31",
)

# Pick the "fromC" entry of the split loan tape; this notebook models that slice only.
fromC = loanPortfolio.splitLoanTapeModelDict["fromC"]

# Modeling wrapper for the "fromC" slice, with LoanStatus2_Ind as the response.
modelingCH = LoanPortfolioModeling.LoanPortfolioModeling(
    fromC,
    responseVariable="LoanStatus2_Ind",
)



In [3]:
# Starting candidate covariates for the "base" group of "covariateList".
# The order of the entries is kept exactly as first specified.
modelingCH.setInputList(
    [
        "PTICore", "LTVCore", "BookTier", "UPB_Lag1", "MOB",
        "RemainingTerm", "NumberOfExtensions", "CurrentRate", "DTICore", "ModelYr",
        "BookValue", "OriginalAmtFinanced", "OriginalTerm", "BookNewUsed", "BookMileage",
        "HighFico", "remitQtr", "factor_Lag1", "PSAVERT",
    ],
    "covariateList",
    "base",
)

# Register the three categorical columns under "tuningVariableList" / "categoricalList".
# NOTE(review): presumably this makes the model treat them as categorical -- confirm
# against LoanPortfolioModeling.setInputList.
modelingCH.setInputList(
    [
        "BookTier",
        "BookNewUsed",
        "remitQtr",
    ],
    "tuningVariableList",
    "categoricalList",
)

<LoanPortfolioModeling.LoanPortfolioModeling at 0x1117d1990>

#### Step 1
- fit a univariate model for each covariate and drop those with a univariate p-value > 0.25
- pool categorical levels where needed (decided case by case)

In [None]:
# modelingCH.loopThroughUnitvariate()
# print(modelingCH.univariateRunSummary)

In [None]:
# Remove the insignificant covariate (DTICore) from the "base" covariate list.
modelingCH.updateInputList(
    {"remove": ["DTICore"]},
    "covariateList",
    "base",
)

# Add the large-magnitude numeric columns to the standardized-variable tuning list.
modelingCH.updateInputList(
    {
        "add": [
            "BookValue",
            "OriginalAmtFinanced",
            "BookMileage",
            "UPB_Lag1",
        ]
    },
    "tuningVariableList",
    "standardizedVariableList",
)

In [5]:
# Pool categorical levels for BookTier and remitQtr.

# modelingCH.displayCoef(modelingCH.univariateRunSummary[modelingCH.univariateRunSummary['Variable'] == 'remitQtr'][['modelRes']].values[0][0])


# Raw BookTier level -> pooled label, written as (pooled label, raw levels)
# pairs so each pooled group reads on one line. Keys are inserted in the
# order the raw levels are listed.
# NOTE(review): "Tier -" and "Tier -1" land in different pooled groups -- confirm intended.
bookTierPooling = {
    rawTier: pooledTier
    for pooledTier, rawTiers in (
        ("Tier-1", ("Tier 1", "Tier 2", "Tier 0", "Tier -")),
        ("Tier-2", ("Tier 4", "Tier -1", "Tier 3")),
        ("Tier-3", ("Tier 6", "Tier 5", "Tier Thin")),
        ("Tier-4", ("Tier 7", "Tier 8")),
        ("Tier-5", ("Tier 9", "Tier 10")),
    )
    for rawTier in rawTiers
}

# Create the pooled column, then swap it in for the raw column in the base covariates.
modelingCH.poolingCategorical("BookTier", "BookTierPoolingfromC", bookTierPooling)
modelingCH.updateInputList(
    {
        "add": ["BookTierPoolingfromC"],
        "remove": ["BookTier"],
    },
    "covariateList",
    "base",
)

# Quarters 1-2 -> first half, quarters 3-4 -> second half.
remitQtrPooling = {
    1: "remit1H",
    2: "remit1H",
    3: "remit2H",
    4: "remit2H",
}

modelingCH.poolingCategorical("remitQtr", "remitQtrPoolingfromC", remitQtrPooling)
modelingCH.updateInputList(
    {
        "add": ["remitQtrPoolingfromC"],
        "remove": ["remitQtr"],
    },
    "covariateList",
    "base",
)


<LoanPortfolioModeling.LoanPortfolioModeling at 0x1117d1990>

In [None]:
# Fit the model on the current "base" covariate group.
modelingCH.runModel(targetCovariateGroup="base")

# Store the fit under the key "step1 CovariateSelect".
# NOTE(review): the note text says "kick out <.25 univariate" while the Step 1
# description drops covariates with p-value > 0.25 -- confirm which is meant.
modelingCH.appendRegressionResults(
    resName="step1 CovariateSelect",
    resNotes="step 1 multivariate. kick out <.25 univariate, pooling categorical, add standardized numerical",
)


#### Step 2 / Step 3
- check for insignificant covariates in the covariate model
- for each covariate removed, check the impact on chi2 (overall significance) and the change in the remaining parameters (confounding check)


In [7]:
# print(modelingCH.RegressionResults['step1 CovariateSelect']['modelRes'].summary())
# focus on the p-values for status = 1 (ED) and 3 (Prepaid)
# ModelYr, BookMileage, BookNewUsed, BookValue

# baseCovariateModel = modelingCH.RegressionResults['step1 CovariateSelect']

# testModelYr = modelingCH.runAgainstVariate(baseCovariateModel, "ModelYr", remove = True)

# testBookMileage = modelingCH.runAgainstVariate(baseCovariateModel, "BookMileage", remove = True)

# testNewUsed = modelingCH.runAgainstVariate(baseCovariateModel, "BookNewUsed", remove = True)

# testBookValue = modelingCH.runAgainstVariate(baseCovariateModel, "BookValue", remove = True)

# Remove BookValue from the "base" covariate list.
modelingCH.updateInputList(
    {"remove": ["BookValue"]},
    "covariateList",
    "base",
)



<LoanPortfolioModeling.LoanPortfolioModeling at 0x1117d1990>

In [None]:
# Refit on the reduced "base" covariate group and store the fit as "step 3 multivariate".
modelingCH.runModel(targetCovariateGroup="base")
modelingCH.appendRegressionResults(
    resName="step 3 multivariate",
    resNotes="remove unsignificant covariate. checked confounding impact",
)

#### Step 4
- add back each variable excluded in step 1 and check 1) its significance and 2) its confounding impact



In [None]:
# DTICore

# baseCovariateModel = modelingCH.RegressionResults['step 3 multivariate']
# modelingCH.updateInputList(
#     {"add": ["DTICore"]},
#     "tuningVariableList",
#     "standardizedVariableList",
# )

# testDTICore= modelingCH.runAgainstVariate(baseCovariateModel, "DTICore", remove = False)
# include DTI no major impact. p value ~20%, no meaningful confounding impact

# Refit on the "base" covariate group and store the fit as "step 4 multivariate".
# No updateInputList call is active in this cell, so the covariate list is
# whatever the preceding cells left in place.
modelingCH.runModel(targetCovariateGroup="base")
modelingCH.appendRegressionResults(
    resName="step 4 multivariate",
    resNotes="checked confounding impact from variate excluded from step 1",
)




#### Step 5: check continuous variable for non-linearity


In [None]:
# continuous variable based off step 4 multivariate model

# PTICore, LTVCore, UPB_Lag1, MOB, RemainingTerm, CurrentRate, ModelYr, OriginalAmtFinanced, OriginalTerm, BookMileage, HighFico, factor_Lag1, PSAVERT


# modelingCH.displayLogitOnSingle('PSAVERT', binsArg={"m": 6})
# focus on 1 to 0 and 3 to 0 odds ratio
# there are non-linearity; however, for the purpose of this project, we will not include non-linearity. move on



#### Step 6: check interaction term

In [None]:
# test = modelingCH.runInteractionTerms()
# test.to_csv("fromC_interactionTerms.csv", index = False)
# many significant interaction terms. added top 2 based off g2 and clinical meaning
# RemainingTerm x factor_Lag1
# MOB x RemainingTerm


# Register the two selected interaction pairs under "tuningVariableList" /
# "interaction", then ask the modeling object to add them.
modelingCH.updateInputList(
    {
        "add": [
            ["RemainingTerm", "factor_Lag1"],
            ["MOB", "RemainingTerm"],
        ]
    },
    "tuningVariableList",
    "interaction",
)
modelingCH.addInteraction()

# Base covariates = step 4 covariates followed by the two interaction terms.
# NOTE(review): presumably addInteraction() creates columns named "<A> x <B>" --
# confirm the names below match what it produces.
modelingCH.setInputList(
    modelingCH.RegressionResults["step 4 multivariate"]["covariate"]
    + ["RemainingTerm x factor_Lag1", "MOB x RemainingTerm"],
    "covariateList",
    "base",
)
modelingCH.runModel(targetCovariateGroup="base")

modelingCH.appendRegressionResults(
    resName="step 6 multivariate",
    resNotes="checked out interaction terms. added two interaction terms.",
)

#### Train and split into training and test data sets
- use the final-step covariate model to train
- evaluate on the test data set
- use AUC to summarize goodness of fit

In [None]:
# modelingCH.runTrainingTest()
# print(modelingCH.trainingTestRes['AUC(ovr)'])
# print(modelingCH.trainingTestRes['AUC(ovo)'])

# .84
# .81

In [14]:
import pickle

# Bundle the step 6 regression result with the two categorical pooling maps,
# each recorded with its source column and pooled column name.
savePkl = {
    "model": modelingCH.RegressionResults["step 6 multivariate"],
    "mappings": [
        {
            "origCol": "BookTier",
            "newCol": "BookTierPoolingfromC",
            "mapping": bookTierPooling,
        },
        {
            "origCol": "remitQtr",
            "newCol": "remitQtrPoolingfromC",
            "mapping": remitQtrPooling,
        },
    ],
}

# Write the bundle to fromCModel.pickle in the current working directory.
with open("fromCModel.pickle", "wb") as pickleFile:
    pickle.dump(savePkl, pickleFile, protocol=pickle.HIGHEST_PROTOCOL)