# <center>Bad Bank Behavior<br>Analyzing Bank Mortgages during the 2008 Housing Bubble</center>  

<center>Michael Siebel</center>
<center>December 2020</center>

<br>
    
## Table of Contents
- [Goals](#Goals)<br>
- [Load Functions](#Load-Functions)<br>
- [Data Wrangling](#Data-Wrangling)<br>
- [Improved Assumptions](#Improved-Assumptions)<br>
- [Weakened Assumptions](#Weakened-Assumptions)

# Goals  
<br>

 

***

# Load Functions

In [1]:
# Load functions
%run Functions.ipynb
pd.set_option("display.max_columns", 200)
pd.set_option('display.max_rows', 200)

# Load data
file_to_open = open('..\Data\Pickle\df.pkl', 'rb') 
df  = pickle.load(file_to_open) 
file_to_open.close()

# Drop mergeID column
df = df.drop(labels='Loan ID', axis=1)

# Convert Inf values to NA
df = df.replace([np.inf, -np.inf], np.nan)

Using TensorFlow backend.


In [2]:
## Bank and Classifier Lists
# The nine lenders analysed individually.
banks = [
    'Bank of America', 'Wells Fargo Bank', 'CitiMortgage',
    'JPMorgan Chase', 'GMAC Mortgage', 'SunTrust Mortgage',
    'AmTrust Bank', 'PNC Bank', 'Flagstar Bank',
]

# Same list plus the pooled 'All Banks' entry (a new list; `banks` is not modified).
banks_plus = [*banks, 'All Banks']

# Labels of the classifiers.
clfs_str = ['RFC', 'RFC PCA', 'RUS Boost']

## Create an environment variable to avoid using the GPU. This can be changed.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

***

# Data Wrangling

In [3]:
# Verify Bank Counts
# Tally the rows per value of the 'Bank' column; the names shown should be the
# same nine lenders listed in `banks`.
df['Bank'].value_counts()

Bank of America      650087
CitiMortgage         260698
Wells Fargo Bank     214039
JPMorgan Chase       202997
GMAC Mortgage        178160
SunTrust Mortgage    141398
PNC Bank             100351
AmTrust Bank          79360
Flagstar Bank         66637
Name: Bank, dtype: int64

In [4]:
# Variables to drop
dropvars = ['File Year', 'Year', 'Month', 'Region', 'FIPS',
            'Zip Code', 'Mortgage Insurance Type', 'Property State',
            'First Payment', 'Original Loan-to-Value (LTV)']
df = df.drop(columns=dropvars)

# Also drop every column whose name starts with Asset, Liab, Eqtot or Dep
# (one negative look-ahead instead of four separate filter passes).
df = df.filter(regex=r'^(?!Asset|Liab|Eqtot|Dep).*$')

# Rebuild 'Reported Period' as a zero-padded six-character string split after the
# first two characters ('xx/xxxx', presumably MM/YYYY), then let change_date
# convert both date columns.
df['Reported Period'] = (
    df['Reported Period']
    .astype(float).astype(int).astype(str)
    .map(lambda period: period.zfill(6))
    .map(lambda period: period[:2] + '/' + period[2:])
)
df = change_date(df, 'Reported Period')
df = change_date(df, 'Original Date')

# Missingness to drop
df = df.dropna()

# All data: target is 'Foreclosed'; 'Zero Balance Code' is removed from the features as well
y_all = df['Foreclosed']
X_all = df.drop(columns=['Foreclosed', 'Zero Balance Code'])

# Split Train
# NOTE(review): test_size=0.7 holds out 70% of the rows, so the training set keeps
# 30% -- not the 70% the original comment claimed. This matches the outputs saved
# in this notebook (483,564 training rows out of 1,611,881). The split itself is
# deliberately left unchanged because the pickled models loaded later were
# presumably fitted on it -- confirm before changing test_size.
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all,
    test_size=0.7, stratify=y_all, random_state=2019,
)
# Split the held-out 70% in half: Val (35% of all rows) and Test (35% of all rows)
X_val, X_test, y_val, y_test = train_test_split(
    X_test, y_test,
    test_size=0.5, stratify=y_test, random_state=2019,
)

# One hot encoding on remaining data.
# Keep each split's 'Bank' labels (renumbered from 0) before encoding.
# NOTE(review): onehotencoding runs on each split separately; presumably all three
# end up with identical columns -- TODO confirm.
Bnk_train = X_train['Bank'].reset_index(drop=True)
X_train = onehotencoding(X_train)
Bnk_val = X_val['Bank'].reset_index(drop=True)
X_val = onehotencoding(X_val)
Bnk_test = X_test['Bank'].reset_index(drop=True)
X_test = onehotencoding(X_test)

In [5]:
# Update Macroeconomic variables (will not use test set)
# pca_fred receives and returns all three splits; judging by its name it presumably
# PCA-reduces the FRED macroeconomic columns -- confirm in Functions.ipynb.
# NOTE(review): X_test is passed in despite the remark above; presumably the
# transform is just not fitted on it -- TODO confirm.
X_train, X_val, X_test = pca_fred(X_train, X_val, X_test)

# Check columns
X_train.columns

Index(['Reported Period', 'Original Interest Rate', 'Original Mortgage Amount',
       'Original Loan Term', 'Original Date',
       'Original Combined Loan-to-Value (CLTV)', 'Single Borrower',
       'Original Debt to Income Ratio', 'Loan Purpose', 'Number of Units',
       'Mortgage Insurance %', 'Credit Score', 'Loan Change (1 Year)',
       'Loan Change (5 Years)', 'Median Household Income',
       'Number of Employees', 'Lnlsnet (5 Yr)', 'Lnlsnet (1 Yr)',
       'Origination Channel_B', 'Origination Channel_C',
       'Origination Channel_R', 'Bank_AmTrust Bank', 'Bank_Bank of America',
       'Bank_CitiMortgage', 'Bank_Flagstar Bank', 'Bank_GMAC Mortgage',
       'Bank_JPMorgan Chase', 'Bank_PNC Bank', 'Bank_SunTrust Mortgage',
       'Bank_Wells Fargo Bank', 'First Time Home Buyer_N',
       'First Time Home Buyer_Y', 'Property Type_CO', 'Property Type_CP',
       'Property Type_MH', 'Property Type_PU', 'Property Type_SF',
       'Occupancy Type_I', 'Occupancy Type_P', 'Occupanc

In [6]:
# List of banks (the same nine names as the configuration cell, repeated here)
banks = [
    'Bank of America', 'Wells Fargo Bank', 'CitiMortgage',
    'JPMorgan Chase', 'GMAC Mortgage', 'SunTrust Mortgage',
    'AmTrust Bank', 'PNC Bank', 'Flagstar Bank',
]

# Run Function: build the per-bank feature/target collections for every split.
# This happens before the 'Bank...' columns are removed below (presumably
# Bank_Subsets needs them to select each bank's rows).
Banks_X, Banks_y = Bank_Subsets(banks, df_X=X_train, df_y=y_train)
Banks_X_val, Banks_y_val = Bank_Subsets(banks, df_X=X_val, df_y=y_val)
Banks_X_test, Banks_y_test = Bank_Subsets(banks, df_X=X_test, df_y=y_test)

# Remove every column whose name starts with 'Bank' from the pooled splits.
X_train, X_val, X_test = [
    split.filter(regex=r'^(?!Bank).*$') for split in (X_train, X_val, X_test)
]

# All Banks: store the pooled data next to the per-bank entries.
for per_bank, pooled in (
    (Banks_y, y_train), (Banks_X, X_train),
    (Banks_y_val, y_val), (Banks_X_val, X_val),
    (Banks_y_test, y_test), (Banks_X_test, X_test),
):
    per_bank['All Banks'] = pooled

print('Shape:', X_train.shape)

Shape: (483564, 42)


***

# Improved Assumptions

In [7]:
# Loading models
# Paths are assembled with os.path.join so they resolve on any OS and no longer
# rely on backslashes inside a normal string literal (invalid escape sequences).
# NOTE: pickle.load can execute arbitrary code -- only load files you produced yourself.
with open(os.path.join('..', 'Data', 'Pickle', 'models.pkl'), 'rb') as file_to_open:
    vote_models = pickle.load(file_to_open)

# Loading Thresholds
with open(os.path.join('..', 'Data', 'Pickle', 'model_thresholds.pkl'), 'rb') as file_to_open:
    vote_thresholds = pickle.load(file_to_open)

In [8]:
# Combine Train, Validation, and Testing Data
# Rows are stacked in train -> val -> test order and renumbered from 0, so X, y
# and bank_idx line up position by position.
X = pd.concat([X_train, X_val, X_test], axis=0).reset_index(drop=True)
y = pd.concat([y_train, y_val, y_test], axis=0).reset_index(drop=True)
bank_idx = pd.concat([Bnk_train, Bnk_val, Bnk_test], axis=0).reset_index(drop=True)

# Initiate Dictionaries (filled in by the assumption cells, keyed by feature name)
better, better_value, best, best_value = {}, {}, {}, {}
worse, worse_value, worst, worst_value = {}, {}, {}, {}

In [None]:
# Credit Score (improved assumption)
# Show the current distribution, then call changing_assumptions with percentile 75
# (its printed message reports converting the column to that percentile, 770 in the
# saved run). The two returned objects are stored under the feature name.
print('Credit Score Distribution', X['Credit Score'].describe().round(0), '', sep='\n')
(
    better['Credit Score'],
    better_value['Credit Score'],
) = changing_assumptions(
    'Credit Score', 75,
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

Credit Score Distribution
count    1611881.0
mean         719.0
std           59.0
min          330.0
25%          675.0
50%          724.0
75%          770.0
max          850.0
Name: Credit Score, dtype: float64

Converting Credit Score to the 75 percentile: 770.0



In [None]:
# Debt-to-Income (improved assumption)
# Show the current distribution, then call changing_assumptions with percentile 25
# (presumably the column is set to that percentile value -- confirm in
# Functions.ipynb). The two returned objects are stored under the feature name.
print('Debt-to-Income Distribution', X['Original Debt to Income Ratio'].describe().round(0), '', sep='\n')
(
    better['Original Debt to Income Ratio'],
    better_value['Original Debt to Income Ratio'],
) = changing_assumptions(
    'Original Debt to Income Ratio', 25,
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Loan to Value (improved assumption)
# Show the current distribution, then call changing_assumptions with percentile 25
# (presumably the column is set to that percentile value -- confirm in
# Functions.ipynb). The two returned objects are stored under the feature name.
print('Loan-to-Value Distribution', X['Original Combined Loan-to-Value (CLTV)'].describe().round(0), '', sep='\n')
(
    better['Original Combined Loan-to-Value (CLTV)'],
    better_value['Original Combined Loan-to-Value (CLTV)'],
) = changing_assumptions(
    'Original Combined Loan-to-Value (CLTV)', 25,
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Median Household Income (improved assumption)
# Show the current distribution, then call changing_assumptions with percentile 75
# (presumably the column is set to that percentile value -- confirm in
# Functions.ipynb). The two returned objects are stored under the feature name.
print('Median Household Income Distribution', X['Median Household Income'].describe().round(2), '', sep='\n')
(
    better['Median Household Income'],
    better_value['Median Household Income'],
) = changing_assumptions(
    'Median Household Income', 75,
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Median Household Income (Best Assumption)
# Same call as the 75th-percentile variant, but with percentile 100 (presumably the
# column maximum -- confirm in Functions.ipynb). Results go into best / best_value.
print('Median Household Income Distribution', X['Median Household Income'].describe().round(2), '', sep='\n')
(
    best['Median Household Income'],
    best_value['Median Household Income'],
) = changing_assumptions(
    'Median Household Income', 100,
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Loan Change (improved assumption)
# Both loan-change columns are passed together, each with percentile 25. The
# results are stored only under the 1-year column name.
print(
    'Loan Change (1 Year) Distribution',
    X['Loan Change (1 Year)'].describe().round(2),
    '',
    'Loan Change (5 Years) Distribution',
    X['Loan Change (5 Years)'].describe().round(2),
    '',
    sep='\n',
)
(
    better['Loan Change (1 Year)'],
    better_value['Loan Change (1 Year)'],
) = changing_assumptions(
    ['Loan Change (1 Year)', 'Loan Change (5 Years)'], [25, 25],
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Bank Loan Liabilities (improved assumption)
# Both Lnlsnet columns are passed together, each with percentile 25. The results
# are stored only under the 1-year column name.
print(
    'Bank Loan Liabilities (1 Year) Distribution',
    X['Lnlsnet (1 Yr)'].describe().round(2),
    '',
    'Bank Loan Liabilities (5 Years) Distribution',
    X['Lnlsnet (5 Yr)'].describe().round(2),
    '',
    sep='\n',
)
(
    better['Lnlsnet (1 Yr)'],
    better_value['Lnlsnet (1 Yr)'],
) = changing_assumptions(
    ['Lnlsnet (1 Yr)', 'Lnlsnet (5 Yr)'], [25, 25],
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Bank Loan Liabilities (Best Assumption)
# Both Lnlsnet columns are passed together, each with percentile 100 (presumably
# the column maximum -- confirm in Functions.ipynb). The results are stored in
# best / best_value under the 1-year column name.
print(
    'Bank Loan Liabilities (1 Year) Distribution',
    X['Lnlsnet (1 Yr)'].describe().round(2),
    '',
    'Bank Loan Liabilities (5 Years) Distribution',
    X['Lnlsnet (5 Yr)'].describe().round(2),
    '',
    sep='\n',
)
(
    best['Lnlsnet (1 Yr)'],
    best_value['Lnlsnet (1 Yr)'],
) = changing_assumptions(
    ['Lnlsnet (1 Yr)', 'Lnlsnet (5 Yr)'], [100, 100],
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

***

In [None]:
# Save improved assumptions
# The four result dictionaries are pickled together as one list, in this order.
data = [better, better_value, best, best_value]
# The path is assembled with os.path.join so it resolves on any OS and no longer
# relies on backslashes inside a normal string literal (invalid escape sequences).
with open(os.path.join("..", "Data", "Pickle", "pred_votes_improved.pkl"), "wb") as f:
    pickle.dump(data, f)

***

# Weakened Assumptions

In [None]:
# Credit Score (weakened assumption)
# Show the current distribution, then call changing_assumptions with percentile 25
# (presumably the column is set to that percentile value -- confirm in
# Functions.ipynb). The two returned objects are stored under the feature name.
print('Credit Score Distribution', X['Credit Score'].describe().round(0), '', sep='\n')
(
    worse['Credit Score'],
    worse_value['Credit Score'],
) = changing_assumptions(
    'Credit Score', 25,
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Debt-to-Income (weakened assumption)
# Show the current distribution, then call changing_assumptions with percentile 75
# (presumably the column is set to that percentile value -- confirm in
# Functions.ipynb). The two returned objects are stored under the feature name.
print('Debt-to-Income Distribution', X['Original Debt to Income Ratio'].describe().round(0), '', sep='\n')
(
    worse['Original Debt to Income Ratio'],
    worse_value['Original Debt to Income Ratio'],
) = changing_assumptions(
    'Original Debt to Income Ratio', 75,
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Loan-to-Value (weakened assumption)
# Show the current distribution, then call changing_assumptions with percentile 75
# (presumably the column is set to that percentile value -- confirm in
# Functions.ipynb). The two returned objects are stored under the feature name.
print('Loan-to-Value Distribution', X['Original Combined Loan-to-Value (CLTV)'].describe().round(0), '', sep='\n')
(
    worse['Original Combined Loan-to-Value (CLTV)'],
    worse_value['Original Combined Loan-to-Value (CLTV)'],
) = changing_assumptions(
    'Original Combined Loan-to-Value (CLTV)', 75,
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Median Household Income (weakened assumption)
# Show the current distribution, then call changing_assumptions with percentile 25
# (presumably the column is set to that percentile value -- confirm in
# Functions.ipynb). The two returned objects are stored under the feature name.
print('Median Household Income Distribution', X['Median Household Income'].describe().round(2), '', sep='\n')
(
    worse['Median Household Income'],
    worse_value['Median Household Income'],
) = changing_assumptions(
    'Median Household Income', 25,
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Median Household Income (Worst Assumption)
# Same call as the 25th-percentile variant, but with percentile 0 (presumably the
# column minimum -- confirm in Functions.ipynb). Results go into worst / worst_value.
print('Median Household Income Distribution', X['Median Household Income'].describe().round(2), '', sep='\n')
(
    worst['Median Household Income'],
    worst_value['Median Household Income'],
) = changing_assumptions(
    'Median Household Income', 0,
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Loan Change (weakened assumption)
# Both loan-change columns are passed together, each with percentile 75. The
# results are stored only under the 1-year column name.
print(
    'Loan Change (1 Year) Distribution',
    X['Loan Change (1 Year)'].describe().round(2),
    '',
    'Loan Change (5 Years) Distribution',
    X['Loan Change (5 Years)'].describe().round(2),
    '',
    sep='\n',
)
(
    worse['Loan Change (1 Year)'],
    worse_value['Loan Change (1 Year)'],
) = changing_assumptions(
    ['Loan Change (1 Year)', 'Loan Change (5 Years)'], [75, 75],
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Bank Loan Liabilities (weakened assumption)
# Both Lnlsnet columns are passed together, each with percentile 75. The results
# are stored only under the 1-year column name.
print(
    'Bank Loan Liabilities (1 Year) Distribution',
    X['Lnlsnet (1 Yr)'].describe().round(2),
    '',
    'Bank Loan Liabilities (5 Years) Distribution',
    X['Lnlsnet (5 Yr)'].describe().round(2),
    '',
    sep='\n',
)
(
    worse['Lnlsnet (1 Yr)'],
    worse_value['Lnlsnet (1 Yr)'],
) = changing_assumptions(
    ['Lnlsnet (1 Yr)', 'Lnlsnet (5 Yr)'], [75, 75],
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Bank Loan Liabilities (Worst Assumption)
# Both Lnlsnet columns are passed together, each with percentile 0 (presumably the
# column minimum -- confirm in Functions.ipynb). The results are stored in
# worst / worst_value under the 1-year column name.
print(
    'Bank Loan Liabilities (1 Year) Distribution',
    X['Lnlsnet (1 Yr)'].describe().round(2),
    '',
    'Bank Loan Liabilities (5 Years) Distribution',
    X['Lnlsnet (5 Yr)'].describe().round(2),
    '',
    sep='\n',
)
(
    worst['Lnlsnet (1 Yr)'],
    worst_value['Lnlsnet (1 Yr)'],
) = changing_assumptions(
    ['Lnlsnet (1 Yr)', 'Lnlsnet (5 Yr)'], [0, 0],
    banks, bank_idx, X,
    vote_models, vote_thresholds,
    Banks_X, Banks_X_val, Banks_X_test,
    Banks_y, Banks_y_val, Banks_y_test,
)

In [None]:
# Save weakened assumptions
# The four result dictionaries are pickled together as one list, in this order.
data = [worse, worse_value, worst, worst_value]
# The path is assembled with os.path.join so it resolves on any OS and no longer
# relies on backslashes inside a normal string literal (invalid escape sequences).
# NOTE(review): 'wweakened' looks like a typo for 'weakened'. The file name is kept
# as-is because other notebooks may load it under this exact name -- rename both
# sides together if it is fixed.
with open(os.path.join("..", "Data", "Pickle", "pred_votes_wweakened.pkl"), "wb") as f:
    pickle.dump(data, f)

***