# <center>Bad Bank Behavior<br>Analyzing Bank Mortgage during the 2007 Housing Bubble</center>  

<center>Michael Siebel</center>
<center>August 2020</center>
<br>
    
## <center>Data Wrangling Script</center>

# Purpose
<br>

> To ETL data from: <br>
1) Fannie Mae Loan Acquisition and Performance Data [Individual Mortgage Loans], <br>
2) U.S. Census Bureau, Small Area Estimates Branch [Median Household Income by County], <br>
3) Federal Reserve Economic Data (FRED) [Macroeconomic Data related to the Housing Market], <br>
4) Federal Deposit Insurance Corporation (FDIC) Data [Information on FDIC-backed Banks]

***

# Load Functions

In [1]:
# Execute the shared helper notebook inside this kernel.
# This notebook never imports pd, np, pickle or zipfile itself and does not define
# load_data, merge_df, target_values, to_region, fred_merge, fred_merge_region or
# fdic_on_month, so they are expected to be provided by Functions.ipynb -- TODO confirm.
%run Functions.ipynb
# Show up to 999 columns when a DataFrame is displayed
pd.set_option("display.max_columns", 999)

***

# Load Fannie Mae Data

In [None]:
# Import packages
import os
import glob

# Collect file names
## Backslashes are escaped explicitly: '\D' is not a valid escape sequence and
## raises a SyntaxWarning on recent Python versions. The path value is unchanged.
fld = '..\\Data\\'

## os.listdir returns names in arbitrary order, so sort them: the i-th acquisition
## file must be paired with the i-th performance file (same quarter).
data_files = sorted(os.fsdecode(entry) for entry in os.listdir(fld))
x = [fld + name for name in data_files if name.startswith("Acquisition_2007Q")]
y = [fld + name for name in data_files if name.startswith("Performance_2007Q")]
if len(x) != len(y):
    raise ValueError('Found %d acquisition files but %d performance files in %s'
                     % (len(x), len(y), fld))

# Load data
## The year/quarter tag (e.g. '2007Q1') directly follows the file-name prefix
acq_tag_start = len(fld) + len("Acquisition_")
per_tag_start = len(fld) + len("Performance_")
acq_parts = []
per_parts = []
for acq_path, per_path in zip(x, y):
    file_year = acq_path[acq_tag_start:acq_tag_start + 4]
    file_quarter = acq_path[acq_tag_start + 4:acq_tag_start + 6]
    ## Guard against pairing files that belong to different quarters
    if per_path[per_tag_start:per_tag_start + 6] != file_year + file_quarter:
        raise ValueError('Acquisition file %s is paired with performance file %s'
                         % (acq_path, per_path))
    acq, per = load_data(acq_path, per_path)
    acq['File Year'], per['File Year'] = file_year, file_year
    acq['File Quarter'], per['File Quarter'] = file_quarter, file_quarter
    acq_parts.append(acq)
    per_parts.append(per)
    print('..Loaded year', file_year, 'quarter', file_quarter)

## Concatenate once instead of re-copying the growing frames on every iteration
df_acq = pd.concat(acq_parts, ignore_index=True) if acq_parts else pd.DataFrame()
df_per = pd.concat(per_parts, ignore_index=True) if per_parts else pd.DataFrame()

..Loaded year 2007 quarter Q1
..Loaded year 2007 quarter Q2
..Loaded year 2007 quarter Q3


In [None]:
# Drop unnecessary variables in a single call
unneeded_columns = [
    'Product Type',                   # it only had one value
    'Original Loan-to-Value (LTV)',   # Original Combined Loan-to-Value (CLTV) is used instead
    'First Payment',                  # of no value
    'Number of Units',                # lacks data variation
    'Relocation Mortgage Indicator',
]
df_acq = df_acq.drop(columns=unneeded_columns)

In [None]:
def _load_acquisition_files(prefix):
    """Load every file in ``fld`` whose name starts with ``prefix``.

    Each file is read with ``load_data`` (single-path call; the second return
    value is discarded) and tagged with the 'File Year' / 'File Quarter' taken
    from the characters that follow 'Acquisition_' in the file name.

    Returns a tuple ``(paths, frame)``: the sorted list of matching paths and
    one DataFrame with all files stacked (empty DataFrame when nothing matched).
    """
    names = sorted(os.fsdecode(entry) for entry in os.listdir(fld))
    paths = [fld + name for name in names if name.startswith(prefix)]
    tag_start = len(fld) + len("Acquisition_")
    parts = []
    for path in paths:
        part, _ = load_data(path)
        file_year = path[tag_start:tag_start + 4]
        file_quarter = path[tag_start + 4:tag_start + 6]
        part['File Year'] = file_year
        part['File Quarter'] = file_quarter
        parts.append(part)
        print('..Loaded year', file_year, 'quarter', file_quarter)
    # One concat at the end avoids re-copying the accumulated frame per file
    frame = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame()
    return paths, frame


# Load data
## 2006 Data
p1, df_yr1 = _load_acquisition_files("Acquisition_2006Q")

print('')
## 2002 Data
p5, df_yr5 = _load_acquisition_files("Acquisition_2002Q")

In [None]:
def _mean_loan_by_group(acquisitions):
    """Mean 'Original Mortgage Amount' per Bank / File Year / File Quarter / Zip Code."""
    group_keys = ['Bank', 'File Year', 'File Quarter', 'Zip Code']
    return acquisitions.groupby(group_keys) \
                       .agg({'Original Mortgage Amount': 'mean'}).reset_index()


def _loan_change(current, previous, label):
    """Difference between current and previous mean loan amount per group.

    ``current`` and ``previous`` are outputs of ``_mean_loan_by_group``. Rows are
    matched on Bank, Zip Code and File Quarter (same quarter of the earlier
    year). A group with no previous-year match is treated as a previous amount
    of 0. Returns a frame with the three keys plus the ``label`` column.
    """
    keys = ['Bank', 'Zip Code', 'File Quarter']
    merged = pd.merge(current, previous[keys + ['Original Mortgage Amount']],
                      on=keys, how='left', suffixes=('', ' (Prev)'))
    previous_amount = merged['Original Mortgage Amount (Prev)'].fillna(0)
    merged[label] = merged['Original Mortgage Amount'] - previous_amount
    return merged[keys + [label]]


# Aggregate loans by collapsing by Banks and File Date
## NOTE(review): this cell was headed "total loans" but the aggregation is a mean
## of the loan amount -- confirm whether a sum was intended.
Loans2007 = _mean_loan_by_group(df_acq)
Loans2006 = _mean_loan_by_group(df_yr1)
Loans2002 = _mean_loan_by_group(df_yr5)

## The change is attached to every loan of the Bank / Zip Code / File Quarter group.
## Merging additionally on 'Original Mortgage Amount' (the group mean) would only
## match loans whose own amount equals that mean and leave the rest missing.
loan_keys = ['Bank', 'Zip Code', 'File Quarter']

# 1 Year change in loans
LoansYr1 = _loan_change(Loans2007, Loans2006, 'Loan Change (1 Year)')
df_acq = pd.merge(df_acq, LoansYr1, on=loan_keys, how='left')
print('Average change in 1 year:', np.mean(LoansYr1['Loan Change (1 Year)']).round(2))

# 5 year change in loans
LoansYr5 = _loan_change(Loans2007, Loans2002, 'Loan Change (5 Years)')
df_acq = pd.merge(df_acq, LoansYr5, on=loan_keys, how='left')
print('Average change in 5 years:', np.mean(LoansYr5['Loan Change (5 Years)']).round(2))

print('Shape:', df_acq.shape)

***

# Merge Target from Performance Data

In [None]:
# Build the modelling frame (with the target) from acquisition and performance data
df = merge_df(df_acq, df_per)

# Report the frame dimensions: rows are observations, columns are features
n_observations, n_features = df.shape
print('\nThe number of features is:\n', n_features, sep='')
print('\nThe number of observations is:\n', n_observations, sep='')
target_values(df['Foreclosed'], data=True)

In [None]:
# Drop U.S. territories due to missing data
territories = ['PR', 'GU', 'VI']
in_territory = df['Property State'].isin(territories)
df = df[~in_territory]

df.head(10)

In [None]:
# Dates
## Date values earlier than the file date indicate a mortgage loan refinanced on that date
## Sort the rows by 'Original Date', then tabulate how many rows fall on each date value
df = df.sort_values(by=['Original Date'])
df['Original Date'].value_counts()

***

# Fannie Mae Feature Recodes

In [None]:
# Date Variables
## 'Original Date' is split on '/': the first piece is the month, the second the year
date_pieces = df['Original Date'].map(lambda value: value.split('/'))
df['Month'] = date_pieces.map(lambda pieces: str(pieces[0].strip()))
df['Year'] = date_pieces.map(lambda pieces: str(pieces[1].strip()))
df['Year'].value_counts()

In [None]:
# Harmonized Credit Score
## With a co-borrower: 75% borrower score + 25% co-borrower score.
## Without a co-borrower: the borrower score on its own.
## Built in one aligned expression; assigning through df[col].loc[mask] is chained
## indexing and is not guaranteed to write back into df.
co_borrower_score = df['Co-Borrower Credit Score']
borrower_score = df['Credit Score']
blended_score = (co_borrower_score * 0.25) + (borrower_score * 0.75)
df['Harmonized Credit Score'] = blended_score.where(co_borrower_score.notnull(), borrower_score)

print(df[['Harmonized Credit Score', 'Credit Score', 'Co-Borrower Credit Score']].head(10))
df = df.drop(labels=['Credit Score', 'Co-Borrower Credit Score'], axis=1)

In [None]:
# Mortgage Insurance %
## A missing percentage is recoded to 0
df['Mortgage Insurance %'] = df['Mortgage Insurance %'].fillna(0)

## Which insurance types are recorded on loans with a 0% share
zero_insurance = df['Mortgage Insurance %'] == 0
df.loc[zero_insurance, 'Mortgage Insurance Type'].value_counts()

In [None]:
# Mortgage Insurance Type
## Binary flag: 1 when a type is recorded, 0 when it is missing
has_insurance_type = df['Mortgage Insurance Type'].notnull()
df['Mortgage Insurance Type'] = has_insurance_type.astype(int)
df['Mortgage Insurance Type'].value_counts()

In [None]:
# Collapse Refinance
## 0 = purchase ('P'), 1 = every other loan purpose.
## Computed directly as integers: going through a mixed 1/'P' column and replace()
## leaves the final dtype dependent on pandas' deprecated silent downcasting.
df['Loan Purpose'] = (df['Loan Purpose'] != 'P').astype(int)
df['Loan Purpose'].value_counts()

In [None]:
# Recode Number of Borrowers
## Single Borrower binary: a value of 1 is kept, every other value becomes 0
is_single = df['Number of Borrowers'] == 1
df['Number of Borrowers'] = df['Number of Borrowers'].mask(~is_single, 0)
df = df.rename(columns={'Number of Borrowers': 'Single Borrower'})
df['Single Borrower'].value_counts()

***

# ETL Median Household Income

ETL County-level median household income from U.S. Census

Aggregate on 3-digit zipcode

In [None]:
# Import county median household income
## Backslashes are escaped explicitly ('\D', '\e' and '\Z' are invalid escape
## sequences and raise a SyntaxWarning on recent Python); the paths are unchanged.
income = pd.read_excel("..\\Data\\est07all.xls",
                       sheet_name='est07ALL', header=2)
income = income[['Name', 'Median Household Income']]
income = income.rename(columns={'Name': 'County'})

# Import county zipcode crosswalk
crosswalk = pd.read_csv("..\\Data\\ZIP-COUNTY-FIPS_2017-06.csv",
                        header=0)
crosswalk = crosswalk[['ZIP', 'COUNTYNAME']]
crosswalk = crosswalk.rename(columns={'ZIP': 'Zip Code', 'COUNTYNAME': 'County'})
## Strip the last two characters of the ZIP so it matches the shortened zip code
## used for aggregation, then store it as an integer
crosswalk['Zip Code'] = crosswalk['Zip Code'].astype(str)
crosswalk['Zip Code'] = crosswalk['Zip Code'].str.slice(start=0, stop=-2)
crosswalk['Zip Code'] = crosswalk['Zip Code'].astype(int)

# Merge
## NOTE(review): the join key is the county name only; if the same county name
## occurs in several states those rows will be cross-matched -- confirm, and
## consider joining on a state/county FIPS code if both files provide one.
income_zipcode = pd.merge(income, crosswalk, on='County', how='outer')
income_zipcode.head(10)

In [None]:
# Average the county incomes per zip code and attach the result to the loan data
income_zipcode = income_zipcode.groupby('Zip Code')[['Median Household Income']].mean()
df = df.merge(income_zipcode, how='left', on='Zip Code')

df['Median Household Income'].describe()

***

# Group Banks

Goal is to keep bank values ~10,000

In [None]:
# Bank Values
## Row counts per distinct 'Bank' value, before any recoding
df['Bank'].value_counts()

In [None]:
# Recode Bank to keep bank values ~10,000
## Banks folded into the small-loan group
Small_Loan = ['BISHOPS GATE RESIDENTIAL MORTGAGE TRUST',
              'FREEDOM MORTGAGE CORP.',
              'HSBC BANK USA, NATIONAL ASSOCIATION', 'PHH MORTGAGE CORPORATION (USAA FEDERAL SAVINGS BANK)',
              'THIRD FEDERAL SAVINGS AND LOAN', 'WELLS FARGO BANK, N.A.']
## Name variants of the same bank
Chase = ["CHASE HOME FINANCE (CIE 1)", "CHASE HOME FINANCE, LLC"]
GMAC = ['GMAC MORTGAGE, LLC (USAA FEDERAL SAVINGS BANK)', 'GMAC MORTGAGE, LLC']

## Apply the recodes in order: each listed name is replaced by its group label
bank_recodes = [(Small_Loan, 'SMALL LOAN BANKS'),
                (Chase, 'CHASE HOME FINANCE'),
                (GMAC, 'GMAC MORTGAGE')]
for old_names, group_label in bank_recodes:
    df = df.replace({'Bank': old_names}, group_label)

# Check updated Bank values
df['Bank'].value_counts()

***

# ETL FRED Data

In [None]:
# State to Region Conversion
## to_region is defined outside this notebook; the count below relies on it
## returning the frame with a 'Region' column -- presumably derived from
## 'Property State', TODO confirm.
df = to_region(df, 'Property State')
df['Region'].value_counts()

In [None]:
# Household Financial Obligations as a percent of Disposable Personal Income (FODSP)
# Consumer Debt Service Payments as a Percent of Disposable Personal Income (CDSP)
# S&P/Case-Shiller U.S. National Home Price Index (CSUSHPINSA)
# Mortgage Debt Service Payments as a Percent of Disposable Personal Income  (MDSP)
# Monthly Supply of Houses in the United States (MSACSR)
# Homeowner Vacancy Rate for the United States (RHVRUSQ156N) -- listed here but not loaded below
fred_df = ['FODSP', 'CDSP', 'CSUSHPINSA', 'MDSP', 'MSACSR']
fred_name = ['Household Financial Obligations', 'Consumer Debt Service Payment',
             'National Home Price Index', 'Mortgage Debt Service Payments', 'Monthly Supply of Houses']
fred_freq = ['qtr', 'qtr', 'yr', 'qtr', 'yr']

# Read each series and merge it into df; 'qtr' series are merged with quarter=True,
# everything else with quarter=False
for series_id, series_name, series_freq in zip(fred_df, fred_name, fred_freq):
    ## Backslashes are escaped ('\D' and '\F' are invalid escape sequences); same path
    fred_tmp = pd.read_csv('..\\Data\\FRED\\' + series_id + '.csv', header=0)
    df = fred_merge(fred_tmp, df, quarter=(series_freq == 'qtr'), varname=series_name)

print('Shape:', df.shape)
display(df.tail())

In [None]:
# Housing Inventory Estimate: Vacant Housing Units for Sale (ESALEUSQ176N)
# Homeownership Rate for the United States (RHORUSQ156N)
# Housing Inventory Estimate: Vacant Housing Units for Rent (ERENTUSQ176N)
# Rental Vacancy Rate for the United States (RRVRUSQ156N)
fred_df = ['ESALEUSQ176N', 'RHORUSQ156N', 'ERENTUSQ176N', 'RRVRUSQ156N']
fred_name = ['Vacant Housing Units for Sale', 'Homeownership Rate', 'Vacant Housing Units for Rent',
             'Rental Vacancy Rate']
fred_freq = ['qtr', 'qtr', 'qtr', 'qtr']
fred_regions = ['NE', 'SO', 'MW', 'WE']

for series_id, series_name, series_freq in zip(fred_df, fred_name, fred_freq):
    # Regional file names swap the two characters before the 5-character suffix
    # ('US' in the ids above) for the region code
    fred_prefix = series_id[:len(series_id) - 7]
    fred_suffix = series_id[-5:]
    ## Backslashes are escaped ('\D' and '\F' are invalid escape sequences); same path
    fred_tmp = {region: pd.read_csv('..\\Data\\FRED\\' + fred_prefix + region + fred_suffix + '.csv',
                                    header=0)
                for region in fred_regions}
    df = fred_merge_region(NE=fred_tmp['NE'], SO=fred_tmp['SO'], MW=fred_tmp['MW'],
                           WE=fred_tmp['WE'], varname=series_name, df_orig=df,
                           quarter=(series_freq == 'qtr'))

print('Shape:', df.shape)
display(df.tail())

***

# ETL FDIC Data

In [None]:
import inspect

# use pandas to construct a list of quarterly dates
## NOTE(review): `datetimes` is not referenced again in this notebook -- confirm it is still needed
present = '20071231'
datetimes = pd.date_range('19980331', end=present, freq='Q')

# get a list of zip files over which to iterate
## Backslashes are escaped ('\D', '\F' and '\*' are invalid escape sequences); the
## pattern is unchanged. Sorted so the load order is deterministic.
zip_files = sorted(glob.glob('..\\Data\\FDIC\\*.zip'))

# only want to return a subset of cols (save on memory usage!)
used_columns = ['name', 'repdte', 'asset', 'lnlsnet', 'liab', 'dep', 'eqtot', 'numemp']
used_dtypes = {'name': str, 'repdte': object, 'asset': float,
               'lnlsnet': float, 'liab': float, 'eqtot': float, 'dep': float, 'numemp': float}

# skip the mangled obs: `error_bad_lines` was superseded by `on_bad_lines` in
# pandas 1.3 and removed in pandas 2.0, so use whichever this pandas accepts
if 'on_bad_lines' in inspect.signature(pd.read_csv).parameters:
    bad_line_options = {'on_bad_lines': 'skip'}
else:
    bad_line_options = {'error_bad_lines': False}

# create a container for the individual dataframes
dataframes = []

for zip_file in zip_files:

    # the context managers close the archive and the member even if parsing fails
    with zipfile.ZipFile(zip_file) as tmp_buffer:

        # want to work with the assets and liabilities file
        ## NOTE(review): chosen by position (sixth archive entry) -- confirm the
        ## archive layout is identical for every quarter
        tmp_file = tmp_buffer.namelist()[5]

        with tmp_buffer.open(tmp_file) as tmp_handle:
            tmp_dataframe = pd.read_csv(tmp_handle,
                                        usecols=used_columns,
                                        dtype=used_dtypes,
                                        parse_dates=True,
                                        **bad_line_options)

    dataframes.append(tmp_dataframe)

# concatenate the quarterly dataframes into a single data frame
## a fresh unique index keeps the boolean masks below unambiguous
fdic = pd.concat(dataframes, ignore_index=True)

# convert units from thousands to billions of USD
fdic[['asset', 'lnlsnet', 'liab', 'dep', 'eqtot']] /= 1e6

# convert units from number of people to thousands of people
fdic['numemp'] /= 1e3

# Group by bank
## (regex matched against the institution name, Bank label assigned).
## Applied top to bottom, so a later pattern overwrites an earlier one.
## NOTE(review): names containing both 'JPMorgan' and 'Chase' therefore end up
## as 'CHASE HOME FINANCE' -- confirm this is intended.
bank_patterns = [
    ('Bank of America', 'BANK OF AMERICA, N.A.'),
    ('Citibank|Citicorp', 'CITIMORTGAGE, INC.'),
    ('J. P. Morgan|JPMorgan', 'JPMORGAN CHASE BANK, NATIONAL ASSOCIATION'),
    ('GMAC', 'GMAC MORTGAGE'),
    ('PNC Bank', 'PNC BANK, N.A.'),
    ('SunTrust', 'SUNTRUST MORTGAGE INC.'),
    ('AmTrust|AMTRUST', 'AMTRUST BANK'),
    ('Flagstar', 'FLAGSTAR CAPITAL MARKETS CORPORATION'),
    ('First Tennessee Bank', 'FIRST TENNESSEE BANK NATIONAL ASSOCIATION'),
    ('Chase', 'CHASE HOME FINANCE'),
    ('IndyMac', 'FDIC, RECEIVER, INDYMAC FEDERAL BANK FSB'),
    ('Wells Fargo|HSBC|USAA|Third Federal', 'SMALL LOAN BANKS'),
]
fdic['Bank'] = None
for name_pattern, bank_label in bank_patterns:
    # na=False: a missing name counts as "no match" instead of producing a NaN mask
    name_matches = fdic['name'].str.contains(name_pattern, na=False)
    fdic.loc[name_matches, 'Bank'] = bank_label
## Group other banks
fdic['Bank'] = np.where(fdic['Bank'].isnull(), 'OTHER', fdic['Bank'])

# Drop name
fdic = fdic.drop(labels=['name'], axis=1)

# Convert to panel
fdic = fdic.groupby(['Bank', 'repdte']).sum()
fdic = fdic.reset_index(drop=False)

# Fill monthly data
fdic = fdic_on_month(fdic)
fdic = fdic.groupby(['Bank', 'Original Date']).sum()

print('Shape:', fdic.shape)
display(fdic.tail())

In [None]:
# compute the by quarter totals for each measure
## sums every column of fdic over all rows that share the same 'Original Date'
quarter_totals = fdic.groupby(['Original Date']).sum()

# compute the base quarter totals for each measure
## the base is the first row of quarter_totals, i.e. the first 'Original Date'
## in the sorted group order
totals_base_qtr = quarter_totals.iloc[0,:]

def janicki_prescott_norm(item):
    r"""
    Return the Janicki and Prescott (2006) normalized size measure for one column.

    In order to make sure results are comparable across years, I follow
    Janicki and Prescott (2006) and deflate and re-scale each measure of bank
    size by dividing by banking sector totals relative to some base quarter.
    Specifically, let :math:`S_{i,t}^{raw}` denote the raw size of bank :math:`i`
    in year :math:`t` based on one of the six size measures detailed above. The
    normalized size of bank :math:`i` relative to the base quarter is defined as
    follows:

    .. math::

        S_{i,t}^{norm} = \frac{S_{i,t}^{raw}}{\sum_{j}S_{j,t}^{raw}}\sum_{j}S_{j,base}^{raw}

    where :math:`\sum_{j}S_{j,t}^{raw}` is the banking sector total of some size
    measure in year :math:`t` (i.e., total banking sector assets in year :math:`t`),
    and :math:`\sum_{j}S_{j,base}^{raw}` is the banking sector total of the same
    size measure in the base quarter.

    Parameters
    ----------
    item : str
        Name of the size-measure column to normalize. It is looked up in the
        notebook-level ``fdic``, ``quarter_totals`` and ``totals_base_qtr``.

    Returns
    -------
    The normalized column: ``fdic[item]`` divided by the matching
    ``quarter_totals[item]`` and multiplied by ``totals_base_qtr[item]``.
    """
    # Use the argument itself; reading a global loop variable here would make the
    # result depend on how the caller happens to name its loop variable.
    return (fdic[item] / quarter_totals[item]) * totals_base_qtr[item]

# apply the Janicki and Prescott (2006) normalized size measure
for column in fdic.columns:
    fdic[column] = janicki_prescott_norm(column)

# Period change
fdic = fdic.reset_index(drop=False)

## Put the rows in chronological order within each bank before differencing.
## The loan data splits 'Original Date' on '/', so these are presumably 'MM/YYYY'
## strings whose sorted order is not chronological -- TODO confirm. Values that do
## not parse become NaT and keep their existing relative order within the bank.
date_order = pd.to_datetime(fdic['Original Date'], format='%m/%Y', errors='coerce')
fdic = fdic.assign(_date_order=date_order) \
           .sort_values(['Bank', '_date_order']) \
           .drop(columns='_date_order') \
           .reset_index(drop=True)

## Changes are computed per bank so that the first periods of one bank are not
## compared against the last periods of the bank listed before it.
## NOTE(review): '(Qtr)' uses a lag of 4 rows and '(Yr)' a lag of 12; if the rows
## are monthly, a quarter would be a lag of 3 -- confirm.
for col in ['asset', 'lnlsnet', 'liab', 'dep', 'eqtot']:
    fdic[col + ' (Qtr)'] = fdic.groupby('Bank')[col].pct_change(4)
    fdic[col + ' (Yr)'] = fdic.groupby('Bank')[col].pct_change(12)

# Drop total values
fdic = fdic.drop(labels=['asset', 'lnlsnet', 'liab', 'dep', 'eqtot'], axis=1)

# Regroup
## Restores the (Bank, Original Date) index; sum() skips missing values, so the
## undefined changes at the start of each bank's history come out as 0
fdic = fdic.groupby(['Bank', 'Original Date']).sum()
display(fdic.tail())

In [None]:
# Merge
## Left join: every loan row is kept, and FDIC columns are filled in where the
## loan's 'Bank' and 'Original Date' both match an FDIC row
df = pd.merge(df, fdic, on=['Bank', 'Original Date'], how='left')
    
print('Shape:', df.shape)
display(df.tail())

***

# Final Data Missing Summary

In [None]:
# Missing
## Share of missing values per column, as a percentage rounded to two decimals
missing_share = df.isna().mean()
(missing_share * 100).round(2)

***

# Save Data File

In [None]:
# Write the final frame to disk.
## Backslashes are escaped ('\D' and '\d' are invalid escape sequences); same path.
## The with-block closes the file even if pickling raises.
with open("..\\Data\\df.pickle", "wb") as file_to_store:
    pickle.dump(df, file_to_store)

***

***

Temporary chunks for loading data

In [None]:
# Reload the frame written by the save cell.
## Backslashes are escaped ('\D' and '\d' are invalid escape sequences); same path.
## NOTE: pickle.load can execute arbitrary code -- only open files you created yourself.
with open('..\\Data\\df.pickle', 'rb') as file_to_open:
    df = pickle.load(file_to_open)

In [None]:
# Remove the quarterly / yearly change columns of the FRED series
fred_change_columns = [
    'Household Financial Obligations (Qtr)', 'Household Financial Obligations (Yr)',
    'Consumer Debt Service Payment (Qtr)', 'Consumer Debt Service Payment (Yr)',
    'National Home Price Index (Qtr)', 'National Home Price Index (Yr)',
    'Mortgage Debt Service Payments (Qtr)', 'Mortgage Debt Service Payments (Yr)',
    'Monthly Supply of Houses (Qtr)', 'Monthly Supply of Houses (Yr)',
    'Vacant Housing Units for Sale (Qtr)', 'Vacant Housing Units for Sale (Yr)',
    'Homeownership Rate (Qtr)', 'Homeownership Rate (Yr)',
    'Vacant Housing Units for Rent (Qtr)', 'Vacant Housing Units for Rent (Yr)',
    'Rental Vacancy Rate (Qtr)', 'Rental Vacancy Rate (Yr)',
]
df = df.drop(columns=fred_change_columns)

In [None]:
# Remove the FDIC employee count and the FDIC period-change columns
fdic_columns = [
    'numemp',
    'asset (Qtr)', 'asset (Yr)',
    'lnlsnet (Qtr)', 'lnlsnet (Yr)',
    'liab (Qtr)', 'liab (Yr)',
    'dep (Qtr)', 'dep (Yr)',
    'eqtot (Qtr)', 'eqtot (Yr)',
]
df = df.drop(columns=fdic_columns)