In [1]:
#2018 HMDA Edit Testing File Generator
import calendar
import random
import string
from collections import OrderedDict

import pandas as pd

In [None]:
# Ordered column names for LAR (loan/application register) rows and for the
# TS (transmittal sheet) row. The order of each list is the column order.
# NOTE(review): these lists appear to mirror the "field" entries of the LAR/TS
# schema lists defined in the next cell, and are not referenced elsewhere in
# the visible notebook -- confirm whether both copies are still needed.
LAR_field_names = [
    "record_id","lei","uli","app_date","loan_type","loan_purpose",
    "preapproval","const_method","occ_type","loan_amount","action_taken",
    "action_date","street_address","city","state","zip_code","county",
    "tract","app_eth_1","app_eth_2","app_eth_3","app_eth_4","app_eth_5",
    "app_eth_code_14","app_eth_text","co_app_eth_1","co_app_eth_2",
    "co_app_eth_3","co_app_eth_4","co_app_eth_5","co_app_eth_code_14",
    "app_eth_basis","co_app_eth_basis","app_race_1","app_race_2",
    "app_race_3","app_race_4","app_race_5","app_race_code_1",
    "app_race_code_27","app_race_code_44","co_app_race_1","co_app_race_2",
    "co_app_race_3","co_app_race_4","co_app_race_5","co_app_race_code_1",
    "co_app_race_code_27","co_app_race_code_44","app_race_basis",
    "co_app_race_basis","app_sex","co_app_sex","app_sex_basis",
    "co_app_sex_basis","app_age","co_app_age","income","purchaser_type",
    "rate_spread","hoepa","lien","app_credit_score","co_app_credit_score",
    "app_score_name","app_score_code_8","co_app_score_name",
    "co_app_score_code_8","denial_1","denial_2","denial_3","denial_4",
    "denial_code_9","loan_costs","points_fees","origination_fee",
    "discount_points","lender_credits","interest_rate","prepayment_penalty",
    "dti","cltv","loan_term","intro_rate","balloon","int_only_pmts",
    "neg_amort","non_amort_features","property_value","manufactured_type",
    "manufactured_interest","total_units","affordable_units",
    "submission_type","initially_payable","mlo_id","aus_1","aus_2",
    "aus_3","aus_4","aus_5","aus_code_5","aus_result_1","aus_result_2",
    "aus_result_3","aus_result_4","aus_result_5","aus_code_16",
    "reverse_mortgage","open_end_credit","business_purpose"]

# Column names of the single TS row, in file order.
TS_field_names= [
    "record_id","inst_name","calendar_year","calendar_quarter",
    "contact_name","contact_tel","contact_email","contact_street_address",
    "office_city","office_state","office_zip","federal_agency",
    "lar_entries","tax_id","lei"]

In [2]:
#2018 Filing Instruction Guide: https://www.consumerfinance.gov/data-research/hmda/static/for-filers/2018/2018-HMDA-FIG.pdf
#2018 file format. Each schema entry below describes one field:
#  field      - column name
#  data_type  - "numeric" or "alphanumeric"
#  valid_vals - enumerated valid values (empty when the field has no enumeration)
#  dtypes_    - Python-level types a value of the field may take
#Example: Record ID / Numeric / 1

#Note: Add a format field? use to check format of entered data?
county_list = ['001', '013', '231'] #list of CBSA counties, dtype string
tract_list = ['0012.12', '1212.12', '4444.44'] #list of CBSA tracts, dtype string
#Note: add valid value list/function to the valid vals list field
#IE list of ages 1 to n (range(200))
#IE rate_spread float generator

# Transmittal sheet (TS) schema: one entry per column of the TS row, in file order.
TS = [
    {"field":"record_id", "data_type":"numeric", "valid_vals":[1], "dtypes_":["int"]},
    {"field":"inst_name", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
    {"field":"calendar_year", "data_type":"numeric", "valid_vals":[], "dtypes_":["int"]},
    {"field":"calendar_quarter", "data_type":"numeric", "valid_vals":[4], "dtypes_":["int"]},
    {"field":"contact_name", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
    {"field":"contact_tel", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
    {"field":"contact_email", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
    {"field":"contact_street_address", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
    {"field":"office_city", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
    {"field":"office_state", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
    {"field":"office_zip", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
    {"field":"federal_agency", "data_type":"numeric", "valid_vals":[1,2,3,5,7,9], "dtypes_":["int"]},
    {"field":"lar_entries", "data_type":"numeric", "valid_vals":[], "dtypes_":["int"]},
    {"field":"tax_id", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
    {"field":"lei", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]}]

# Loan/application register (LAR) schema: one entry per column of a LAR row, in file order.
# NOTE(review): county/tract keep their nested form [list, "NA"], unlike the flat
# valid_vals of every other field -- confirm how consumers are expected to read them.
LAR = [
{"field":"record_id", "data_type":"numeric", "valid_vals":[2], "dtypes_":["int"]},
{"field":"lei", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"uli", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"app_date", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["string"]},
{"field":"loan_type", "data_type":"numeric", "valid_vals":[1,2,3,4], "dtypes_":["int"]},
{"field":"loan_purpose", "data_type":"numeric", "valid_vals":[1,2,31,32,4], "dtypes_":["int"]},
{"field":"preapproval", "data_type":"numeric", "valid_vals":[1,2], "dtypes_":["int"]},
{"field":"const_method", "data_type":"numeric", "valid_vals":[1,2], "dtypes_":["int"]},
{"field":"occ_type", "data_type":"numeric", "valid_vals":[1,2,3], "dtypes_":["int"]},
{"field":"loan_amount", "data_type":"numeric", "valid_vals":[], "dtypes_":["int", "float"]},
{"field":"action_taken", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6,7,8], "dtypes_":["int"]},
{"field":"action_date", "data_type":"numeric", "valid_vals":[], "dtypes_":["int"]},
{"field":"street_address", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"city", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"state", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"zip_code", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"county", "data_type":"alphanumeric", "valid_vals":[county_list, "NA"], "dtypes_":["string"]},
{"field":"tract", "data_type":"alphanumeric", "valid_vals":[tract_list, "NA"], "dtypes_":["string"]},
{"field":"app_eth_1", "data_type":"numeric", "valid_vals":[1,11,12,13,14,2,3,4], "dtypes_":["int"]},
{"field":"app_eth_2", "data_type":"numeric", "valid_vals":[1,11,12,13,14,2], "dtypes_":["int"]},
{"field":"app_eth_3", "data_type":"numeric", "valid_vals":[1,11,12,13,14,2], "dtypes_":["int"]},
{"field":"app_eth_4", "data_type":"numeric", "valid_vals":[1,11,12,13,14,2], "dtypes_":["int"]},
{"field":"app_eth_5", "data_type":"numeric", "valid_vals":[1,11,12,13,14,2], "dtypes_":["int"]},
{"field":"app_eth_code_14", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"app_eth_text", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"co_app_eth_1", "data_type":"numeric", "valid_vals":[1,11,12,13,14,2,3,4,5], "dtypes_":["int"]},
{"field":"co_app_eth_2", "data_type":"numeric", "valid_vals":[1,11,12,13,14,2], "dtypes_":["int"]},
{"field":"co_app_eth_3", "data_type":"numeric", "valid_vals":[1,11,12,13,14,2], "dtypes_":["int"]},
{"field":"co_app_eth_4", "data_type":"numeric", "valid_vals":[1,11,12,13,14,2], "dtypes_":["int"]},
{"field":"co_app_eth_5", "data_type":"numeric", "valid_vals":[1,11,12,13,14,2], "dtypes_":["int"]},
{"field":"co_app_eth_code_14", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"app_eth_basis", "data_type":"numeric", "valid_vals":[1,2,3], "dtypes_":["int"]},
{"field":"co_app_eth_basis", "data_type":"numeric", "valid_vals":[1,2,3], "dtypes_":["int"]},
{"field":"app_race_1", "data_type":"numeric", "valid_vals":[1,2,21,22,23,24,25,26,27,3,4,41,42,43,44,5,6,7], "dtypes_":["int"]},
{"field":"app_race_2", "data_type":"numeric", "valid_vals":[1,2,21,22,23,24,25,26,27,3,4,41,42,43,44,5], "dtypes_":["int"]},
{"field":"app_race_3", "data_type":"numeric", "valid_vals":[1,2,21,22,23,24,25,26,27,3,4,41,42,43,44,5], "dtypes_":["int"]},
{"field":"app_race_4", "data_type":"numeric", "valid_vals":[1,2,21,22,23,24,25,26,27,3,4,41,42,43,44,5], "dtypes_":["int"]},
{"field":"app_race_5", "data_type":"numeric", "valid_vals":[1,2,21,22,23,24,25,26,27,3,4,41,42,43,44,5], "dtypes_":["int"]},
{"field":"app_race_code_1", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"app_race_code_27", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"app_race_code_44", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"co_app_race_1", "data_type":"numeric", "valid_vals":[1,2,21,22,23,24,25,26,27,3,4,41,42,43,44,5,6,7,8], "dtypes_":["int"]},
{"field":"co_app_race_2", "data_type":"numeric", "valid_vals":[1,2,21,22,23,24,25,26,27,3,4,41,42,43,44,5], "dtypes_":["int"]},
{"field":"co_app_race_3", "data_type":"numeric", "valid_vals":[1,2,21,22,23,24,25,26,27,3,4,41,42,43,44,5], "dtypes_":["int"]},
{"field":"co_app_race_4", "data_type":"numeric", "valid_vals":[1,2,21,22,23,24,25,26,27,3,4,41,42,43,44,5], "dtypes_":["int"]},
{"field":"co_app_race_5", "data_type":"numeric", "valid_vals":[1,2,21,22,23,24,25,26,27,3,4,41,42,43,44,5], "dtypes_":["int"]},
{"field":"co_app_race_code_1", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"co_app_race_code_27", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"co_app_race_code_44", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"app_race_basis", "data_type":"numeric", "valid_vals":[1,2,3], "dtypes_":["int"]},
{"field":"co_app_race_basis", "data_type":"numeric", "valid_vals":[1,2,3], "dtypes_":["int"]},
{"field":"app_sex", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6], "dtypes_":["int"]},
{"field":"co_app_sex", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6], "dtypes_":["int"]},
{"field":"app_sex_basis", "data_type":"numeric", "valid_vals":[1,2,3], "dtypes_":["int"]},
{"field":"co_app_sex_basis", "data_type":"numeric", "valid_vals":[1,2,3,4], "dtypes_":["int"]},
{"field":"app_age", "data_type":"numeric", "valid_vals":[8888], "dtypes_":["int"]},
{"field":"co_app_age", "data_type":"numeric", "valid_vals":[8888,9999], "dtypes_":["int"]},
{"field":"income", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["string"]},
{"field":"purchaser_type", "data_type":"numeric", "valid_vals":[0,1,2,3,4,5,6,71,72,8,9], "dtypes_":["int"]},
{"field":"rate_spread", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["string","float"]},
{"field":"hoepa", "data_type":"numeric", "valid_vals":[1,2,3], "dtypes_":["int"]},
{"field":"lien", "data_type":"numeric", "valid_vals":[1,2], "dtypes_":["int"]},
{"field":"app_credit_score", "data_type":"numeric", "valid_vals":[], "dtypes_":["int"]},
{"field":"co_app_credit_score", "data_type":"numeric", "valid_vals":[], "dtypes_":["int"]},
{"field":"app_score_name", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6,7,8,9], "dtypes_":["int"]},
{"field":"app_score_code_8", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
# The co-applicant pair mirrors the applicant pair above: the scoring-model code is
# numeric (int enumeration) and the code-8 free-text field is alphanumeric.
{"field":"co_app_score_name", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6,7,8,9,10], "dtypes_":["int"]},
{"field":"co_app_score_code_8", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"denial_1", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6,7,8,9,10], "dtypes_":["int"]},
{"field":"denial_2", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6,7,8,9], "dtypes_":["int"]},
{"field":"denial_3", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6,7,8,9], "dtypes_":["int"]},
{"field":"denial_4", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6,7,8,9], "dtypes_":["int"]},
{"field":"denial_code_9", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"loan_costs", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["float","string"]},
{"field":"points_fees", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["float","string"]},
{"field":"origination_fee", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["float","string"]},
{"field":"discount_points", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["float","string"]},
{"field":"lender_credits", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["float","string"]},
{"field":"interest_rate", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["float","string"]},
{"field":"prepayment_penalty", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["float","string"]},
{"field":"dti", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["float","string"]},
{"field":"cltv", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["float","string"]},
{"field":"loan_term", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["int","string"]},
{"field":"intro_rate", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["int","string"]},
{"field":"balloon", "data_type":"numeric", "valid_vals":[1,2], "dtypes_":["int"]},
{"field":"int_only_pmts", "data_type":"numeric", "valid_vals":[1,2], "dtypes_":["int"]},
{"field":"neg_amort", "data_type":"numeric", "valid_vals":[1,2], "dtypes_":["int"]},
{"field":"non_amort_features", "data_type":"numeric", "valid_vals":[1,2], "dtypes_":["int"]},
{"field":"property_value", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["int","string"]},
{"field":"manufactured_type", "data_type":"numeric", "valid_vals":[1,2,3], "dtypes_":["int"]},
{"field":"manufactured_interest", "data_type":"numeric", "valid_vals":[1,2,3,4,5], "dtypes_":["int"]},
{"field":"total_units", "data_type":"numeric", "valid_vals":[], "dtypes_":["int"]},
{"field":"affordable_units", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["int","string"]},
{"field":"submission_type", "data_type":"numeric", "valid_vals":[1,2,3], "dtypes_":["int"]},
{"field":"initially_payable", "data_type":"numeric", "valid_vals":[1,2,3], "dtypes_":["int"]},
{"field":"mlo_id", "data_type":"alphanumeric", "valid_vals":["NA"], "dtypes_":["string"]},
{"field":"aus_1", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6], "dtypes_":["int"]},
{"field":"aus_2", "data_type":"numeric", "valid_vals":[1,2,3,4,5], "dtypes_":["int"]},
{"field":"aus_3", "data_type":"numeric", "valid_vals":[1,2,3,4,5], "dtypes_":["int"]},
{"field":"aus_4", "data_type":"numeric", "valid_vals":[1,2,3,4,5], "dtypes_":["int"]},
{"field":"aus_5", "data_type":"numeric", "valid_vals":[1,2,3,4,5], "dtypes_":["int"]},
{"field":"aus_code_5", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"aus_result_1", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17], "dtypes_":["int"]},
{"field":"aus_result_2", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16], "dtypes_":["int"]},
{"field":"aus_result_3", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16], "dtypes_":["int"]},
{"field":"aus_result_4", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16], "dtypes_":["int"]},
{"field":"aus_result_5", "data_type":"numeric", "valid_vals":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16], "dtypes_":["int"]},
{"field":"aus_code_16", "data_type":"alphanumeric", "valid_vals":[], "dtypes_":["string"]},
{"field":"reverse_mortgage", "data_type":"numeric", "valid_vals":[1,2], "dtypes_":["int"]},
{"field":"open_end_credit", "data_type":"numeric", "valid_vals":[1,2], "dtypes_":["int"]},
{"field":"business_purpose", "data_type":"numeric", "valid_vals":[1,2], "dtypes_":["int"]}
]

In [3]:
# Load the TS schema into a DataFrame (one row per field) and display the field names.
TS_df = pd.DataFrame(data=TS)
TS_df["field"]

0                  record_id
1                  inst_name
2              calendar_year
3           calendar_quarter
4               contact_name
5                contact_tel
6              contact_email
7     contact_street_address
8                office_city
9               office_state
10                office_zip
11            federal_agency
12               lar_entries
13                    tax_id
14                       lei
Name: field, dtype: object

In [4]:
# Load the LAR schema into a DataFrame (one row per field).
LAR_df = pd.DataFrame(data=LAR)

In [5]:
# Field names for the TS and LAR schemas, in schema order.
ts_names = TS_df.field.tolist()
lar_names = LAR_df.field.tolist()

# File length parameters: number of LAR rows per file size
# (small=200, medium=1,000, large=10,000).
lar_small = 200
lar_medium = 1000
lar_large = 10000

#Notes: increase chance for enum added to range
# free text not valid without certain selections
# mlo_id needs NA option

# Base LAR file range limits and fixed address values used when generating rows.
street_addy = "1234 Hocus Potato Way"
city = "Tatertown"
state = "UT"
zip_code = "84096"
max_age = 130
max_amount = 10000
max_income = 10000
max_rs = 100
max_credit_score = 900
min_credit_score = 300
loan_costs = 10000
points_and_fees = 5000
orig_charges = 5000
discount_points = 5000
lender_credits = 5000
interest_rate = 25
penalty_max = 36
dti = 100
cltv = 200
loan_term = 360
intro_rate = 36
prop_val_max = 30000
prop_val_min = 10


# Dummy TS row for the small file. Every value is a string so the row can be
# pipe-joined directly; enumerated fields take the first valid value in the schema.
ts_row_small = OrderedDict([
    ("record_id", str(TS_df.valid_vals[TS_df.field == "record_id"].iloc[0][0])),
    ("inst_name", "Ficus Bank"),
    ("calendar_year", str(2018)),
    ("calendar_quarter", str(TS_df.valid_vals[TS_df.field == "calendar_quarter"].iloc[0][0])),
    ("contact_name", "Mr. Smug Pockets"),
    ("contact_tel", "555-555-5555"),
    ("contact_email", "pockets@ficus.com"),
    ("contact_street_address", "1234 Ficus Lane"),
    ("office_city", "Ficusville"),
    ("office_state", "UT"),
    ("office_zip", "84096"),
    ("federal_agency", str(TS_df.valid_vals[TS_df.field == "federal_agency"].iloc[0][0])),
    ("lar_entries", str(lar_small)),
    ("tax_id", "01-0123456"),
    ("lei", "12345678901234567890"),
])

# The medium and large TS rows differ from the small one only in the LAR entry count.
ts_row_medium = OrderedDict(ts_row_small)
ts_row_medium.update(lar_entries=str(lar_medium))

ts_row_large = OrderedDict(ts_row_small)
ts_row_large.update(lar_entries=str(lar_large))

# Write each TS row to its own file as a single pipe-delimited line (no trailing newline).
for ts_path, ts_row in (
    ("ts_small.txt", ts_row_small),
    ("ts_medium.txt", ts_row_medium),
    ("ts_large.txt", ts_row_large),
):
    with open(ts_path, 'w') as outfile:
        outfile.write("|".join(ts_row.values()))

In [6]:
def char_string_gen(length):
    """Return a random string of `length` characters drawn from A-Z and 0-9."""
    alphabet = string.ascii_uppercase + string.digits
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

def date_gen(year=2018, valid=True):
    """Generate a date string in YYYYMMDD form.

    Parameters:
        year: calendar year of the generated date; must be convertible to int
            when valid is True.
        valid: when True, return a real calendar date in `year` (the day is
            bounded by the length of the chosen month, including leap-year
            February). When False, return a deliberately invalid date with
            month 16 and day 33.

    Returns:
        The date as a string: year followed by a 2-digit month and 2-digit day.
    """
    if not valid:
        return str(year) + str(16) + str(33)
    month = random.choice(range(1, 13))
    # monthrange returns (weekday of first day, number of days in month).
    days_in_month = calendar.monthrange(int(year), month)[1]
    day = random.choice(range(1, days_in_month + 1))
    return str(year) + str(month).zfill(2) + str(day).zfill(2)

def random_enum(enums):
    """Return one element picked at random from the sequence `enums`."""
    chosen = random.choice(enums)
    return chosen

def get_schema_val(schema="LAR", position=0, item=0, field=None):
    """Return one value from the valid_vals list of the named schema field.

    Parameters:
        schema: "LAR" or "TS"; selects which schema DataFrame is searched.
        position: positional index among the rows matching `field`
            (default: the first match).
        item: index into that row's valid_vals list (default: the first value).
        field: name of the schema field to look up; required.

    Returns:
        The selected entry of the field's valid_vals list.

    Raises:
        ValueError: if `field` is not given, or `schema` is neither "LAR" nor
            "TS" (previously an unknown schema silently returned None).
        IndexError: if `position` or `item` is out of range, e.g. the field is
            not in the schema or its valid_vals list is empty.
    """
    if not field:
        raise ValueError("must specify which field")
    if schema == "LAR":
        schema_df = LAR_df
    elif schema == "TS":
        schema_df = TS_df
    else:
        raise ValueError("schema must be 'LAR' or 'TS', got {!r}".format(schema))
    return schema_df.valid_vals[schema_df.field == field].iloc[position][item]

def get_schema_list(schema="LAR", field=None):
    """Return the list of valid values for the specified schema and field.

    Parameters:
        schema: "LAR" or "TS"; selects which schema DataFrame is searched.
        field: name of the schema field to look up; required.

    Returns:
        A new list holding the field's valid_vals. It is a shallow copy, so
        callers may extend or modify it without altering the schema itself.

    Raises:
        ValueError: if `field` is not given, or `schema` is neither "LAR" nor
            "TS" (previously an unknown schema silently returned None).
        IndexError: if no schema row matches `field`.
    """
    if not field:
        raise ValueError("must specify which field")
    if schema == "LAR":
        schema_df = LAR_df
    elif schema == "TS":
        schema_df = TS_df
    else:
        raise ValueError("schema must be 'LAR' or 'TS', got {!r}".format(schema))
    # Copy: the object stored in the DataFrame is the schema's own list.
    return list(schema_df.valid_vals[schema_df.field == field].iloc[0])

def range_and_enum(field=None, rng_min=1, rng_max=100, dtype="int"):
    """Return the LAR field's enumerated valid values followed by a numeric range.

    Parameters:
        field: name of the LAR schema field whose valid_vals start the list.
        rng_min: first integer of the appended range (inclusive).
        rng_max: end of the appended range (exclusive).
        dtype: "int" appends each integer i in range(rng_min, rng_max);
            "float" appends i * .97 for each such i; any other value appends
            nothing.

    Returns:
        A new list. The schema's own valid_vals list is left untouched, so
        repeated calls no longer grow the schema.
    """
    # Copy first: get_schema_list may hand back the list object stored in the
    # schema, and extending that in place would change the schema on every call.
    values = list(get_schema_list(field=field))
    if dtype == "int":
        values.extend(range(rng_min, rng_max))
    elif dtype == "float":
        values.extend(i * .97 for i in range(rng_min, rng_max))
    return values


#random_enum(LAR_df.valid_vals[LAR_df.field=="loan_type"].iloc[0]) #random enumeration from loan types

# Quick check of range_and_enum: the rate_spread enumerations followed by float steps.
boop = range_and_enum(field="rate_spread", rng_min=1, rng_max=100, dtype="float")
print(boop)

In [17]:
#this file will have valid values for all column/row entries
#some valid value lists contain only "NA", other entries are also valid

def make_rows(num_rows):
    """Make num_rows LAR rows and return them as a list of ordered dicts.

    Each row maps LAR field names to a randomly generated value, with keys
    inserted in LAR column order. Enumerated fields draw from the schema's
    valid values, free-text fields get random strings of random length, and
    ranged fields draw from the enumerations plus a numeric range. Values are
    left in their generated types (a mix of ints, floats and strings).

    Parameters:
        num_rows: number of rows to generate; 0 yields an empty list.

    Returns:
        A list of OrderedDict rows.
    """
    def pick(field_name):
        """Random entry from the LAR schema's valid values for field_name."""
        return random_enum(get_schema_list(field=field_name))

    def free_text(max_len):
        """Random string whose length is drawn from range(max_len)."""
        return char_string_gen(random.choice(range(max_len)))

    LARS = []
    for _ in range(num_rows):
        row = OrderedDict()
        row["record_id"] = str(LAR_df.valid_vals[LAR_df.field=="record_id"].iloc[0][0])
        row["lei"] = char_string_gen(20)
        # The ULI starts with the LEI, followed by 25 more random characters.
        row["uli"] = row["lei"] + char_string_gen(25)
        row["app_date"] = date_gen()
        for name in ("loan_type", "loan_purpose", "preapproval", "const_method", "occ_type"):
            row[name] = pick(name)
        row["loan_amount"] = random_enum(range(1, max_amount))
        row["action_taken"] = pick("action_taken")
        row["action_date"] = date_gen()
        row["street_address"] = street_addy
        row["city"] = city
        row["state"] = state
        row["zip_code"] = zip_code
        row["county"] = random_enum(county_list)
        row["tract"] = random_enum(tract_list)
        for n in range(1, 6):
            row["app_eth_%d" % n] = pick("app_eth_%d" % n)
        row["app_eth_code_14"] = free_text(100)
        # NOTE(review): the LAR schema also lists "app_eth_text" at this position,
        # which is not populated here -- confirm whether that is intended.
        for n in range(1, 6):
            row["co_app_eth_%d" % n] = pick("co_app_eth_%d" % n)
        row["co_app_eth_code_14"] = free_text(100)
        row["app_eth_basis"] = pick("app_eth_basis")
        row["co_app_eth_basis"] = pick("co_app_eth_basis")
        for n in range(1, 6):
            row["app_race_%d" % n] = pick("app_race_%d" % n)
        for name in ("app_race_code_1", "app_race_code_27", "app_race_code_44"):
            row[name] = free_text(100)
        for n in range(1, 6):
            row["co_app_race_%d" % n] = pick("co_app_race_%d" % n)
        for name in ("co_app_race_code_1", "co_app_race_code_27", "co_app_race_code_44"):
            row[name] = free_text(100)
        for name in ("app_race_basis", "co_app_race_basis", "app_sex", "co_app_sex",
                     "app_sex_basis", "co_app_sex_basis"):
            row[name] = pick(name)
        row["app_age"] = random_enum(range_and_enum(field="app_age", rng_max=max_age))
        row["co_app_age"] = random_enum(range_and_enum(field="co_app_age", rng_max=max_age))
        row["income"] = random_enum(range(1, max_income))
        row["purchaser_type"] = pick("purchaser_type")
        row["rate_spread"] = random_enum(range_and_enum(field="rate_spread", rng_max=max_rs, dtype="float"))
        row["hoepa"] = pick("hoepa")
        row["lien"] = pick("lien")
        row["app_credit_score"] = random_enum(range_and_enum(
            field="app_credit_score", rng_min=min_credit_score, rng_max=max_credit_score))
        row["co_app_credit_score"] = random_enum(range_and_enum(
            field="co_app_credit_score", rng_min=min_credit_score, rng_max=max_credit_score))
        row["app_score_name"] = pick("app_score_name")
        row["app_score_code_8"] = free_text(100)
        row["co_app_score_name"] = pick("co_app_score_name")
        # Key matches the schema field name (was "co_app_code_8").
        row["co_app_score_code_8"] = free_text(100)
        for n in range(1, 5):
            row["denial_%d" % n] = pick("denial_%d" % n)
        row["denial_code_9"] = free_text(255)
        row["loan_costs"] = random_enum(range_and_enum(field="loan_costs", rng_max=loan_costs))
        row["points_fees"] = random_enum(range_and_enum(field="points_fees", rng_max=points_and_fees))
        row["origination_fee"] = random_enum(range_and_enum(field="origination_fee", rng_max=orig_charges))
        row["discount_points"] = random_enum(range_and_enum(field="discount_points", rng_max=discount_points))
        row["lender_credits"] = random_enum(range_and_enum(field="lender_credits", rng_max=lender_credits))
        # Uses the interest_rate limit constant rather than a repeated literal.
        row["interest_rate"] = random_enum(range_and_enum(field="interest_rate", rng_max=interest_rate, dtype="float"))
        row["prepayment_penalty"] = random_enum(range_and_enum(field="prepayment_penalty", rng_max=penalty_max))
        row["dti"] = random_enum(range_and_enum(field="dti", rng_max=dti))
        row["cltv"] = random_enum(range_and_enum(field="cltv", rng_max=cltv))
        row["loan_term"] = random_enum(range_and_enum(field="loan_term", rng_max=loan_term))
        row["intro_rate"] = random_enum(range_and_enum(field="intro_rate", rng_max=intro_rate))
        for name in ("balloon", "int_only_pmts", "neg_amort", "non_amort_features"):
            row[name] = pick(name)
        row["property_value"] = random_enum(range_and_enum(
            field="property_value", rng_min=prop_val_min, rng_max=prop_val_max))
        row["manufactured_type"] = pick("manufactured_type")
        row["manufactured_interest"] = pick("manufactured_interest")
        # At least one unit: drawing 0 previously made the affordable_units
        # choice run over an empty range and raise IndexError.
        row["total_units"] = random.choice(range(1, 25))
        # Affordable units may be anything from 0 up to all of the units.
        row["affordable_units"] = random.choice(range(row["total_units"] + 1))
        row["submission_type"] = pick("submission_type")
        row["initially_payable"] = pick("initially_payable")
        row["mlo_id"] = free_text(25)
        for n in range(1, 6):
            row["aus_%d" % n] = pick("aus_%d" % n)
        row["aus_code_5"] = free_text(255)
        for n in range(1, 6):
            row["aus_result_%d" % n] = pick("aus_result_%d" % n)
        row["aus_code_16"] = free_text(255)
        for name in ("reverse_mortgage", "open_end_credit", "business_purpose"):
            row[name] = pick(name)
        LARS.append(row)
    return LARS

In [21]:
# Generate a small sample of LAR rows.
lar_file = make_rows(num_rows=10)


In [19]:
# Tabulate the generated rows: one DataFrame row per LAR row, one column per field.
lar_df = pd.DataFrame(data=lar_file)

In [20]:
# Preview the first five generated rows.
lar_df.head(n=5)

Unnamed: 0,record_id,lei,uli,app_date,loan_type,loan_purpose,preapproval,const_method,occ_type,loan_amount,...,aus_code_5,aus_result_1,aus_result_2,aus_result_3,aus_result_4,aus_result_5,aus_code_16,reverse_mortgage,open_end_credit,business_purpose
0,2,10RP1102DCSRVPX4K2N6,10RP1102DCSRVPX4K2N6O5GE4T4IEIZB3JT852683AM9F,20180129,2,2,2,2,1,7999,...,WTILOO549UI0UFWQ171VCNCXKAZ604CC4RPA7VLJKLSIJ0...,9,16,11,16,5,GRRN9CHLMJHKW173RKJQ14MTZD9JNW50U3FFQ6U83ELL2O...,1,1,2
1,2,BUGX94O3IKODPC1DUYJW,BUGX94O3IKODPC1DUYJWL84TSF7PJOOI8AW8VRXGD8GB5,20181103,3,32,2,1,2,4198,...,784CD6L0ENH0LYZOUW1YSQI2I4OOV3PS1MZ0WK6Q,16,9,5,6,8,VZ6Y2D06D6CPT5D0544ORZ2V1YDVORT97DBA51GEZMSC3V...,1,1,1
2,2,DGFOGJ2KOFPXRPSDIYHK,DGFOGJ2KOFPXRPSDIYHKDTQL1YFA64ZHIX4VWB5P1E7S9,20181113,3,4,2,2,1,6303,...,LKZF2VX55P4U66HYM00O3PX99S647FHMI8O1U9SVMO95J1...,9,1,10,7,1,PUTPFBOV4I5O62GNBM1L8OB9YPLEOH29RJ3JYLPP2ZDXUI...,1,2,1
3,2,808RLZGO1P4C4GUYDLSO,808RLZGO1P4C4GUYDLSOHDQP7Y0Y3WH6HG68CEQOA5554,20180213,3,1,1,2,3,5807,...,T9YZISUGGLI588BS4B27PMSNVYLIL2A4PITYD1ZFE0U944...,14,12,4,16,14,RNS7GRLTVJRE3ICPCKHY3XYRWNS0NOMHCWE4FUSGTH827J...,1,2,2
4,2,POQQAHIBCESYIT4BJSH4,POQQAHIBCESYIT4BJSH4977Y0PHQTTRC8SYY6IMOBMTIW,20180923,3,2,1,2,1,8701,...,9UDFFVZJ3C5KNWOYBQNB0ER57I7GKJDB70K7D6VGL54RHO...,7,10,12,2,11,U8SS4UIO5QJX9J4BNBOGS9C2VVKUSCZOC8IQY4AALH76EP...,2,2,1


In [27]:
# List the TS field names, one per line.
for field in TS_df["field"]:
    print(field)


record_id
inst_name
calendar_year
calendar_quarter
contact_name
contact_tel
contact_email
contact_street_address
office_city
office_state
office_zip
federal_agency
lar_entries
tax_id
lei


In [None]:
#S300
#The data provided in the file is incorrect. Please review the information below and update your file accordingly.
#The following criteria must be met:
#1) The first row of your file must begin with a 1; and
#2) Any subsequent rows must begin with a 2.

