In [1]:
# K. David Roell 8/23/2017
# This file will modify files created in CYOA test files so that the files fail specific edits


In [2]:
import os
import pandas as pd
import random
import string

pd.set_option("display.max_columns",110)

In [3]:
#Lists of names will be used for loading data into Pandas frames
# Order is significant: both lists are passed as names= to pd.read_csv with header=None,
# so each name is assigned to a pipe-delimited column by position.

# 110 column names for LAR rows (used by load_lar and load_lars).
LAR_field_names = [
    "record_id","lei","uli","app_date","loan_type","loan_purpose",
    "preapproval","const_method","occ_type","loan_amount","action_taken",
    "action_date","street_address","city","state","zip_code","county",
    "tract","app_eth_1","app_eth_2","app_eth_3","app_eth_4","app_eth_5",
    "app_eth_code_14","co_app_eth_1","co_app_eth_2",
    "co_app_eth_3","co_app_eth_4","co_app_eth_5","co_app_eth_code_14",
    "app_eth_basis","co_app_eth_basis","app_race_1","app_race_2",
    "app_race_3","app_race_4","app_race_5","app_race_code_1",
    "app_race_code_27","app_race_code_44","co_app_race_1","co_app_race_2",
    "co_app_race_3","co_app_race_4","co_app_race_5","co_app_race_code_1",
    "co_app_race_code_27","co_app_race_code_44","app_race_basis",
    "co_app_race_basis","app_sex","co_app_sex","app_sex_basis",
    "co_app_sex_basis","app_age","co_app_age","income","purchaser_type",
    "rate_spread","hoepa","lien","app_credit_score","co_app_credit_score",
    "app_score_name","app_score_code_8","co_app_score_name",
    "co_app_score_code_8","denial_1","denial_2","denial_3","denial_4",
    "denial_code_9","loan_costs","points_fees","origination_fee",
    "discount_points","lender_credits","interest_rate","prepayment_penalty",
    "dti","cltv","loan_term","intro_rate","balloon","int_only_pmts",
    "neg_amort","non_amort_features","property_value","manufactured_type",
    "manufactured_interest","total_units","affordable_units",
    "submission_type","initially_payable","mlo_id","aus_1","aus_2",
    "aus_3","aus_4","aus_5","aus_code_5","aus_result_1","aus_result_2",
    "aus_result_3","aus_result_4","aus_result_5","aus_code_16",
    "reverse_mortgage","open_end_credit","business_purpose"]

# 15 column names for the TS row (used by load_ts_pandas).
TS_field_names= [
    "record_id","inst_name","calendar_year","calendar_quarter",
    "contact_name","contact_tel","contact_email","contact_street_address",
    "office_city","office_state","office_zip","federal_agency",
    "lar_entries","tax_id","lei"]

In [4]:
#load TS rows for adding to LAR file parts
# Each size reads its own TS file (the same three files load_ts_pandas reads); only the
# first line of each file is kept. The rows must differ per size for the S304 cell, which
# deliberately pairs each LAR file with another size's TS row.
with open("edits_files/file_parts/ts_small.txt", 'r') as ts_file:
    ts_row_sm = ts_file.readline()
with open("edits_files/file_parts/ts_medium.txt", 'r') as ts_file:
    ts_row_med = ts_file.readline()
with open("edits_files/file_parts/ts_large.txt", 'r') as ts_file:
    ts_row_lg = ts_file.readline()
# Ordered small, medium, large: write_lar_files zips this list against its size list.
ts_rows = [ts_row_sm, ts_row_med, ts_row_lg]

FileNotFoundError: [Errno 2] No such file or directory: 'edits_files/file_parts/ts_small.txt'

In [None]:
#common variables
# Folder holding the base and intermediate file parts; write_mod_lars and write_mod_ts
# prefix their output file names with it, and cells pass it to rem_file_parts.
path = "edits_files/file_parts/"
# NOTE(review): no visible code reads this module-level value; write_lar_files builds its
# own local final_path from the edit name -- confirm whether it is still needed.
final_path ="edits_files/syntax/"
# NOTE(review): same template as load_lar's file_name default; no visible code reads this
# module-level copy -- confirm whether it is still needed.
file_name = "lar_passes_{size}_no_ts.txt"

#Helper functions

def write_lar_files(edit_name, ts_rows=ts_rows):
    """Writes edit testing files to edit folder.

    For each size (sm, med, lg) one output file is produced: the matching TS row first,
    followed by every line of the intermediate LAR file edits_files/file_parts/_lar_<size>.txt.
    Output is written to edits_files/<edit type>/<edit_name>_<size>.txt.

    edit_name: edit identifier such as "s300" or "v601". Its first letter selects the
        output folder (s=syntax, v=validity, q=quality); upper or lower case is accepted.
    ts_rows: three TS row strings ordered small, medium, large. Defaults to the TS rows
        loaded at the top of the notebook.

    Raises ValueError if edit_name does not start with s, v or q.
    """
    # Map the edit prefix to its folder; an unknown prefix is reported explicitly
    # rather than surfacing later as an unbound local variable.
    edit_types = {"s": "syntax", "v": "validity", "q": "quality"}
    prefix = edit_name[:1].lower()
    if prefix not in edit_types:
        raise ValueError("edit_name must start with s, v or q, got: " + repr(edit_name))
    edit_type = edit_types[prefix]

    source_path = "edits_files/file_parts/"
    final_path = "edits_files/" + edit_type + "/"
    source_names = ["_lar_sm.txt", "_lar_med.txt", "_lar_lg.txt"]
    sizes = ["sm", "med", "lg"]

    # exist_ok avoids the check-then-create race of os.path.exists followed by makedirs.
    os.makedirs(final_path, exist_ok=True)

    for source_name, size, ts_row in zip(source_names, sizes, ts_rows):
        outfile = edit_name+"_"+size+".txt"
        # Source is opened first so a missing LAR part does not leave an empty output file.
        with open(source_path+source_name, 'r') as source_file, \
                open(final_path+outfile, 'w') as final_file:
            # Guard against a TS row read from a file with no trailing newline, which
            # would otherwise run into the first LAR row.
            if ts_row and not ts_row.endswith("\n"):
                ts_row = ts_row + "\n"
            final_file.write(ts_row)
            final_file.writelines(source_file)

def write_mod_lars():
    """Saves the notebook-level LAR frames sm, med and lg as intermediate files.

    Each frame is written pipe-delimited, without index or header, to
    path + "_lar_<size>.txt". write_lar_files later places a TS row in front of these rows.
    """
    csv_options = dict(sep="|", index=False, header=False)
    sm.to_csv(path + "_lar_sm.txt", **csv_options)
    med.to_csv(path + "_lar_med.txt", **csv_options)
    lg.to_csv(path + "_lar_lg.txt", **csv_options)

def write_mod_ts():
    """Saves the notebook-level TS frames ts_sm, ts_med and ts_lg as intermediate files.

    Each frame is written pipe-delimited, without index or header, to
    path + "_ts_<size>.txt". load_mod_ts reads these files back as strings.
    """
    csv_options = dict(sep="|", index=False, header=False)
    ts_sm.to_csv(path + "_ts_sm.txt", **csv_options)
    ts_med.to_csv(path + "_ts_med.txt", **csv_options)
    ts_lg.to_csv(path + "_ts_lg.txt", **csv_options)
    
def rem_file_parts(path, files=()):
    """Removes the file parts used in creating test files.

    path: folder prefix; it is concatenated directly with each file name, so it must end
        with a path separator.
    files: iterable of file names to delete. Defaults to an empty (immutable) tuple, so
        calling without files removes nothing.

    Raises whatever os.remove raises (e.g. FileNotFoundError) if a file cannot be removed.
    """
    for file in files:
        os.remove(path+file)
        
def load_lar(size="small", path="edits_files/file_parts/", file_name="lar_passes_{size}_no_ts.txt"):
    """Reads one LAR file into a dataframe. This function is deprecated.

    The file location is path + file_name with {size} filled in. The file is read as
    pipe-delimited text with no header row; columns are named from LAR_field_names and
    every value is kept as an object (string) dtype.
    """
    lar_location = path + file_name.format(size=size)
    return pd.read_csv(lar_location, sep="|", header=None, names=LAR_field_names, dtype=object)

def load_mod_ts():
    """Loads modified TS files as strings for adding to LAR data.

    Reads the first line of each intermediate TS file written by write_mod_ts
    (_ts_sm.txt, _ts_med.txt, _ts_lg.txt under edits_files/file_parts/).

    Returns a tuple (sm, med, lg) of strings, each including the line's trailing
    newline when the file has one.
    """
    # Context managers close each handle as soon as its line has been read.
    with open("edits_files/file_parts/_ts_sm.txt", 'r') as ts_file:
        sm = ts_file.readline()
    with open("edits_files/file_parts/_ts_med.txt", 'r') as ts_file:
        med = ts_file.readline()
    with open("edits_files/file_parts/_ts_lg.txt", 'r') as ts_file:
        lg = ts_file.readline()
    return sm, med, lg

def load_lars():
    """Reads the small, medium and large base LAR files into dataframes.

    Files are pipe-delimited with no header row; columns are named from LAR_field_names
    and every value is kept as an object (string) dtype.

    Returns a tuple (lar_sm, lar_med, lar_lg).
    """
    read_options = dict(sep="|", header=None, names=LAR_field_names, dtype=object)
    lar_sm = pd.read_csv("edits_files/file_parts/lar_passes_small_no_ts.txt", **read_options)
    lar_med = pd.read_csv("edits_files/file_parts/lar_passes_medium_no_ts.txt", **read_options)
    lar_lg = pd.read_csv("edits_files/file_parts/lar_passes_large_no_ts.txt", **read_options)
    return lar_sm, lar_med, lar_lg

def load_ts_pandas():
    """Reads the small, medium and large TS files into dataframes for modification.

    Files are pipe-delimited with no header row; columns are named from TS_field_names
    and every value is kept as an object (string) dtype.

    Returns a tuple (sm, med, lg).
    """
    read_options = dict(sep="|", dtype=object, header=None, names=TS_field_names)
    sm = pd.read_csv("edits_files/file_parts/ts_small.txt", **read_options)
    med = pd.read_csv("edits_files/file_parts/ts_medium.txt", **read_options)
    lg = pd.read_csv("edits_files/file_parts/ts_large.txt", **read_options)
    return sm, med, lg

def random_string(length, join=""):
    """Returns a random string of uppercase ASCII letters and digits.

    length: number of random characters to draw.
    join: separator placed between consecutive characters (default: none). It is
        converted with str(), so non-string separators are accepted.

    The result holds `length` random characters; with a separator the total length is
    longer. Uses the `random` module, so it is not suitable for secrets.
    """
    alphabet = string.ascii_uppercase + string.digits
    return str(join).join(random.choice(alphabet) for _ in range(length))

def check_digit_gen(valid=True, ULI=None):
    """Computes the two-character check digit for a ULI, following
    https://www.consumerfinance.gov/eregulations/diff/1003-C/2015-26607_20170101/2015-26607_20180101?from_version=2015-26607_20170101#1003-C-1

    valid: when True the computed check digit is returned; when False a deliberately
        wrong value (check digit + 6, cut to two characters) is returned for edit testing.
    ULI: the characters the check digit is computed over. Required.

    Returns the digit as a string, left-padded with 0 to two characters.
    Raises ValueError when ULI is None.
    """
    if ULI is None:
        raise ValueError("a ULI must be supplied")

    # Letters are replaced by two-digit numbers: A=10, B=11, ... Z=35.
    letter_values = {letter: 10 + offset for offset, letter in enumerate("ABCDEFGHIJKLMNOPQRSTUVWXYZ")}

    # Any character that is not a letter (after upper-casing) is kept as it is.
    numeric_text = "".join(
        str(letter_values[symbol.upper()]) if symbol.upper() in letter_values else symbol
        for symbol in list(ULI))

    # Two zeros are appended before taking the remainder modulo 97.
    remainder = int(numeric_text + '00') % 97
    check_digit = 98 - remainder

    if not valid:
        # bad check digit (used in edit testing)
        return str(check_digit+6).zfill(2)[:2]
    return str(check_digit).zfill(2)

In [None]:
#S300
#The data provided in the file is incorrect. Please review the information below and update your file accordingly.
#The following criteria must be met:
#1) The first row of your file must begin with a 1; and
#2) Any subsequent rows must begin with a 2.

#fail notes:
#file has record id for transmittal sheet =2 and record id for lar =1

# Load the TS rows as dataframes so a single field can be overwritten.
ts_sm, ts_med, ts_lg = load_ts_pandas()

# Give the TS row the record id "2" (it should be "1").
ts_sm["record_id"] = ts_med["record_id"] = ts_lg["record_id"] = "2"
# Save the altered TS rows, then read them back as plain strings.
write_mod_ts()
ts_sm, ts_med, ts_lg = load_mod_ts()

# Load the base LAR frames and give every LAR row the record id "1" (it should be "2").
sm, med, lg = load_lars()
sm["record_id"] = med["record_id"] = lg["record_id"] = "1"
# Save the altered LAR rows.
write_mod_lars()
# Combine TS row and LAR rows into the s300 files in the syntax folder.
write_lar_files("s300", [ts_sm, ts_med, ts_lg])

In [None]:
#S301
#The LEI in this row does not match the reported LEI in the transmittal sheet (the first row of your file). 
#Please update your file accordingly.

#fail notes
#LEI in LAR rows has been changed to a random string of ASCII characters

# Start from the base LAR frames.
sm, med, lg = load_lars()
# Replace the LEI with a random 20-character string; one value is drawn per file and
# applied to every row of that file.
sm["lei"] = random_string(20)
med["lei"] = random_string(20)
lg["lei"] = random_string(20)
# Save the altered LAR rows, then build the s301 files in the syntax folder.
write_mod_lars()
write_lar_files("s301")
# Delete the intermediate LAR parts used to build the files.
rem_file_parts(path, files=['_lar_sm.txt', '_lar_med.txt', '_lar_lg.txt'])

In [None]:
#V600
#An LEI in an invalid format was provided. Please review the information below and update your file accordingly.
#1) The required format for LEI is alphanumeric with 20 characters, and it cannot be left blank.

#fail notes
#LEI is blank for each LAR row

# Start from the base LAR frames.
sm, med, lg = load_lars()
# Blank the LEI on every row (a blank LEI is an invalid format).
sm["lei"] = med["lei"] = lg["lei"] = ""
# Save the altered LAR rows, then build the v600 files in the validity folder.
write_mod_lars()
write_lar_files("v600")



In [None]:
#S302
#The reported Calendar Year does not match the filing year indicated at the start of the filing. 
#Please confirm the information below and update your file accordingly.
#1) The correct file has been uploaded; and
#2) The correct filing year was chosen at the start of the filing; and
#3) The calendar year is listed correctly in the file.

#fail notes
#Calendar Year in the TS row has been changed to 2010

# Load the base LAR frames (left unmodified) and the TS rows as dataframes.
sm, med, lg = load_lars()
ts_sm, ts_med, ts_lg = load_ts_pandas()

# Set the calendar year to 2010 (invalid for 2018).
ts_sm["calendar_year"] = ts_med["calendar_year"] = ts_lg["calendar_year"] = "2010"
# Save the altered TS rows and read them back as strings.
write_mod_ts()
ts_sm, ts_med, ts_lg = load_mod_ts()
# Write the LAR parts (overwrites any LAR parts left by another cell).
write_mod_lars()
# Build the s302 files in the syntax folder.
write_lar_files("s302", ts_rows=[ts_sm, ts_med, ts_lg])

In [None]:
#S303
#The reported Federal Agency; Federal Taxpayer Identification Number; 
#and Legal Entity Identifier must match the Federal Agency; Federal Taxpayer Identification Number; 
#and Legal Entity Identifier for the financial institution for which you are filing. 
#Please confirm the information below and update your file accordingly.
#1) The correct financial institution was at the start of the filing; and
#2) The correct file was uploaded; and
#3) The Federal Agency, Federal Taxpayer Identification Number, 
#and Legal Entity Identifier are reported correctly in the file.

#fail notes
#Requires a match against panel
#Discuss how to implement this file

In [None]:
#S304
#The reported Total Number of Entries Contained in Submission does not match the total number of LARs in the HMDA file.
#Please update your file accordingly.

#fail notes:
#TS rows have been re-ordered sm>md, md>lg, lg>sm

# Load the base LAR frames (left unmodified).
sm, med, lg = load_lars()
# Rotate the TS rows so each LAR file is paired with another size's TS row:
# small gets medium's, medium gets large's, large gets small's.
s304_ts_sm, s304_ts_med, s304_ts_lg = ts_row_med, ts_row_lg, ts_row_sm
# Write the LAR parts (overwrites any LAR parts left by another cell).
write_mod_lars()
# Build the s304 files in the syntax folder.
write_lar_files("s304", ts_rows=[s304_ts_sm, s304_ts_med, s304_ts_lg])

In [None]:
#V601
#The following data fields are required, and cannot be left blank. A blank value(s) was provided. 
#Please review the information below and update your file accordingly.
#1) Financial Institution Name;
#2) Contact Person's Name;
#3) Contact Person's E-mail Address;
#4) Contact Person's Office Street Address;
#5) Contact Person's Office City

#fail notes
#the five required fields named in the V601 description have been changed to blank
# contact_tel is deliberately not blanked: it is not a V601 field, and a blank telephone
# number would make the file fail V603 as well.
change_fields = ["inst_name","contact_name","contact_email","contact_street_address","office_city"]
#load base LAR data
sm, med, lg = load_lars()
#load TS rows as dataframes for modification (prevents issues with changes to field values)
ts_sm, ts_med, ts_lg = load_ts_pandas()
#change TS row data to fail edit
for field in change_fields:
    ts_sm[field] = ""
    ts_med[field] = ""
    ts_lg[field] = ""
#write modified TS data to files
write_mod_ts()
#write mod LARs (overwrites other modified LAR data)
write_mod_lars()
#re-load TS data as strings
ts_sm, ts_med, ts_lg = load_mod_ts()
#write test file to validity folder
write_lar_files("v601", ts_rows=[ts_sm, ts_med, ts_lg])

In [None]:
#V602
#An invalid Calendar Quarter was reported. Please review the information below and update your file accordingly.
#1) Calendar Quarter must equal 4, and cannot be left blank.

#fail notes:
#calendar quarter has been changed to blank

# Load the base LAR frames and write them out unchanged (overwrites any LAR parts
# left by another cell); only the TS row is altered for this edit.
sm, med, lg = load_lars()
write_mod_lars()
# Load the TS rows as dataframes and blank the calendar quarter.
ts_sm, ts_med, ts_lg = load_ts_pandas()
ts_sm["calendar_quarter"] = ts_med["calendar_quarter"] = ts_lg["calendar_quarter"] = ""
# Save the altered TS rows and read them back as strings.
write_mod_ts()
ts_sm, ts_med, ts_lg = load_mod_ts()
# Build the v602 files in the validity folder.
write_lar_files("v602", ts_rows=[ts_sm, ts_med, ts_lg])

In [None]:
#V603
#An invalid Contact Person's Telephone Number was provided. 
#Please review the information below and update your file accordingly.
#1) The required format for the Contact Person's Telephone Number is 999-999-9999, and it cannot be left blank.

#fail notes:
#Contact phone has been changed to 555-5555

# Load the base LAR frames and write them out unchanged (overwrites any LAR parts
# left by another cell); only the TS row is altered for this edit.
sm, med, lg = load_lars()
write_mod_lars()
# Load the TS rows as dataframes and set a telephone number without an area code.
ts_sm, ts_med, ts_lg = load_ts_pandas()
ts_sm["contact_tel"] = ts_med["contact_tel"] = ts_lg["contact_tel"] = "555-5555"
# Save the altered TS rows and read them back as strings.
write_mod_ts()
ts_sm, ts_med, ts_lg = load_mod_ts()
# Build the v603 files in the validity folder.
write_lar_files("v603", ts_rows=[ts_sm, ts_med, ts_lg])

In [None]:
#V604
#An invalid Contact Person's Office State was provided. 
#Please review the information below and update your file accordingly.
#1) Contact Person's Office State must be a two letter state code, and cannot be left blank.

#fail notes:
#Office State has been changed to 3 characters

# Load the base LAR frames and write them out unchanged (overwrites any LAR parts
# left by another cell); only the TS row is altered for this edit.
sm, med, lg = load_lars()
write_mod_lars()
# Load the TS rows as dataframes and set a three-letter office state.
ts_sm, ts_med, ts_lg = load_ts_pandas()
ts_sm["office_state"] = ts_med["office_state"] = ts_lg["office_state"] = "UTX"
# Save the altered TS rows and read them back as strings.
write_mod_ts()
ts_sm, ts_med, ts_lg = load_mod_ts()
# Build the v604 files in the validity folder.
write_lar_files("v604", ts_rows=[ts_sm,ts_med,ts_lg])


In [None]:
#V605
#An invalid Contact Person's ZIP Code was provided. 
#Please review the information below and update your file accordingly.
#1) The required format for the Contact Person's ZIP Code is 12345-1010 or 12345, and it cannot be left blank.

#fail notes:
#ZIP Code changed to blank

# Load the base LAR frames and write them out unchanged (overwrites any LAR parts
# left by another cell); only the TS row is altered for this edit.
sm, med, lg = load_lars()
write_mod_lars()
# Load the TS rows as dataframes and blank the office ZIP code.
ts_sm, ts_med, ts_lg = load_ts_pandas()
ts_sm["office_zip"] = ts_med["office_zip"] = ts_lg["office_zip"] = ""
# Save the altered TS rows and read them back as strings.
write_mod_ts()
ts_sm, ts_med, ts_lg = load_mod_ts()
# Build the v605 files in the validity folder.
write_lar_files("v605", ts_rows=[ts_sm, ts_med, ts_lg])

In [None]:
#V606
#The reported Total Number of Entries Contained in Submission is not in the valid format. 
#Please review the information below and update your file accordingly.
#1) The required format for the Total Number of Entries Contained in Submission 
#is a whole number that is greater than zero, and it cannot be left blank.

#fail notes:
#TS LAR entries count changed to 0

# Load the base LAR frames and write them out unchanged (overwrites any LAR parts
# left by another cell); only the TS row is altered for this edit.
sm, med, lg = load_lars()
write_mod_lars()
# Load the TS rows as dataframes and set the LAR entry count to zero.
ts_sm, ts_med, ts_lg = load_ts_pandas()
ts_sm["lar_entries"] = ts_med["lar_entries"] = ts_lg["lar_entries"] = "0"
# Save the altered TS rows and read them back as strings.
write_mod_ts()
ts_sm, ts_med, ts_lg = load_mod_ts()
# Build the v606 files in the validity folder.
write_lar_files("v606", ts_rows=[ts_sm, ts_med, ts_lg])

In [None]:
#V607
#An invalid Federal Taxpayer Identification Number was provided. 
#Please review the information below and update your file accordingly.
#1) The required format for the Federal Taxpayer Identification Number is 99-9999999, and it cannot be left blank.

#fail notes
#dash removed from tax ID

# Load the base LAR frames and write them out unchanged; only the TS row is altered
# for this edit.
sm, med, lg = load_lars()
write_mod_lars()
# Load the TS rows as dataframes and set a tax ID with no dash.
ts_sm, ts_med, ts_lg = load_ts_pandas()
ts_sm["tax_id"] = ts_med["tax_id"] = ts_lg["tax_id"] = "999999999"
# Save the altered TS rows and read them back as strings.
write_mod_ts()
ts_sm, ts_med, ts_lg = load_mod_ts()
# Build the v607 files in the validity folder.
write_lar_files("v607", ts_rows=[ts_sm, ts_med, ts_lg])

In [None]:
#S305
#A duplicate transaction has been reported. Please review and update your file accordingly.

#fail notes:
#base edits files will all fail this as they are all copies of the same 3 LARs


In [None]:
#V608
#A ULI with an invalid format was provided. Please review the information below and update your file accordingly.
#1) The required format for ULI is alphanumeric with at least 23 characters and up to 45 characters, 
#and it cannot be left blank.

#fail notes:
#each ULI has been replaced by a random 3-character string

# Start from the base LAR frames.
sm, med, lg = load_lars()
# Draw a fresh 3-character string for every row; the original ULI value is ignored.
sm["uli"] = sm["uli"].map(lambda _: random_string(3))
med["uli"] = med["uli"].map(lambda _: random_string(3))
lg["uli"] = lg["uli"].map(lambda _: random_string(3))
# Save the altered LAR rows, then build the v608 files in the validity folder.
write_mod_lars()
write_lar_files("v608")

In [None]:
#V609
#An invalid ULI was reported. Please review the information below and update your file accordingly.
#1) Based on the check digit calculation, the ULI contains a transcription error.

#fail notes:
#an invalid check digit has been used instead of a valid one
sm, med, lg = load_lars() #load base LAR files
#change ULI check digit
# The last two characters of each ULI are dropped and replaced with a deliberately wrong
# check digit. The digit is computed over the ULI without its old check digit, so the
# result can never coincide with the valid one. All three sizes strip two characters.
# Assumes every base ULI ends with a two-character check digit -- TODO confirm.
sm.uli = sm.uli.map(lambda x: x[:-2] + check_digit_gen(valid=False, ULI=x[:-2]))
med.uli = med.uli.map(lambda x: x[:-2] + check_digit_gen(valid=False, ULI=x[:-2]))
lg.uli = lg.uli.map(lambda x: x[:-2] + check_digit_gen(valid=False, ULI=x[:-2]))

write_mod_lars() #write modified LARs to files
write_lar_files("v609") #write test files to validity folder

In [None]:
#V610
#An invalid data field was reported. Please review the information below and update your file accordingly.
#1) Application Date must be either a valid date using YYYYMMDD format or NA, and cannot be left blank.
#2) If Action Taken equals 6, then Application Date must be NA, and the reverse must be true.

#fail notes:
#action taken changed to 6

# Start from the base LAR frames.
sm, med, lg = load_lars()
# Set action taken to 6 on every row while leaving the application date as loaded.
sm["action_taken"] = med["action_taken"] = lg["action_taken"] = "6"

# Save the altered LAR rows, then build the v610 files in the validity folder.
write_mod_lars()
write_lar_files("v610")

In [None]:
#V611
#An invalid Loan Type was reported. Please review the information below and update your file accordingly.
#1) Loan Type must equal 1, 2, 3, or 4, and cannot be left blank.

#fail notes:
#loan type changed to 0

# Start from the base LAR frames.
sm, med, lg = load_lars()
# Set loan type to 0 on every row (allowed values are 1, 2, 3 and 4).
sm["loan_type"] = med["loan_type"] = lg["loan_type"] = "0"

# Save the altered LAR rows, then build the v611 files in the validity folder.
write_mod_lars()
write_lar_files("v611")

In [None]:
#V612
#An invalid Loan Purpose was reported. Please review the information below and update your file accordingly.
#1) Loan Purpose must equal 1, 2, 31, 32, or 4, and cannot be left blank.
#2) If Preapproval equals 1, then Loan Purpose must equal 1.

#fail notes:
#loan purpose changed to 0

# Start from the base LAR frames.
sm, med, lg = load_lars()
# Set loan purpose to 0 on every row (allowed values are 1, 2, 31, 32 and 4).
sm["loan_purpose"] = med["loan_purpose"] = lg["loan_purpose"] = "0"

# Save the altered LAR rows, then build the v612 files in the validity folder.
write_mod_lars()
write_lar_files("v612")

In [None]:
#V613
#An invalid Preapproval data field was provided. Please review the information below and update your file accordingly.
#1) Preapproval must equal 1 or 2, and cannot be left blank.
#2) If Action Taken equals 7 or 8, then Preapproval must equal 1.
#3) If Action Taken equals 3, 4, 5 or 6, then Preapproval must equal 2.
#4) If Preapproval equals 1, then Action Taken must equal 1, 2, 7 or 8.

#fail notes:
#preapproval changed to 0

# Start from the base LAR frames.
sm, med, lg = load_lars()
# Set preapproval to 0 on every row (allowed values are 1 and 2).
sm["preapproval"] = med["preapproval"] = lg["preapproval"] = "0"

# Save the altered LAR rows, then build the v613 files in the validity folder.
write_mod_lars()
write_lar_files("v613")

In [None]:
#V614
#An invalid Preapproval was provided. Please review the information below and update your file accordingly.
#1) If Loan Purpose equals 2, 4, 31 or 32, then Preapproval must equal 2.
#2) If Multifamily Affordable Units is a number, then Preapproval must equal 2.
#3) If Reverse Mortgage equals 1, then Preapproval must equal 2.
#4) If Open-End Line of Credit equals 1, then Preapproval must equal 2.

#fail notes
#preapproval set to 0

# Start from the base LAR frames.
sm, med, lg = load_lars()
# Set preapproval to 0 on every row.
# NOTE(review): this is the same change the V613 cell makes; whether it trips V614 depends
# on loan purpose / reverse mortgage / open-end credit in the base rows -- confirm.
sm["preapproval"] = med["preapproval"] = lg["preapproval"] = "0"

# Save the altered LAR rows, then build the v614 files in the validity folder.
write_mod_lars()
write_lar_files("v614")

In [None]:
#V615
#An invalid Construction Method was reported. Please review the information below and update your file accordingly.
#1) Construction Method must equal 1 or 2, and cannot be left blank.
#2) If Manufactured Home Land Property Interest equals 1, 2, 3 or 4, then Construction Method must equal 2.
#3) If Manufactured Home Secured Property Type equals 1 or 2 then Construction Method must equal 2.

#fail notes:
#construction method set to blank

# Start from the base LAR frames.
sm, med, lg = load_lars()
# Blank the construction method on every row.
sm["const_method"] = med["const_method"] = lg["const_method"] = ""
# Save the altered LAR rows, then build the v615 files in the validity folder.
write_mod_lars()
write_lar_files("v615")