In [1]:
# K. David Roell 8/23/2017
# This file will modify files created in CYOA test files so that the files fail specific edits


In [2]:
import os
import pandas as pd
import random
import string

pd.set_option("display.max_columns",110)

In [3]:
# Column names, in file order, handed to pandas as `names=` when the
# pipe-delimited LAR and TS files are read (the files carry no header row).
LAR_field_names = [
    # record_id .. action_date
    "record_id", "lei", "uli", "app_date", "loan_type", "loan_purpose",
    "preapproval", "const_method", "occ_type", "loan_amount",
    "action_taken", "action_date",
    # street_address .. tract
    "street_address", "city", "state", "zip_code", "county", "tract",
    # app_eth_* / co_app_eth_*
    "app_eth_1", "app_eth_2", "app_eth_3", "app_eth_4", "app_eth_5",
    "app_eth_code_14",
    "co_app_eth_1", "co_app_eth_2", "co_app_eth_3", "co_app_eth_4",
    "co_app_eth_5", "co_app_eth_code_14",
    "app_eth_basis", "co_app_eth_basis",
    # app_race_* / co_app_race_*
    "app_race_1", "app_race_2", "app_race_3", "app_race_4", "app_race_5",
    "app_race_code_1", "app_race_code_27", "app_race_code_44",
    "co_app_race_1", "co_app_race_2", "co_app_race_3", "co_app_race_4",
    "co_app_race_5",
    "co_app_race_code_1", "co_app_race_code_27", "co_app_race_code_44",
    "app_race_basis", "co_app_race_basis",
    # app_sex .. income
    "app_sex", "co_app_sex", "app_sex_basis", "co_app_sex_basis",
    "app_age", "co_app_age", "income",
    # purchaser_type .. lien
    "purchaser_type", "rate_spread", "hoepa", "lien",
    # *_credit_score / *_score_*
    "app_credit_score", "co_app_credit_score",
    "app_score_name", "app_score_code_8",
    "co_app_score_name", "co_app_score_code_8",
    # denial_*
    "denial_1", "denial_2", "denial_3", "denial_4", "denial_code_9",
    # loan_costs .. prepayment_penalty
    "loan_costs", "points_fees", "origination_fee", "discount_points",
    "lender_credits", "interest_rate", "prepayment_penalty",
    # dti .. non_amort_features
    "dti", "cltv", "loan_term", "intro_rate", "balloon", "int_only_pmts",
    "neg_amort", "non_amort_features",
    # property_value .. affordable_units
    "property_value", "manufactured_type", "manufactured_interest",
    "total_units", "affordable_units",
    # submission_type .. mlo_id
    "submission_type", "initially_payable", "mlo_id",
    # aus_* / aus_result_*
    "aus_1", "aus_2", "aus_3", "aus_4", "aus_5", "aus_code_5",
    "aus_result_1", "aus_result_2", "aus_result_3", "aus_result_4",
    "aus_result_5", "aus_code_16",
    # reverse_mortgage .. business_purpose
    "reverse_mortgage", "open_end_credit", "business_purpose",
]

TS_field_names = [
    "record_id", "inst_name", "calendar_year", "calendar_quarter",
    # contact_* / office_*
    "contact_name", "contact_tel", "contact_email",
    "contact_street_address",
    "office_city", "office_state", "office_zip",
    # federal_agency .. lei
    "federal_agency", "lar_entries", "tax_id", "lei",
]

In [4]:
#load TS rows for adding to LAR file parts
# Each size reads its own TS file (the same three files load_ts_pandas reads).
# Previously all three rows came from ts_small.txt, so the "medium" and "large"
# rows were copies of the small one and the S304 row shuffle could not produce
# a mismatched entry count for every size.
def _read_first_line(file_path):
    """Return the first line of file_path (trailing newline kept), closing the file."""
    with open(file_path, 'r') as ts_file:
        return ts_file.readline()

ts_row_sm = _read_first_line("edits_files/file_parts/ts_small.txt")
ts_row_med = _read_first_line("edits_files/file_parts/ts_medium.txt")
ts_row_lg = _read_first_line("edits_files/file_parts/ts_large.txt")
ts_rows = [ts_row_sm, ts_row_med, ts_row_lg]

In [5]:
# Shared locations used by the helper functions and the edit cells.
path = 'edits_files/file_parts/'              # intermediate LAR/TS parts are written here
final_path = 'edits_files/syntax/'            # folder created by the S300 cell
file_name = 'lar_passes_{size}_no_ts.txt'     # template with a {size} placeholder

#Helper functions

def write_lar_files(edit_name, ts_rows=ts_rows):
    """Writes edit testing files to edit folder.

    For each size (sm, med, lg) the matching intermediate LAR part
    ("edits_files/file_parts/_lar_<size>.txt") is copied into
    "edits_files/<edit type>/<edit_name>_<size>.txt", preceded by the TS row
    for that size.

    Parameters:
        edit_name: name of the edit, e.g. "s300". Its first character selects
            the output folder: "s" -> syntax, "v" -> validity, "q" -> quality.
        ts_rows: TS row strings in sm, med, lg order. Defaults to the rows
            loaded at the top of the notebook.

    Raises:
        ValueError: if edit_name does not start with "s", "v" or "q"
            (previously this surfaced as an UnboundLocalError).
    """
    edit_types = {"s": "syntax", "v": "validity", "q": "quality"}
    prefix = edit_name[:1]
    if prefix not in edit_types:
        raise ValueError(
            "edit_name must start with 's', 'v' or 'q', got %r" % (edit_name,))
    edit_type = edit_types[prefix]

    source_path = "edits_files/file_parts/"
    final_path = "edits_files/" + edit_type + "/"
    source_names = ["_lar_sm.txt", "_lar_med.txt", "_lar_lg.txt"]
    sizes = ["sm", "med", "lg"]

    if not os.path.exists(final_path):
        os.makedirs(final_path)

    for source_name, size, ts_row in zip(source_names, sizes, ts_rows):
        outfile = edit_name + "_" + size + ".txt"
        # The TS row must occupy its own line; a row read from a file with no
        # trailing newline would otherwise be fused with the first LAR row.
        if not ts_row.endswith("\n"):
            ts_row += "\n"
        with open(source_path + source_name, 'r') as source_file:
            with open(final_path + outfile, 'w') as final_file:
                final_file.write(ts_row)
                # Stream the LAR part instead of loading it all with readlines().
                for line in source_file:
                    final_file.write(line)

def write_mod_lars():
    """Dump the notebook-level sm, med and lg LAR frames to intermediate files.

    Each frame is written pipe-delimited, without index or header, to
    path + "_lar_<size>.txt" so a TS row can be prepended afterwards.
    """
    csv_opts = dict(sep="|", index=False, header=False)
    sm.to_csv(path + "_lar_sm.txt", **csv_opts)
    med.to_csv(path + "_lar_med.txt", **csv_opts)
    lg.to_csv(path + "_lar_lg.txt", **csv_opts)

def write_mod_ts():
    """Dump the notebook-level ts_sm, ts_med and ts_lg frames to intermediate files.

    Each frame is written pipe-delimited, without index or header, to
    path + "_ts_<size>.txt" so it can be re-read as a plain string.
    """
    csv_opts = dict(sep="|", index=False, header=False)
    ts_sm.to_csv(path + "_ts_sm.txt", **csv_opts)
    ts_med.to_csv(path + "_ts_med.txt", **csv_opts)
    ts_lg.to_csv(path + "_ts_lg.txt", **csv_opts)
    
def rem_file_parts(path, files=[]):
    """Delete the intermediate file parts named in files.

    Each name is appended directly to path (no separator is inserted), and the
    resulting file is removed with os.remove. The default list is only
    iterated, never modified.
    """
    targets = [path + part_name for part_name in files]
    for target in targets:
        os.remove(target)
        
def load_lar(size="small", path="edits_files/file_parts/", file_name="lar_passes_{size}_no_ts.txt"):
    """Read one LAR file into a dataframe. This function is deprecated.

    The file location is path + file_name with {size} filled in. The file is
    read as pipe-delimited with no header row; columns are named from
    LAR_field_names and every value is kept as an object (string) dtype.
    """
    lar_file = path + file_name.format(size=size)
    lar_frame = pd.read_csv(
        lar_file,
        sep="|",
        header=None,
        names=LAR_field_names,
        dtype=object,
    )
    return lar_frame

def load_mod_ts():
    """Loads modified TS files as strings for adding to LAR data.

    Reads the first line (trailing newline included, if present) of each
    intermediate TS file under "edits_files/file_parts/".

    Returns:
        Tuple of three strings in sm, med, lg order.
    """
    ts_paths = (
        "edits_files/file_parts/_ts_sm.txt",
        "edits_files/file_parts/_ts_med.txt",
        "edits_files/file_parts/_ts_lg.txt",
    )
    first_lines = []
    for ts_path in ts_paths:
        # `with` closes each handle; the bare open(...).readline() form left
        # three files open until garbage collection.
        with open(ts_path, 'r') as ts_file:
            first_lines.append(ts_file.readline())
    return tuple(first_lines)

def load_lars():
    """Read the small, medium and large base LAR files into dataframes.

    Files are pipe-delimited with no header row; columns are named from
    LAR_field_names and all values are kept as object dtype.

    Returns:
        (lar_sm, lar_med, lar_lg) tuple of dataframes.
    """
    read_opts = dict(sep="|", header=None, names=LAR_field_names, dtype=object)
    lar_files = (
        "edits_files/file_parts/lar_passes_small_no_ts.txt",
        "edits_files/file_parts/lar_passes_medium_no_ts.txt",
        "edits_files/file_parts/lar_passes_large_no_ts.txt",
    )
    lar_sm, lar_med, lar_lg = [pd.read_csv(lar_file, **read_opts) for lar_file in lar_files]
    return lar_sm, lar_med, lar_lg

def load_ts_pandas():
    """Read the small, medium and large TS files into dataframes.

    Files are pipe-delimited with no header row; columns are named from
    TS_field_names and all values are kept as object dtype.

    Returns:
        (sm, med, lg) tuple of dataframes.
    """
    def _read_ts(ts_file):
        # Same reader settings for every size.
        return pd.read_csv(ts_file, sep="|", dtype=object, header=None, names=TS_field_names)

    ts_small = _read_ts("edits_files/file_parts/ts_small.txt")
    ts_medium = _read_ts("edits_files/file_parts/ts_medium.txt")
    ts_large = _read_ts("edits_files/file_parts/ts_large.txt")
    return ts_small, ts_medium, ts_large

In [6]:
#S300
#The data provided in the file is incorrect. Please review the information below and update your file accordingly.
#The following criteria must be met:
#1) The first row of your file must begin with a 1; and
#2) Any subsequent rows must begin with a 2.

#fail notes:
#file has record id for transmittal sheet =2 and record id for lar =1

# Ensure the syntax output folder exists before anything is written.
if not os.path.exists(final_path):
    os.makedirs(final_path)

# TS rows with the leading record id character swapped to "2".
s300_ts_sm, s300_ts_med, s300_ts_lg = [
    "2" + ts_row[1:] for ts_row in (ts_row_sm, ts_row_med, ts_row_lg)
]

# Fresh copies of the passing LAR data.
sm, med, lg = load_lars()
# Make every LAR row fail the edit by giving it record id "1".
for lar in (sm, med, lg):
    lar["record_id"] = "1"
# Persist the modified LAR parts, then assemble the s300 files in the syntax folder.
write_mod_lars()
write_lar_files("s300", [s300_ts_sm, s300_ts_med, s300_ts_lg])

In [7]:
#S301
#The LEI in this row does not match the reported LEI in the transmittal sheet (the first row of your file). 
#Please update your file accordingly.

#fail notes
#LEI in LAR rows has been changed to a random string of ASCII characters

# Fresh copies of the passing LAR data.
sm, med, lg = load_lars()
# Give each file its own random 20-character LEI (uppercase letters and digits),
# applied to every row of that file.
lei_alphabet = string.ascii_uppercase + string.digits
for lar in (sm, med, lg):
    lar["lei"] = "".join(random.choice(lei_alphabet) for _ in range(20))
# Persist the modified LAR parts, then assemble the s301 files in the syntax folder.
write_mod_lars()
write_lar_files("s301")
# Clean up the intermediate LAR parts.
rem_file_parts(path, files=['_lar_sm.txt', '_lar_med.txt', '_lar_lg.txt'])

In [8]:
#V600
#An LEI in an invalid format was provided. Please review the information below and update your file accordingly.
#1) The required format for LEI is alphanumeric with 20 characters, and it cannot be left blank.

#fail notes
#LEI is blank for each LAR row

# Fresh copies of the passing LAR data.
sm, med, lg = load_lars()
# Blank out the LEI on every row so the format check fails.
for lar in (sm, med, lg):
    lar["lei"] = ""
# Persist the modified LAR parts, then assemble the v600 files in the validity folder.
write_mod_lars()
write_lar_files("v600")


In [9]:
#S302
#The reported Calendar Year does not match the filing year indicated at the start of the filing. 
#Please confirm the information below and update your file accordingly.
#1) The correct file has been uploaded; and
#2) The correct filing year was chosen at the start of the filing; and
#3) The calendar year is listed correctly in the file.

#fail notes
#Calendar Year in the TS row has been changed to 2010

#load base LAR data
sm, med, lg = load_lars()
#change Calendar Year to 2010 (invalid for 2018)
# Replace the calendar_year field by its position in the pipe-delimited row
# rather than by fixed character offsets: the old [13:17] slice only hit the
# year when the institution name was exactly 10 characters long.
calendar_year_idx = TS_field_names.index("calendar_year")
s302_ts_rows = []
for ts_row in (ts_row_sm, ts_row_med, ts_row_lg):
    ts_fields = ts_row.split("|")
    ts_fields[calendar_year_idx] = "2010"
    s302_ts_rows.append("|".join(ts_fields))
s302_ts_row_sm, s302_ts_row_med, s302_ts_row_lg = s302_ts_rows
#write mod LARs (overwrites other modified LARs)
write_mod_lars()
#write test file to syntax folder
write_lar_files("s302", ts_rows=[s302_ts_row_sm, s302_ts_row_med, s302_ts_row_lg])

In [10]:
#S303
#The reported Federal Agency; Federal Taxpayer Identification Number; 
#and Legal Entity Identifier must match the Federal Agency; Federal Taxpayer Identification Number; 
#and Legal Entity Identifier for the financial institution for which you are filing. 
#Please confirm the information below and update your file accordingly.
#1) The correct financial institution was selected at the start of the filing; and
#2) The correct file was uploaded; and
#3) The Federal Agency, Federal Taxpayer Identification Number, 
#and Legal Entity Identifier are reported correctly in the file.

#fail notes
#Requires a match against panel
#Discuss how to implement this file

In [11]:
#S304
#The reported Total Number of Entries Contained in Submission does not match the total number of LARs in the HMDA file.
#Please update your file accordingly.

#fail notes:
#TS rows have been re-ordered sm>md, md>lg, lg>sm

# Fresh copies of the passing LAR data.
sm, med, lg = load_lars()
# Rotate the TS rows so each file carries another size's row:
# small file <- medium row, medium file <- large row, large file <- small row.
s304_ts_sm, s304_ts_med, s304_ts_lg = ts_row_med, ts_row_lg, ts_row_sm
# Persist the (unmodified) LAR parts; this overwrites earlier modified parts.
write_mod_lars()
# Assemble the s304 files in the syntax folder.
write_lar_files("s304", ts_rows=[s304_ts_sm, s304_ts_med, s304_ts_lg])

In [12]:
#V601
#The following data fields are required, and cannot be left blank. A blank value(s) was provided. 
#Please review the information below and update your file accordingly.
#1) Financial Institution Name;
#2) Contact Person's Name;
#3) Contact Person's E-mail Address;
#4) Contact Person's Office Street Address;
#5) Contact Person's Office City

#fail notes
#contact data has been changed to blank
# TS columns that will be blanked.
change_fields = ["contact_name","contact_tel","contact_email","contact_street_address","office_city"]
# Fresh copies of the passing LAR data.
sm, med, lg = load_lars()
# Work on the TS rows as dataframes so fields are addressed by name, not position.
ts_sm, ts_med, ts_lg = load_ts_pandas()
# Blank every listed field in each TS frame.
for ts_frame in (ts_sm, ts_med, ts_lg):
    for field in change_fields:
        ts_frame[field] = ""
# Persist the modified TS parts, then the LAR parts (overwrites earlier modified LAR data).
write_mod_ts()
write_mod_lars()
# Read the modified TS rows back as plain strings.
ts_sm, ts_med, ts_lg = load_mod_ts()
# Assemble the v601 files in the validity folder.
write_lar_files("v601", ts_rows=[ts_sm, ts_med, ts_lg])

In [17]:
#V602
#An invalid Calendar Quarter was reported. Please review the information below and update your file accordingly.
#1) Calendar Quarter must equal 4, and cannot be left blank.

#fail notes:
#calendar quarter has been changed to blank

# Fresh copies of the passing LAR data, written straight back out as the
# LAR parts (overwrites earlier modified LAR data).
sm, med, lg = load_lars()
write_mod_lars()
# Work on the TS rows as dataframes.
ts_sm, ts_med, ts_lg = load_ts_pandas()
# Blank the calendar quarter in each TS frame.
for ts_frame in (ts_sm, ts_med, ts_lg):
    ts_frame["calendar_quarter"] = ""
# Persist the modified TS parts and read them back as plain strings.
write_mod_ts()
ts_sm, ts_med, ts_lg = load_mod_ts()
# Assemble the v602 files in the validity folder.
write_lar_files("v602", ts_rows=[ts_sm, ts_med, ts_lg])

In [20]:
#V603
#An invalid Contact Person's Telephone Number was provided. 
#Please review the information below and update your file accordingly.
#1) The required format for the Contact Person's Telephone Number is 999-999-9999, and it cannot be left blank.

#fail notes:
#Contact phone has been changed to 555-5555

#load base LAR data
sm, med, lg = load_lars()
#write mod LARs (overwrites other modified LAR data)
write_mod_lars()
#load TS rows as dataframe for modification
ts_sm, ts_med, ts_lg = load_ts_pandas()
#change TS row data to fail edit
ts_sm.contact_tel = "555-5555"
ts_med.contact_tel = "555-5555"
ts_lg.contact_tel = "555-5555"
#write mod TS rows to files
# The call parentheses were missing here, so the modified TS frames were never
# written and load_mod_ts() below re-read whatever TS parts an earlier cell
# had left on disk.
write_mod_ts()
#re-load TS data as strings
ts_sm, ts_med, ts_lg = load_mod_ts()
#write test files to validity folder
write_lar_files("v603", ts_rows=[ts_sm, ts_med, ts_lg])

In [None]:
#V604
#An invalid Contact Person's Office State was provided. 
#Please review the information below and update your file accordingly.
#1) Contact Person's Office State must be a two letter state code, and cannot be left blank.

#fail notes:
#Office State has been changed to 3 characters

