In [3]:
#K. David Roell, 8/22/2017
#This notebook will create LAR edit testing files using the scenarios outlined in 
#the [HMDA loan scenarios](https://www.consumerfinance.gov/data-research/hmda/static/for-filers/HMDA-Loan-Scenarios.pdf) PDF.
#
#


In [13]:
import os
import random
import string
from collections import OrderedDict, namedtuple

import pandas as pd

#show up to 110 columns when a DataFrame is displayed
#(the option has to be set through pandas; a bare `display` name is not a pandas options handle)
pd.set_option("display.max_columns", 110)

In [26]:
#LAR row counts for each test-file size
#make_row_list hard-codes these same counts, so the two places must be kept in sync
lar_small = 200
lar_medium = 1000
lar_large = 10000

#named tuple holding the fields of a TS row, listed in the order they are written to file
ts_tuple = namedtuple('ts_row', [
    'record_id',
    'institution_name',
    'calendar_year',
    'calendar_quarter',
    'contact_name',
    'contact_telephone',
    'contact_email',
    'contact_address',
    'office_city',
    'office_state',
    'office_zip',
    'federal_agency',
    'lar_entries',
    'tax_id',
    'lei',
])

#example TS row for the small file; lar_entries carries the LAR row count as a string
ts_row_small = ts_tuple(
    record_id='1',
    institution_name='Ficus Bank',
    calendar_year='2018',
    calendar_quarter='4',
    contact_name='Smug Pockets',
    contact_telephone='555-555-5555',
    contact_email='pockets@ficus.com',
    contact_address='1234 Ficus Lane',
    office_city='Ficusville',
    office_state='UT',
    office_zip='84096',
    federal_agency='1',
    lar_entries=str(lar_small),
    tax_id='01-0123456',
    lei='10Bx939c5543TqA1144M',
)
#the medium and large rows differ from the small row only in lar_entries
ts_row_medium = ts_row_small._replace(lar_entries=str(lar_medium))
ts_row_large = ts_row_small._replace(lar_entries=str(lar_large))

#create the output directory on first run
if not os.path.exists("edits_files/file_parts/"):
    os.makedirs("edits_files/file_parts/")

#write each TS row to its own file as a single pipe-delimited line (no trailing newline)
for ts_out_path, ts_out_row in (
        ("edits_files/file_parts/ts_small.txt", ts_row_small),
        ("edits_files/file_parts/ts_medium.txt", ts_row_medium),
        ("edits_files/file_parts/ts_large.txt", ts_row_large)):
    with open(ts_out_path, 'w') as f:
        f.write('|'.join(ts_out_row))



In [20]:
#Set LAR values as strings
#data sourced from HMDA loan scenarios, see above for link
#Each value below is one complete pipe-delimited LAR row; empty fields appear as consecutive pipes.
#All three rows share the same opening fields: the second field equals the lei value used in the TS rows
#and the third field is the default ULI of check_digit_gen followed by "38".
#NOTE(review): because every generated row reuses this one ULI, files built from these strings contain
#duplicate ULIs - confirm that is intended for the "passes" files.

#Single-Family closed-end loan example
single_fam_closed_end = "2|10Bx939c5543TqA1144M|10Bx939c5543TqA1144M999143X38|20180721|1|1|2|1|1|162000|1|20180912|456 Somewhere Ave|Los Angeles|CA|90049|06037|06037264000|12||||||2||||||2|2|5||||||||41||||||||2|2|1|2|2|2|39|32|123|1|0.428|2|1|794|803|2||6||10|||||5672|NA|1802|||3.875|NA|42|80|360|NA|2|2|2|2|202500|3|5|1|NA|1|1|12345|1||||||1||||||2|2|2"

#Non-natural person multi-family purchased loan example
non_natural_multi_purch = "2|10Bx939c5543TqA1144M|10Bx939c5543TqA1144M999143X38|20180721|1|1|2|1|3|585000|1|20180912|456 Somewhere Ave|Los Angeles|CA|90049|06037|06037264000|4||||||5||||||3|4|7||||||||8||||||||3|4|4|5|3|4|8888|9999|NA|3|NA|3|1|8888|9999|9||10||10|||||NA|NA|NA|NA|NA|3.875|NA|NA|59|360|NA|2|2|2|2|985500|3|5|10|5|1|1|12345|6||||||17||||||2|2|1"

#Open-end line of credit example
open_end_credit = "2|10Bx939c5543TqA1144M|10Bx939c5543TqA1144M999143X38|20180721|1|2|2|1|1|40000|1|20180912|456 Somewhere Ave|Los Angeles|CA|90049|06037|06037264000|12||||||2||||||2|2|5||||||||41||||||||2|2|1|2|2|2|39|32|123|0|0.428|2|2|794|803|2||6||10|||||NA|NA|NA|NA|NA|3.875|NA|42|80|240|12|2|2|2|2|202500|3|5|1|NA|1|1|12345|6||||||17||||||2|1|2"


In [24]:
def make_row_list(size='small', proportions=(.50, .25, .25), ts=False):
    """Creates the list of rows for a test file with set parameters.

    Number of rows should be divisible by 4 for consistent results.

    Parameters:
        size: 'small', 'medium' or 'large'; selects the LAR row count from
            lar_small, lar_medium or lar_large.
        proportions: three fractions giving the share of single-family closed-end,
            non-natural person multi-family purchased, and open-end line of credit
            rows, in that order. Each share is truncated to a whole number of rows.
        ts: when True, the TS row matching `size` is placed first in the list.

    Returns:
        A list of pipe-delimited row strings (the optional TS row, then the LAR rows).

    Raises:
        ValueError: if `size` is not one of the supported names.
    """
    #pair each supported size with its LAR row count and the TS row whose lar_entries matches that count
    #(for some edits the TS lar_entries field must match the number of LAR rows in the file)
    size_options = {
        'small': (lar_small, ts_row_small),
        'medium': (lar_medium, ts_row_medium),
        'large': (lar_large, ts_row_large),
    }
    if size not in size_options:
        raise ValueError(
            "size must be one of 'small', 'medium' or 'large', got {size!r}".format(size=size))
    num_rows, ts_row = size_options[size]

    #set proportion of different row types
    single_fam = int(num_rows * proportions[0])
    purchased = int(num_rows * proportions[1])
    open_end_lines = int(num_rows * proportions[2])

    rows = []
    #add the size-matched TS row to the beginning of the rows to be written to file
    if ts:
        rows.append("|".join(ts_row))

    #append rows of the 3 different LAR types in the proportions defined in the function args
    rows.extend([single_fam_closed_end] * single_fam)
    rows.extend([non_natural_multi_purch] * purchased)
    rows.extend([open_end_credit] * open_end_lines)
    return rows #return the list of rows

def write_file(row_input=None, directory="edits_files/", name="passes_all.txt"):
    """Writes rows of TS and LAR data to a file, one row per line.

    Parameters:
        row_input: iterable of rows to write; each row is followed by a newline.
            None is treated as "no rows" and produces an empty file.
        directory: directory to write into, with or without a trailing separator.
            It is created if missing; an empty string means the current directory.
        name: name of the file to create (an existing file is overwritten).
    """
    rows = [] if row_input is None else row_input
    #an empty directory refers to the current working directory, which never needs creating
    if directory and not os.path.exists(directory):
        os.makedirs(directory)
    #os.path.join supplies the separator when directory has no trailing slash
    with open(os.path.join(directory, name), 'w') as final_file:
        for line in rows:
            final_file.write("{line}\n".format(line=line))
            
file_parts_dir = "edits_files/file_parts/"
full_file_dir = "edits_files/"

#LAR-only files (no TS row), one per file size
for lar_size, lar_file_name in (
        ("small", "lar_passes_small_no_ts.txt"),
        ("medium", "lar_passes_medium_no_ts.txt"),
        ("large", "lar_passes_large_no_ts.txt")):
    write_file(make_row_list(size=lar_size), directory=file_parts_dir, name=lar_file_name)

#complete files (TS row first, then the LAR rows), one per file size
for lar_size, lar_file_name in (
        ("small", "lar_passes_small.txt"),
        ("medium", "lar_passes_medium.txt"),
        ("large", "lar_passes_large.txt")):
    write_file(make_row_list(ts=True, size=lar_size), directory=full_file_dir, name=lar_file_name)

In [25]:

def string_gen(length):
    """Generates a random string of the chosen length.

    Characters are drawn, with repetition, from the uppercase ASCII letters and the
    digits 0-9. A length of 0 (or less) yields an empty string.
    """
    #build the alphabet once instead of once per generated character
    alphabet = string.ascii_uppercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(length))

def check_digit_gen(valid=True, ULI='10Bx939c5543TqA1144M999143X'):
    """Generates the two-character check digit for a ULI in accordance with
    https://www.consumerfinance.gov/eregulations/diff/1003-C/2015-26607_20170101/2015-26607_20180101?from_version=2015-26607_20170101#1003-C-1

    ULI is the LEI plus the loan/application identifier, without a check digit.
    With valid=True the computed check digit is returned; with valid=False a
    deliberately wrong two-character value is returned (used in edit testing).
    Raises ValueError if ULI is None.
    """
    if ULI is None:
        raise ValueError("a ULI must be supplied")

    #Procedure:
    #1: replace every letter with its number (A=10, B=11, ... Z=35, case-insensitive); digits stay as they are
    #2: append '00' to the right of the resulting digit string
    #3: take the number from step 2 modulo 97
    #4: subtract the result of step 3 from 98, left-padding with 0 to two digits
    #5: the two digits from step 4 are the check digit, to be appended to the right of the ULI

    #letter_values maps each uppercase letter to its numeric replacement, already as text
    letter_values = {chr(ord('A') + offset): str(10 + offset) for offset in range(26)}

    #characters without a mapping are passed through unchanged
    numeric_text = "".join(letter_values.get(symbol.upper(), symbol) for symbol in ULI) + '00'
    remainder = int(numeric_text) % 97
    check_value = 98 - remainder

    if not valid:
        #shift the value by 6 and keep two characters to get a bad check digit
        return str(check_value + 6).zfill(2)[:2]
    #left pad the check digit with 0 if it is a single digit
    return str(check_value).zfill(2)
