In [1]:
from tree import *
%load_ext autoreload
%autoreload 2

In [2]:
# Open the two archives used by the following cells: trees.zip is read with
# load_json() for decision trees, mini.zip is read for loan CSV rows.
tree_reader, data_reader = (
    ZippedCSVReader(archive) for archive in ("trees.zip", "mini.zip")
)

In [3]:
# Show the reader's `paths` attribute; per the output these are the CSV file
# names found in mini.zip.
print(data_reader.paths)

['il.csv', 'la.csv', 'wi.csv']


In [4]:
# Load one JSON tree from trees.zip and show the keys of its root node.
tree = tree_reader.load_json("simple.json")
print(tree.keys())

# Rows of a single CSV: each row is a dict of column name -> string value
# (see the printed first row). A blank line separates the outputs.
dict_list = data_reader.rows("wi.csv")
print("", dict_list[0], sep="\n")

# rows() without a file name — presumably the rows of every CSV in the
# archive; confirm against tree.ZippedCSVReader.rows.
dict_list = data_reader.rows()
print("", len(dict_list), sep="\n")

dict_keys(['field', 'threshold', 'left', 'right'])

{'as_of_year': '2017', 'respondent_id': '33-0975529', 'agency_name': 'Department of Housing and Urban Development', 'agency_abbr': 'HUD', 'agency_code': '7', 'loan_type_name': 'VA-guaranteed', 'loan_type': '3', 'property_type_name': 'One-to-four family dwelling (other than manufactured housing)', 'property_type': '1', 'loan_purpose_name': 'Refinancing', 'loan_purpose': '3', 'owner_occupancy_name': 'Owner-occupied as a principal dwelling', 'owner_occupancy': '1', 'loan_amount_000s': '165', 'preapproval_name': 'Not applicable', 'preapproval': '3', 'action_taken_name': 'Loan originated', 'action_taken': '1', 'state_name': 'Wisconsin', 'state_abbr': 'WI', 'state_code': '55', 'county_name': 'Outagamie County', 'county_code': '87.0', 'applicant_ethnicity_name': 'Not Hispanic or Latino', 'applicant_ethnicity': '2', 'co_applicant_ethnicity_name': 'Not Hispanic or Latino', 'co_applicant_ethnicity': '2', 'applicant_race_name_1': 'White', 'appli

In [5]:
# Hand-built loan used by the lookup cells that follow.
loan = Loan(
    40,                  # amount: loan["amount"] returns this value
    "Home improvement",  # purpose: loan["purpose"] returns this value
    "Asian",             # presumably the applicant's race — confirm in tree.Loan
    120,                 # presumably the income — confirm in tree.Loan
    "approve",           # presumably the recorded decision — confirm in tree.Loan
)

In [6]:
# Bare expression: the notebook displays the Loan's repr, which mirrors the
# constructor call (see output).
loan

Loan(40, 'Home improvement', 'Asian', 120, 'approve')

In [7]:
# Item lookup by field name returns the stored value (40 here).
loan["amount"]

40

In [8]:
# String-valued field looked up by name.
loan["purpose"]

'Home improvement'

In [9]:
# "Refinance" is not this loan's purpose ("Home improvement"); the lookup
# evaluates to 0 (see output) — presumably an indicator-style encoding.
loan["Refinance"]

0

In [10]:
# "Asian" matches the third constructor argument of this loan; the lookup
# evaluates to 1 (see output) — presumably an indicator-style encoding.
loan["Asian"]

1

In [11]:
# Bare expression: the output is the default object repr, so ZippedCSVReader
# apparently defines no custom __repr__.
data_reader

<tree.ZippedCSVReader at 0x7f5ee86efa58>

In [12]:
# Bank built with None as its first argument; other cells pass an agency
# abbreviation such as "NCUA" there. Presumably None means "no filtering" —
# confirm against tree.Bank.
b = Bank(None, data_reader)
# Bare expression: displays the default object repr (see output).
b

<tree.Bank at 0x7f5ec33c6da0>

In [13]:
# Build a bank for the "NCUA" agency abbreviation and list its loans.
b = Bank("NCUA", data_reader)
# The loop variable is deliberately not called `loan`: that name holds the
# Loan built in In [5], and overwriting it here would make the loan[...]
# lookup cells give different results if they were re-run afterwards.
for ncua_loan in b.loans():
    print(ncua_loan)  # each item is of type Loan

Loan(94, 'Refinancing', 'Information not provided by applicant in mail, Internet, or telephone application', 71, 'deny')
Loan(55, 'Home purchase', 'White', 41, 'deny')
Loan(20, 'Refinancing', 'Black or African American', 41, 'approve')
Loan(22, 'Refinancing', 'White', 36, 'approve')
Loan(175, 'Refinancing', 'White', 70, 'approve')
Loan(191, 'Home purchase', 'Information not provided by applicant in mail, Internet, or telephone application', 68, 'approve')
Loan(82, 'Refinancing', 'White', 40, 'deny')


In [14]:
# Collect the distinct bank names found in loans.zip.
reader = ZippedCSVReader('loans.zip')
names = get_bank_names(reader)
# The names are expected in alphabetical order; assert it so a regression in
# get_bank_names fails loudly instead of relying on a visual check.
assert list(names) == sorted(names), "bank names should be sorted alphabetically"
print(names)

['CFPB', 'FDIC', 'FRS', 'HUD', 'NCUA', 'OCC']


In [15]:
# Exercise SimplePredictor on a few hand-built loans and watch its running
# approved/denied counters. Judging by the output, the prediction follows the
# loan purpose rather than the decision stored on the Loan (the last loan
# carries the non-standard decision '1' and is still predicted True).
spred = SimplePredictor()
my_loans = [
    Loan(175, 'Refinancing', 'White', 70, 'approve'),
    Loan(145, 'Home purchase', 'White', 37, 'deny'),
    Loan(200, 'Home purchase', 'White', 95, 'approve'),
    Loan(414, 'Home purchase', 'White', 300, 'approve'),
    Loan(22, 'Refinancing', 'White', 36, '1'),
]

# The loop variable is not named `loan`, so the notebook-level `loan` object
# defined in an earlier cell is not silently replaced by the last list item.
for candidate in my_loans:
    print(candidate, 'predict:', spred.predict(candidate))
    # Counters are cumulative across predict() calls on the same predictor.
    print('approved:', spred.get_approved(), 'denied', spred.get_denied())

Loan(175, 'Refinancing', 'White', 70, 'approve') predict: True
approved: 1 denied 0
Loan(145, 'Home purchase', 'White', 37, 'deny') predict: False
approved: 1 denied 1
Loan(200, 'Home purchase', 'White', 95, 'approve') predict: False
approved: 1 denied 2
Loan(414, 'Home purchase', 'White', 300, 'approve') predict: False
approved: 1 denied 3
Loan(22, 'Refinancing', 'White', 36, '1') predict: True
approved: 2 denied 3


In [16]:
# Hand-written decision tree as nested dicts. Every node has the same four
# keys: "field", "threshold", "left", "right". Leaves have no children
# (left/right are None) and use the field name "class", with the class label
# stored under "threshold".
node_dicts = {
    "field": "amount",
    "threshold": 200,
    "left": {
        "field": "income",
        "threshold": 35,
        "left": {"field": "class", "threshold": 0, "left": None, "right": None},
        "right": {"field": "class", "threshold": 1, "left": None, "right": None},
    },
    "right": {
        "field": "income",
        "threshold": 70,
        "left": {"field": "class", "threshold": 0, "left": None, "right": None},
        "right": {"field": "class", "threshold": 1, "left": None, "right": None},
    },
}

In [17]:
# Build a decision tree from the in-notebook dict.
dt = DTree(node_dicts)
# dump() prints one line per node, indented by depth; leaves print as
# "class=<label>" (see output).
dt.dump()

amount <= 200
  income <= 35
    class=0
    class=1
  income <= 70
    class=0
    class=1


In [18]:
# Build the tree from simple.json inside trees.zip and print its structure;
# the dump matches the one produced from the hand-written node_dicts.
tree_reader = ZippedCSVReader("trees.zip")
simple_tree = tree_reader.load_json("simple.json")
dt = DTree(simple_tree)
dt.dump()

amount <= 200
  income <= 35
    class=0
    class=1
  income <= 70
    class=0
    class=1


In [19]:
# Load the larger tree stored as good.json and report how many nodes it has.
tree_reader = ZippedCSVReader("trees.zip")
good_tree = tree_reader.load_json("good.json")
dt = DTree(good_tree)
dt.node_count()

61

In [20]:
# Run a single prediction with the simple tree.
tree_reader = ZippedCSVReader("trees.zip")
simple_tree = tree_reader.load_json("simple.json")
dt = DTree(simple_tree)

loan = Loan(
    40,
    "Home improvement",
    "Asian",
    120,
    "approve",
)
# Presumably the walk goes amount 40 <= 200, then income 120 > 35, ending in
# the class=1 leaf, which is reported as True — confirm against DTree.predict.
dt.predict(loan)

True

In [21]:
# Bias check of the tree stored as bad.json over the loans in loans.zip.
loans_reader = ZippedCSVReader("loans.zip")
b = Bank(None, loans_reader)

bad_tree = ZippedCSVReader("trees.zip").load_json("bad.json")
dt = DTree(bad_tree)

# NOTE(review): the printed value is about 0.41; whether it is a fraction of
# loans or an actual percentage depends on bias_test — confirm in tree.py.
bias_percent = bias_test(b, dt, "Black or African American")
print(bias_percent)

0.41123833757421546
