In [1]:
from py2neo import Node, Graph, Relationship

graph = Graph("bolt://neo4j:7687", name="neo4j", password="password")

In [2]:
graph.run("MATCH (n) DETACH DELETE n").stats()

{'nodes_deleted': 226, 'relationships_deleted': 1200}

In [3]:
query1 = '''
LOAD CSV WITH HEADERS FROM 
'file:///credit_scores3.csv' AS row
WITH row limit 200
WITH row,
(CASE 
    WHEN ToFloat(row.DTI) <0.2 THEN 'Very Good'
    WHEN ToFloat(row.DTI) <0.35 THEN 'Good'
    WHEN ToFloat(row.DTI) <0.4 THEN 'Fair'
    ELSE 'Poor'
END) AS dti_bin,
(CASE
    WHEN ToInteger(row.Credit_Score) < 580 THEN 'Poor'
    WHEN ToInteger(row.Credit_Score) < 670 THEN 'Fair'
    WHEN ToInteger(row.Credit_Score) < 740 THEN 'Good'
    WHEN ToInteger(row.Credit_Score) < 780 THEN 'Very Good'
    ELSE 'Excellent'
END) AS credit_score_bin

CREATE (c:customer {customer_id: (row.Customer_ID), age: ToFloat(row.LOB_Code), product_name: (row.Product_Name)})

MERGE (pd:probdefault {def : row.POD})
MERGE (c)-[:has_default]->(pd)

MERGE (pc:prod_code {product_name: row.Product_Name})
MERGE (c)-[:has_product]->(pc)

MERGE (mtb:missedpayment {missed_pay_bin : row.PH_Status})
MERGE (c)-[:has_missed_payments {missed_payments: ToInteger(row.Missed_payments)}]->(mtb)

MERGE (dd:delinq {delinq : row.Deliquent_days})
MERGE (c)-[:has_delinq_days]->(dd)

MERGE (csb:creditscore {credit_score_bin : credit_score_bin})
MERGE (c)-[:has_credit_score {credit_score: ToInteger(row.Credit_Score)}]->(csb)

MERGE (dti:debttoincome {dti_bin : dti_bin})
MERGE (c)-[:has_debt_to_income_ratio {debt_to_income_ratio: ToFloat(row.DTI)}]->(dti)
'''

In [4]:
graph.run(query1).stats()

{'labels_added': 226,
 'relationships_created': 1200,
 'nodes_created': 226,
 'properties_set': 1026}

In [5]:
query2 = '''
        MATCH (pd:probdefault)<-[:has_default]-(c:customer)-[:has_product]->(pc)
        WITH pd.def as default, pc.product_name as product, count(DISTINCT c) as customer_count
        RETURN default, product, customer_count
'''

graph.run(query2).to_table()

default,product,customer_count
0,Personal,29
0,Mortgage,23
0,Home Improvement,22
0,Line of Credit,24
0,Real Estate,21
0,Equipment Finance,28
0,Car Loan,18
1,Line of Credit,7
1,Real Estate,7
1,Equipment Finance,6


In [6]:
query3 = '''
        MATCH (pd:probdefault)<-[:has_default]-(c:customer)-[r:has_missed_payments]->(mtb:missedpayment)
        WITH pd.def as default, mtb.missed_pay_bin as missedbin, c.product_name as product, count(DISTINCT c) as customer_count
        RETURN default, missedbin, product, customer_count
'''

In [6]:
query4 = '''
        MATCH (pd:probdefault {def : '1'})<-[:has_default]-(c:customer)-[r1:has_missed_payments]->(mtb:missedpayment)
        MATCH (c:customer)-[r2:has_credit_score]->(csb:creditscore)
        MATCH (c:customer)-[r3:has_debt_to_income_ratio]->(dti:debttoincome)
        MATCH (c:customer)-[r4:has_delinq_days]->(dd:delinq)
        RETURN c.customer_id, c.product_name as product, r1.missed_payments as missed_payment,
        r2.credit_score as credit_score, round(r3.debt_to_income_ratio,3) as dti, dd.delinq as delinquent_days limit 10
'''
graph.run(query4).to_table()

c.customer_id,product,missed_payment,credit_score,dti,delinquent_days
8405543,Line of Credit,4,450,0.56,0
5452627,Real Estate,9,445,0.203,30
8361074,Equipment Finance,5,569,0.525,30
6959955,Line of Credit,7,498,0.305,30
3253960,Mortgage,4,481,0.388,0
6741599,Real Estate,9,445,0.05,0
1031153,Home Improvement,15,355,0.328,0
5319402,Equipment Finance,10,457,0.493,0
4700125,Equipment Finance,8,418,0.337,30
9765785,Car Loan,9,341,0.244,0


In [9]:
graph.run("MATCH (n) DETACH DELETE n").stats()

{'nodes_deleted': 45, 'relationships_deleted': 120}