In [3]:
import json
import os
import re
import time
import urllib
from pprint import pprint

import numpy as np
import pandas as pd
from py2neo import Node, Graph, Relationship, NodeMatcher
from py2neo.bulk import create_nodes,merge_nodes,create_relationships
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

In [4]:
# Configure the Neo4j connection.
# Connection settings are read from the environment so credentials do not have
# to be committed with the notebook; the fallbacks are the values this cell
# previously hard-coded, so an unchanged local setup keeps working.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "pwd")

graph = Graph(NEO4J_URI, name=NEO4J_DATABASE, password=NEO4J_PASSWORD)
nodes_matcher = NodeMatcher(graph)

In [5]:
# Load the case table and discard the "Unnamed: 0" column in one expression
# (presumably an index column written out when the CSV was saved -- verify).
neo_data = pd.read_csv("NODES23.csv").drop(columns=["Unnamed: 0"])
neo_data.head(2)

Unnamed: 0,case,url,crime,crime_json,crime_details,crime_details_json,victim,victim_json,suspect,suspect_json
0,HKLII\n\n\n\nDatabases\n\n\n\nCourt of First I...,https://www.hklii.hk/en/cases/hkcfi/2018/2859,"\n\n{\n ""crime"": {\n ""caseid"": ""HCCC105/20...",Good,"\n\n{\n ""crime details"": {\n\n""timeline"": ""1...",Good,"\n{\n ""victim"": {\n ""name"" : ""Madam Chan Sau...",Good,"\n\n{\n ""suspect"": {\n ""name"": ""NGAN Wing-...",Bad
1,HKLII\n\n\n\nDatabases\n\n\n\nCourt of First I...,https://www.hklii.hk/en/cases/hkcfi/2016/1974,"\n\n{\n ""crime"": {\n ""caseid"": ""HCCC183/20...",Good,"\n\n{\n ""crime details"": {\n\n""timeline"": ""f...",Good,"\n{\n ""victim"": {\n ""name"" : ""Sumarti Ningsi...",Good,"\n \n {\n ""suspect"": {\n ""name"" : ""Jutting R...",Bad


In [9]:
# Seed the fixed set of Category nodes that later cells link cases, suspects
# and victims to by name.
data_nodes = [
    {"name": category_name}
    for category_name in (
        "drug_problem",
        "health_problem",
        "financial_problem",
        "prior_criminal_record",
        "murder",
        "employed",
        "motive",
        "suicide",
    )
]
# MERGE rather than CREATE: re-running this cell must not add a second copy of
# each node, because the relationships created later match their end node by
# key and duplicate nodes would each receive their own relationship.
merge_nodes(graph.auto(), data_nodes, ("Category", "name"))

# Default location node; merged for the same re-run safety reason.
merge_nodes(graph.auto(), [{"type": 'Hong Kong Special Administrative Region'}], ("Location", "type"))

In [10]:
# Node-matching keys in the (label, property, ...) form passed to
# create_relationships as start_node_key / end_node_key.
_CASE_NODE_KEY = ("Case", "caseid")
_SUSPECT_NODE_KEY = ("Suspect", "name", "age", "gender")
_VICTIM_NODE_KEY = ("Victim", "name", "age", "gender")
_LOCATION_NODE_KEY = ("Location", "type")
_CATEGORY_NODE_KEY = ("Category", "name")

# One row per optional person -> Category link, evaluated in this order:
# (role, flag field, detail field, Category name, relationship type).
# A link is created only when the flag field equals "yes" (case-insensitive).
# NOTE: the victim record uses "any_employment" / "employment_indications"
# while the suspect record uses "employment" / "employment_indicators".
_CATEGORY_LINK_RULES = (
    ("suspect", "is_ill", "illness_indicators", "health_problem", "HAS"),
    ("victim", "is_ill", "illness_indicators", "health_problem", "HAS"),
    ("suspect", "on_drugs", "drug_indicators", "drug_problem", "HAS"),
    ("victim", "on_drugs", "drug_indicators", "drug_problem", "HAS"),
    ("suspect", "employment", "employment_indicators", "employed", "HAS"),
    ("suspect", "money_trouble", "money_trouble_indicators", "financial_problem", "HAS"),
    ("victim", "any_employment", "employment_indications", "employed", "HAS"),
    ("suspect", "has_prior_records", "details of prior criminal record", "prior_criminal_record", "HAS"),
    ("suspect", "motive", "motive_indicator", "motive", "HAS"),
    ("suspect", "committed_suicide", "suicide_indicator", "suicide", "HAS_COMMITTED"),
)


def _person_identity(details):
    """Return the (name, age, gender) tuple used to match a Suspect or Victim node."""
    return (details["name"], details["age"], details["gender"])


def _is_yes(value):
    """Return True when a flag string equals "yes", ignoring letter case."""
    return value.lower() == "yes"


def _link(graph, rel_type, start, properties, end, start_node_key, end_node_key):
    """Create a single `rel_type` relationship between two key-matched nodes."""
    create_relationships(graph.auto(), [(start, properties, end)], rel_type,
                         start_node_key=start_node_key, end_node_key=end_node_key)


def create_node_relationships(graph, case_details, crime_details, suspect_details,victim_details):
    """Create the relationships connecting one case, its suspect and one victim.

    Always created (in this order):
      * (Case)-[COMMITED_BY]->(Suspect)
      * (Case)-[COMMITED_ON]->(Victim)
      * (Case)-[COMMITED_AT {secluded}]->(Location)
      * (Case)-[TYPE_OF_CRIME {details}]->(Category "murder"), where `details`
        carries the case's "case_datetime" value

    Created only when the corresponding flag equals "yes" (case-insensitive):
      * (Victim)-[RELATED_TO {type}]->(Suspect)   when victim "is_related"
      * (Suspect)-[KNOWS]->(Victim)               when suspect "know_victim"
      * the person -> Category links listed in `_CATEGORY_LINK_RULES`

    Relationship type spellings (e.g. "COMMITED_BY") are kept exactly as they
    were, since they are part of the data written to the graph.

    Args:
        graph: object exposing `auto()`; its result is passed to
            `create_relationships` as the transaction for every relationship.
        case_details: dict read for "caseid", "is_remote", "location" and
            "case_datetime".
        crime_details: accepted for interface compatibility; not read here.
        suspect_details: dict with "name", "age", "gender" plus the flag and
            detail fields referenced above.
        victim_details: dict with "name", "age", "gender" plus the flag and
            detail fields referenced above.

    Returns:
        An empty tuple.

    Raises:
        KeyError: if a field that is read is missing from its dict.
        AttributeError: if a flag value has no `lower()` method.
    """
    case_id = case_details["caseid"]

    suspect_id = _person_identity(suspect_details)
    _link(graph, "COMMITED_BY", case_id, {}, suspect_id,
          _CASE_NODE_KEY, _SUSPECT_NODE_KEY)

    victim_id = _person_identity(victim_details)
    _link(graph, "COMMITED_ON", case_id, {}, victim_id,
          _CASE_NODE_KEY, _VICTIM_NODE_KEY)

    _link(graph, "COMMITED_AT", case_id, {"secluded": case_details["is_remote"]},
          case_details["location"], _CASE_NODE_KEY, _LOCATION_NODE_KEY)

    _link(graph, "TYPE_OF_CRIME", case_id, {"details": case_details["case_datetime"]},
          "murder", _CASE_NODE_KEY, _CATEGORY_NODE_KEY)

    if _is_yes(victim_details["is_related"]):
        _link(graph, "RELATED_TO", victim_id, {"type": victim_details["relationship"]},
              suspect_id, _VICTIM_NODE_KEY, _SUSPECT_NODE_KEY)

    if _is_yes(suspect_details["know_victim"]):
        _link(graph, "KNOWS", suspect_id, {}, victim_id,
              _SUSPECT_NODE_KEY, _VICTIM_NODE_KEY)

    # Resolve each rule's role to the record, identity tuple and node key to use.
    people = {
        "suspect": (suspect_details, suspect_id, _SUSPECT_NODE_KEY),
        "victim": (victim_details, victim_id, _VICTIM_NODE_KEY),
    }
    for role, flag_field, detail_field, category, rel_type in _CATEGORY_LINK_RULES:
        details, person_id, person_node_key = people[role]
        if _is_yes(details[flag_field]):
            _link(graph, rel_type, person_id, {"details": details[detail_field]},
                  category, person_node_key, _CATEGORY_NODE_KEY)

    return ()

In [11]:
# Ingest every row: create the Case, Suspect, Location and Victim nodes, then
# the relationships between them.
for ind in neo_data.index:
    # Decode each JSON payload once per row instead of on every field access.
    case_details = json.loads(neo_data["crime"][ind])["crime"]
    crime_details = json.loads(neo_data["crime_details"][ind])["crime details"]
    suspect_details = json.loads(neo_data["suspect"][ind])["suspect"]
    victim_payload = json.loads(neo_data["victim"][ind])["victim"]

    create_nodes(graph.auto(), [case_details], labels={"Case"})
    create_nodes(graph.auto(), [suspect_details], labels={"Suspect"})

    # MERGE the location so cases sharing a location share one node. This also
    # covers the pre-seeded "Hong Kong Special Administrative Region" node,
    # which is matched rather than duplicated.
    merge_nodes(graph.auto(), [{"type": case_details["location"]}], ("Location", "type"))

    # The "victim" entry is either a single record or a list of records;
    # normalise to a list so any number of victims is handled.
    victims = victim_payload if isinstance(victim_payload, list) else [victim_payload]
    for victim_details in victims:
        create_nodes(graph.auto(), [victim_details], labels={"Victim"})
        # NOTE(review): each call also re-creates the case-level and
        # suspect-level relationships, so multi-victim cases end up with those
        # repeated once per victim (unchanged from the previous behaviour).
        create_node_relationships(graph, case_details, crime_details,
                                  suspect_details, victim_details)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
