In [1]:
import pandas as pd
from anytree import Node, RenderTree
from anytree.exporter import UniqueDotExporter, DotExporter

In [2]:
df = pd.read_csv("collisions.csv", low_memory=False)
#rename columns
df = df.rename(columns={'CRASH DATE' : 'Date', 'CRASH TIME' : 'Time', 'ON STREET NAME' : 'Street', 
                        'VEHICLE TYPE CODE 1' : 'Vehicle 1', 'VEHICLE TYPE CODE 2' : 'Vehicle 2',
                        'VEHICLE TYPE CODE 3' : 'Vehicle 3', 'VEHICLE TYPE CODE 4' : 'Vehicle 4',
                        'VEHICLE TYPE CODE 5' : 'Vehicle 5'})

##List of columns you want to keep
columns_to_keep = ['Date', 'Time', 'Street', 'Vehicle 1', 'Vehicle 2', 'Vehicle 3', 'Vehicle 4','Vehicle 5']
df = df[columns_to_keep]

In [3]:
root = Node("Root")

timeList = [("00:00","06:00"),("06:00","10:00"),("10:00","15:00"),("15:00","19:00"),("19:00","24:00")];

In [4]:
street_names_unique = df["Street"].unique()

In [5]:
street_names = street_names_unique[:1000]
print(street_names)

# Iterate over each unique street name
for street_name in street_names:
    street_node = Node(street_name, parent=root)
    
    # Subset the data for the current street name
    street_data = df[df["Street"] == street_name]
    
    # Segment the data by crash time
    crash_times = street_data["Time"].unique()
    
    for timeslot in timeList:
        timeslot_node = Node(timeslot, parent=street_node)
        
        local_crash_times = []
        
        for crash_time in crash_times:
            slot_hour_begin, slot_minute_begin = map(int, timeslot[0].split(':'))
            slot_hour_end, slot_minute_end = map(int, timeslot[1].split(':'))
            hour, minute = map(int, crash_time.split(':'))
            
            if slot_hour_begin <= hour < slot_hour_end:
                local_crash_times.append(crash_time)
    
        # Iterate over each unique crash time
        for crash_time in local_crash_times:
            crash_time_node = Node(crash_time, parent=timeslot_node)
        
            # Subset the data for the current crash time and street name
            crash_time_data = street_data[street_data["Time"] == crash_time]
        
            # Segment the data by vehicle types
            vehicle_types = crash_time_data[["Vehicle 1", "Vehicle 2"]].stack().unique()
        
            # Iterate over each unique vehicle type
            for vehicle_type in vehicle_types:
                vehicle_type_node = Node(vehicle_type, parent=crash_time_node)


['WHITESTONE EXPRESSWAY' 'QUEENSBORO BRIDGE UPPER' 'THROGS NECK BRIDGE'
 nan 'SARATOGA AVENUE' 'MAJOR DEEGAN EXPRESSWAY RAMP'
 'BROOKLYN QUEENS EXPRESSWAY' '3 AVENUE' 'MYRTLE AVENUE'
 'SPRINGFIELD BOULEVARD' 'broadway' 'BELT PARKWAY' 'NORTH CONDUIT AVENUE'
 'MEEKER AVENUE' 'WEST KINGSBRIDGE ROAD' 'LONG ISLAND EXPRESSWAY'
 '82 STREET' 'LEXINGTON AVENUE' 'VICTORY BOULEVARD' 'EAST 18 STREET'
 'FULTON STREET' 'GRAND STREET' 'EAST 93 STREET' 'EASTCHESTER ROAD'
 'KINGSLAND AVENUE' 'WILLIAMSBURG BRIDGE OUTER ROADWA'
 'HUTCHINSON RIVER PARKWAY' 'ELIOT AVENUE' 'STATEN ISLAND EXPRESSWAY'
 'BOSTON ROAD' 'EAST 107 STREET' 'BOSCOBEL PLACE' 'BRONX RIVER PARKWAY'
 'EAST 162 STREET' 'CROSS BRONX EXPY' 'MAJOR DEEGAN EXPRESSWAY'
 'WEST 56 STREET' 'AVENUE J' 'SAINT MARKS AVENUE' 'THROOP AVENUE'
 'HENRY HUDSON PARKWAY' 'ROCKAWAY AVENUE' 'UTICA AVENUE' 'THOMSON AVENUE'
 'VANWYCK EXPRESSWAY' '21 STREET' 'JAMAICA AVENUE' '1 AVENUE'
 'BEACH CHANNEL DRIVE' 'EAST 7 STREET' 'WEST 177 STREET' '149 AVENUE'
 'CROWN

In [6]:
def get_descendants_count(node):
    count = len(node.children)
    for child in node.children:
        count += get_descendants_count(child)
    return count

def update_node_names(node):
    count = get_descendants_count(node)
    node.name = f"{node.name} ({count})"
    for child in node.children:
        update_node_names(child)


##for pre, _, node in RenderTree(root):
##    count = len(node.children)
##    node.name = f"{node.name} ({count})"
    
update_node_names(root)

In [7]:
# Print tree to visualize
#for pre, fill, node in RenderTree(root):
#    print(f"{pre}{node.name}")


In [8]:
#Export tree to .dot File and into image(not working well with huge trees)
#UniqueDotExporter(root, maxlevel=3).to_picture("accidents.png")
DotExporter(root).to_dotfile("accidents.dot")
DotExporter(root).to_dotfile("accidentsFile.txt")

In [9]:
# Eingabe der aktuellen Straße

inputStreetName = input("Enter Street Name:")

Enter Street Name:BROADWAY


In [10]:
selectedNode = root;

for node in root.children:
    if inputStreetName in node.name and selectedNode == root:
        selectedNode = node;

print("Gefundene Straße:" + selectedNode.name)        

Gefundene Straße:BROADWAY (3653)


In [14]:
time = "null"        

# Eingabe der aktuellen Uhrzeit

if selectedNode != root:        
    time = input("Uhrzeit :")

# Ermitteln des passenden Zeitslots:

fittingTimeslot = ("null","null")

hour, minute = map(int, time.split(':'))

for timeSlot in timeList:
    slot_hour_begin, slot_minute_begin = map(int, timeSlot[0].split(':'))
    slot_hour_end, slot_minute_end = map(int, timeSlot[1].split(':'))
    if slot_hour_begin < hour <= slot_hour_end and fittingTimeslot == ("null","null"):
        fittingTimeslot = timeSlot
        
print("Gewählter Zeitslot:" + fittingTimeslot[0]+"-"+fittingTimeslot[1])    
timeNode = root

if time != "null":
    for node in selectedNode.children:
        if fittingTimeslot[0] in node.name and fittingTimeslot[1] in node.name and timeNode == root:
            timeNode = node
            
print(timeNode)
print(timeNode.children)

Uhrzeit :05:55
Gewählter Zeitslot:00:00-06:00
Node("/Root (287570)/BROADWAY (3653)/('00:00', '06:00') (544)")
(Node("/Root (287570)/BROADWAY (3653)/('00:00', '06:00') (544)/E-Bike (1)"), Node("/Root (287570)/BROADWAY (3653)/('00:00', '06:00') (544)/Station Wagon/Sport Utility Vehicle/2020 (1)"), Node("/Root (287570)/BROADWAY (3653)/('00:00', '06:00') (544)/Box Truck (1)"), Node("/Root (287570)/BROADWAY (3653)/('00:00', '06:00') (544)/Sedan (1)"), Node("/Root (287570)/BROADWAY (3653)/('00:00', '06:00') (544)/Station Wagon/Sport Utility Vehicle/2020 (1)"), Node("/Root (287570)/BROADWAY (3653)/('00:00', '06:00') (544)/Sedan (1)"), Node("/Root (287570)/BROADWAY (3653)/('00:00', '06:00') (544)/Sedan (1)"), Node("/Root (287570)/BROADWAY (3653)/('00:00', '06:00') (544)/Station Wagon/Sport Utility Vehicle/2020 (1)"), Node("/Root (287570)/BROADWAY (3653)/('00:00', '06:00') (544)/Taxi (1)"), Node("/Root (287570)/BROADWAY (3653)/('00:00', '06:00') (544)/Station Wagon/Sport Utility Vehicle/2020 (1

In [12]:
# Ausrechnen der Gefahr:
if timeNode == root:
    print("Es ist ein Fehler aufgetreten, bitte versuche es erneut!")    

In [13]:
if len(timeNode.children) == 0:
    print("Es besteht eine sehr geringe Gefahr!")

    #TODO: Change some thing
#if len(timeNode.children) > 0:
#    vehicleTypeHighest = timeNode.children[0];
#    for node in timeNode.children:
#        if len(node.children) > len(vehicleTypeHighest.children):
#            vehicleTypeHighest = node

#    print("Das höchste Risiko besteht mit folgendem Fahrzeugtypen:" , vehicleTypeHighest.name)

Das höchste Risiko besteht mit folgendem Fahrzeugtypen: E-Bike (1)
