In [2]:
import pandas as pd

from collections import Counter, OrderedDict
from ast import literal_eval

In [46]:
# Load only the building attributes that the risk calculation reads.
df = pd.read_csv(
    "../data/city_area_buildings.csv",
    usecols=[
        "pand_id",
        "gebruiksdoelVerblijfsobject",
        "neighbors",
        "linked_small",
        "linked_big",
        "ov_small",
        "ov_big",
    ],
)

In [98]:
# Spot check for one building: number of entries in its ov_big list, divided by 7, plus 1.
len(literal_eval(df.loc[df['pand_id'] == 363100012157417, 'ov_big'].values[0])) / 7 + 1

1.1428571428571428

In [50]:
# Per-building road scores; the small_fire / big_fire columns are read by the risk functions.
roads = pd.read_csv(filepath_or_buffer='../data/road_scores_in_radius.csv')

In [51]:
# Remove the unnamed first CSV column (presumably a saved index - confirm).
# errors='ignore' keeps this cell idempotent: re-running it after the column
# has already been dropped no longer raises KeyError.
roads = roads.drop(columns='Unnamed: 0', errors='ignore')
roads

Unnamed: 0,pand_id,small_fire,big_fire
0,3.631000e+14,1.0,1.000000
1,3.631000e+14,1.0,1.000000
2,3.631000e+14,1.0,1.000000
3,3.631000e+14,1.0,1.000000
4,3.631000e+14,1.0,1.000000
...,...,...,...
3767,3.631000e+14,1.0,1.142857
3768,3.631000e+14,1.0,1.142857
3769,3.631000e+14,1.0,1.142857
3770,3.631000e+14,1.0,1.142857


In [73]:
# Collect every distinct usage function in order of first appearance.
# Each cell of the column holds a Python-literal list, hence literal_eval;
# dict.fromkeys keeps insertion order while discarding repeats.
unique_functions = list(dict.fromkeys(
    function_name
    for raw_functions in df['gebruiksdoelVerblijfsobject']
    for function_name in literal_eval(raw_functions)
))

print(unique_functions)

['industry function', 'residential function', 'office function', 'other usage', 'educational function', 'shopping function', 'meet function', 'health care function', 'sports function', 'accomodation function', 'cell function']


In [74]:
# Risk class (1-4) per usage function; used as a per-occurrence weight in the
# risk score. Built from a class -> functions grouping so each tier reads at a glance.
risk_classes = {
    function_name: risk_class
    for risk_class, function_names in (
        (1, ('other usage', 'shopping function', 'sports function', 'accomodation function')),
        (2, ('cell function', 'industry function', 'office function')),
        (3, ('residential function', 'meet function')),
        (4, ('educational function', 'health care function')),
    )
    for function_name in function_names
}

In [87]:
def calculate_risk_small(row):
    """Compute the small-fire risk score for one building.

    The score is the sum of the risk classes (``risk_classes``) of every usage
    function found in the buildings listed in this building's ``neighbors``
    and ``linked_small`` columns, multiplied by the building's ``small_fire``
    road score (from ``roads``) and by ``len(ov_small) / 3 + 1``.

    The building's own usage functions do not contribute to the score.
    A building listed in both ``neighbors`` and ``linked_small`` is counted twice.

    Parameters
    ----------
    row : mapping
        Must provide ``row['pand_id']``; all other attributes are looked up in
        the global ``df`` through that id.

    Returns
    -------
    float
        The score rounded to two decimals.

    Raises
    ------
    IndexError
        If ``pand_id`` is not present in ``df``.
    ValueError
        If ``roads`` does not hold exactly one row for ``pand_id``.
    KeyError
        If a usage function has no entry in ``risk_classes``.
    """
    pand_id = float(row['pand_id'])
    df_building = df[df.pand_id == pand_id]

    score = 0
    for column in ("neighbors", "linked_small"):
        # Each of these cells holds a Python-literal list of pand_ids.
        related_ids = literal_eval(df_building[column].values[0])
        related_functions = df[df.pand_id.isin(related_ids)].gebruiksdoelVerblijfsobject
        function_counts = Counter(
            item for lijst in related_functions for item in literal_eval(lijst)
        )
        for function_name, count in function_counts.items():
            score += risk_classes[function_name] * count

    # Extract the scalar explicitly: calling float() on a one-element Series is
    # deprecated in pandas. .item() still insists on exactly one matching row.
    road_factor = float(roads.loc[roads['pand_id'] == pand_id, 'small_fire'].item())

    # NOTE(review): the divisor 3 is not explained anywhere in the notebook.
    ov_factor = len(literal_eval(df_building['ov_small'].values[0])) / 3 + 1

    return round(score * road_factor * ov_factor, 2)

def calculate_risk_big(row):
    """Compute the big-fire risk score for one building.

    The score is the sum of the risk classes (``risk_classes``) of every usage
    function found in the buildings listed in this building's ``neighbors``
    and ``linked_big`` columns, multiplied by the building's ``big_fire``
    road score (from ``roads``) and by ``len(ov_big) / 7 + 1``.

    The building's own usage functions do not contribute to the score.
    A building listed in both ``neighbors`` and ``linked_big`` is counted twice.

    Parameters
    ----------
    row : mapping
        Must provide ``row['pand_id']``; all other attributes are looked up in
        the global ``df`` through that id.

    Returns
    -------
    float
        The score rounded to two decimals.

    Raises
    ------
    IndexError
        If ``pand_id`` is not present in ``df``.
    ValueError
        If ``roads`` does not hold exactly one row for ``pand_id``.
    KeyError
        If a usage function has no entry in ``risk_classes``.
    """
    pand_id = float(row['pand_id'])
    df_building = df[df.pand_id == pand_id]

    score = 0
    for column in ("neighbors", "linked_big"):
        # Each of these cells holds a Python-literal list of pand_ids.
        related_ids = literal_eval(df_building[column].values[0])
        related_functions = df[df.pand_id.isin(related_ids)].gebruiksdoelVerblijfsobject
        function_counts = Counter(
            item for lijst in related_functions for item in literal_eval(lijst)
        )
        for function_name, count in function_counts.items():
            score += risk_classes[function_name] * count

    # Extract the scalar explicitly: calling float() on a one-element Series is
    # deprecated in pandas. .item() still insists on exactly one matching row.
    road_factor = float(roads.loc[roads['pand_id'] == pand_id, 'big_fire'].item())

    # NOTE(review): the divisor 7 is not explained anywhere in the notebook.
    ov_factor = len(literal_eval(df_building['ov_big'].values[0])) / 7 + 1

    return round(score * road_factor * ov_factor, 2)

In [88]:
# Score every building row by row; each function receives one row of df.
df['risk_score_small'] = df.apply(calculate_risk_small, axis='columns')
df['risk_score_big'] = df.apply(calculate_risk_big, axis='columns')

In [89]:
# Keep only the building id and the two raw scores.
df_scores = df.loc[:, ['pand_id', 'risk_score_small', 'risk_score_big']]
df_scores

Unnamed: 0,pand_id,risk_score_small,risk_score_big
0,3.631000e+14,72.0,246.00
1,3.631000e+14,102.0,197.00
2,3.631000e+14,59.0,266.00
3,3.631000e+14,104.0,240.00
4,3.631000e+14,103.0,228.00
...,...,...,...
3767,3.631000e+14,18.0,54.86
3768,3.631000e+14,18.0,54.86
3769,3.631000e+14,18.0,51.43
3770,3.631000e+14,18.0,54.86


In [90]:
from sklearn import preprocessing

# Min-max scale each raw score and append the result as a named column.
# Columns are assigned directly by name rather than through an index-aligned
# join followed by a positional rename, so the result does not depend on
# df_scores having a default RangeIndex or on the current column positions.
final = df_scores.copy()
for score_column, normalized_column in (
    ('risk_score_small', 'small_normalized'),
    ('risk_score_big', 'big_normalized'),
):
    min_max_scaler = preprocessing.MinMaxScaler()
    # fit_transform returns an (n, 1) array; flatten it to one value per row.
    final[normalized_column] = min_max_scaler.fit_transform(final[[score_column]].values).ravel()

In [91]:
# Display the raw scores together with their min-max normalized columns.
final

Unnamed: 0,pand_id,risk_score_small,risk_score_big,small_normalized,big_normalized
0,3.631000e+14,72.0,246.00,0.047368,0.068476
1,3.631000e+14,102.0,197.00,0.067105,0.054836
2,3.631000e+14,59.0,266.00,0.038816,0.074043
3,3.631000e+14,104.0,240.00,0.068421,0.066806
4,3.631000e+14,103.0,228.00,0.067763,0.063465
...,...,...,...,...,...
3767,3.631000e+14,18.0,54.86,0.011842,0.015271
3768,3.631000e+14,18.0,54.86,0.011842,0.015271
3769,3.631000e+14,18.0,51.43,0.011842,0.014316
3770,3.631000e+14,18.0,54.86,0.011842,0.015271


In [92]:
# Persist the scores. With to_csv defaults, the DataFrame index is written as
# the first (unnamed) CSV column.
final.to_csv(path_or_buf='../data/risk_scores.csv')