In [12]:
import pandas as pd

from collections import Counter, OrderedDict
from ast import literal_eval

In [56]:
# Load only the columns this notebook reads: the building id, its
# string-encoded list of use functions, and the three string-encoded lists of
# related building ids.
df = pd.read_csv(
    filepath_or_buffer="../data/city_area_buildings.csv",
    usecols=[
        "pand_id",
        "gebruiksdoelVerblijfsobject",
        "neighbors",
        "linked_small",
        "linked_big",
    ],
)

In [25]:
# Collect every distinct use function that occurs in the dataset, in order of
# first appearance. Each cell of `gebruiksdoelVerblijfsobject` is a
# string-encoded list, so it is decoded with literal_eval before flattening.
# dict.fromkeys de-duplicates while keeping insertion order, which avoids the
# row-by-row iterrows() loop and the repeated linear `in list` scan.
unique_functions = list(dict.fromkeys(
    function
    for encoded_functions in df.gebruiksdoelVerblijfsobject
    for function in literal_eval(encoded_functions)
))

print(unique_functions)

['industriefunctie', 'woonfunctie', 'kantoorfunctie', 'overige gebruiksfunctie', 'onderwijsfunctie', 'winkelfunctie', 'bijeenkomstfunctie', 'gezondheidszorgfunctie', 'sportfunctie', 'logiesfunctie', 'celfunctie']


In [45]:
# Risk class per building use function. The risk calculations below multiply
# each occurrence of a function by its class, so a higher class contributes
# more to a building's score.
risk_classes = {
    function_name: risk_class
    for risk_class, function_names in (
        (1, ('overige gebruiksfunctie', 'winkelfunctie', 'sportfunctie', 'logiesfunctie')),
        (2, ('celfunctie', 'industriefunctie', 'kantoorfunctie')),
        (3, ('woonfunctie', 'bijeenkomstfunctie')),
        (4, ('onderwijsfunctie', 'gezondheidszorgfunctie')),
    )
    for function_name in function_names
}

In [53]:
def _functions_of(buildings, pand_ids):
    """Return the use functions of every row of `buildings` whose pand_id is in `pand_ids`.

    Each matching row's `gebruiksdoelVerblijfsobject` cell is a string-encoded
    list. The decoded lists are concatenated into one flat list, so a function
    that occurs in several matching rows appears several times.
    """
    encoded = buildings.loc[buildings.pand_id.isin(pand_ids), 'gebruiksdoelVerblijfsobject']
    return [function for cell in encoded for function in literal_eval(cell)]


def _calculate_risk(row, linked_column, buildings=None, risk_weights=None):
    """Shared implementation of calculate_risk_small / calculate_risk_big.

    The building is looked up in `buildings` by `row['pand_id']`; the ids in its
    `neighbors` cell and in its `linked_column` cell are resolved to rows of
    `buildings`, and the risk weight of every use function found on those rows
    is summed. A building listed in both cells is counted in both.
    """
    # Fall back to the notebook-level objects so existing
    # `df.apply(calculate_risk_small, axis=1)` calls keep working.
    if buildings is None:
        buildings = df
    if risk_weights is None:
        risk_weights = risk_classes

    # `.values[0]` below takes the first match; it raises IndexError when the
    # id is not present in `buildings`.
    building = buildings[buildings.pand_id == float(row['pand_id'])]

    score = 0
    for column in ('neighbors', linked_column):
        related_ids = literal_eval(building[column].values[0])
        score += sum(
            risk_weights[function]
            for function in _functions_of(buildings, related_ids)
        )
    return score


def calculate_risk_small(row, buildings=None, risk_weights=None):
    """Risk score of one building from its neighbors and its `linked_small` buildings.

    Parameters
    ----------
    row : mapping with a 'pand_id' entry (e.g. a row passed by `df.apply(..., axis=1)`).
    buildings : DataFrame, optional
        Table with columns pand_id, gebruiksdoelVerblijfsobject, neighbors and
        linked_small. Defaults to the notebook-level `df`.
    risk_weights : mapping, optional
        Use function -> weight. Defaults to the notebook-level `risk_classes`.

    Returns
    -------
    The sum of the weights of all use functions of the neighboring and linked
    buildings (0 when both lists are empty).

    Raises
    ------
    IndexError if `row['pand_id']` is not found in `buildings`;
    KeyError if a use function has no entry in `risk_weights`.
    """
    return _calculate_risk(row, 'linked_small', buildings, risk_weights)


def calculate_risk_big(row, buildings=None, risk_weights=None):
    """Risk score of one building from its neighbors and its `linked_big` buildings.

    Same contract as `calculate_risk_small`, except that the linked buildings
    are read from the `linked_big` column.
    """
    return _calculate_risk(row, 'linked_big', buildings, risk_weights)

In [57]:
# Score every building once per linkage variant and store the result as a new
# column on df. The scoring functions look buildings up in df itself.
for score_column, score_function in (
    ('risk_score_small', calculate_risk_small),
    ('risk_score_big', calculate_risk_big),
):
    df[score_column] = df.apply(score_function, axis=1)

In [59]:
# Keep only the building id and the two raw risk scores.
score_columns = ['pand_id', 'risk_score_small', 'risk_score_big']
df_scores = df.loc[:, score_columns]

In [67]:
from sklearn import preprocessing

# Add a min-max scaled copy of each raw score column.
#
# The scaled values are assigned positionally to a copy of df_scores instead of
# being wrapped in a fresh DataFrame and joined back: `join` aligns on index
# labels, so it only produced correct rows while df_scores carried a default
# 0..n-1 index, and the follow-up rename relied on the new column's position.
final = df_scores.copy()

for raw_column, normalized_column in (
    ('risk_score_small', 'small_normalized'),
    ('risk_score_big', 'big_normalized'),
):
    # MinMaxScaler expects a 2-D array; fit a fresh scaler per column.
    scaled = preprocessing.MinMaxScaler().fit_transform(final[[raw_column]].values)
    # Take the single output column as a 1-D array for the assignment.
    final[normalized_column] = scaled[:, 0]

In [74]:
# Persist the raw and normalized scores next to the input data.
final.to_csv(path_or_buf='../data/risk_scores.csv')

In [80]:
# Spot check: raw small-linkage score of a single building.
final.loc[final['pand_id'] == float(363100012239578), 'risk_score_small'].values[0]

345