# Challenge
- 1. Data uploading
- 2. Plotting maps
- 3. Creating models

## 1. Data loading

In [1]:
import pandas as pd
import pydeck as pdk

df = pd.read_csv('data_squares.csv')

In [2]:
df.head()
#df.describe()

Unnamed: 0.1,Unnamed: 0,square_id,center_lat,center_lng,average_hh_ha,average_pers_ha,average_pt_class,average_pt_dis,average_station_dis,average_noise_street,average_number_of_supermarket,average_popularity_percent_normal,average_number_of_migros,average_popularity_percent_normal_migros
0,0,sq1-1,47.360332,8.430295,0.0,0.0,3.956522,562.030075,8531.165414,52.398496,0,,0,
1,1,sq1-2,47.360081,8.434614,1.446809,3.06383,2.893617,372.180851,8220.904255,52.712766,0,,0,
2,2,sq1-3,47.361836,8.438194,0.0,0.0,3.5,486.25,7911.785714,66.071429,0,,0,
3,3,sq1-4,47.359902,8.44525,8.0,19.076923,3.78022,158.021978,7450.054945,55.714286,0,,0,
4,4,sq1-5,47.35977,8.449631,3.478261,8.869565,3.0,85.869565,7136.086957,62.804348,0,,0,


In [3]:
#Reversing pt_class data to match order with size of categorical value
df['average_pt_class'] = 5 - df['average_pt_class']

## 2. Plotting maps
- 2.1 Create 2 layers for all components (scatter plot and heatmap layer)
- 2.2 Define viewpoint
- 2.3 Choose the plots and print

#### 2.1 Create 2 layers for all components (scatter plot and heatmap layer)

In [4]:
#Scatter Layer
layer_scatter=[]
for elements in df:
    layer_scatter.append(pdk.Layer(
        "ScatterplotLayer",
        df,
        opacity=0.2,
        get_position=["center_lng", "center_lat"],
        radius_scale=5,
        radius_min_pixels=0,
        radius_max_pixels=20,
        line_width_min_pixels=1,
        get_radius=elements,
        get_fill_color=[10, 200, 150],
        get_line_color=[0, 0, 0]
        ))

In [5]:
#Heatmap Layer
layer_heatmap=[]
for elements in df:
    layer_heatmap.append(pdk.Layer(
    "HeatmapLayer",
    df,
    opacity=0.9,
    get_position=["center_lng", "center_lat"],
    aggregation=pdk.types.String("MEAN"),
    threshold=1,
    get_weight=elements,
    pickable=True,
    ))

In [6]:
#More layering examples
layer_sup = pdk.Layer(
        "ScatterplotLayer",
        df,
        opacity=0.2,
        get_position=["center_lng", "center_lat"],
        radius_scale=30,
        radius_min_pixels=0,
        radius_max_pixels=300,
        line_width_min_pixels=1,
        get_radius='average_number_of_supermarket',
        get_fill_color=[10, 200, 150],
        get_line_color=[0, 0, 0]
        )

layer_migro = pdk.Layer(
        "ScatterplotLayer",
        df,
        opacity=0.2,
        get_position=["center_lng", "center_lat"],
        radius_scale=30,
        radius_min_pixels=0,
        radius_max_pixels=300,
        line_width_min_pixels=1,
        get_radius='average_number_of_migros',
        get_fill_color=[200, 50, 50],
        get_line_color=[0, 0, 0]
        )

#### 2.2 Define viewpoint

In [7]:
view_state = pdk.ViewState(
    longitude= 8.522128,
    latitude=47.395171,
    zoom=11.5,
    min_zoom=5,
    max_zoom=20)

#### 2.3 Choose the plots and print

In [8]:
#Define deck by selecting the layer and a name for the html file
r = pdk.Deck(layers=[layer_sup,layer_migro],
             initial_view_state=view_state,
             map_style ='light')

r.to_html('zurich-scatter-nsup.html')

## 3. Creating models
- 3.1 Create normalized database (using normalization min-max)
- 3.2 Create hypersimplistic model with multiplications and division
- 3.3 Create correlation adjusted model

#### 3.1 Create normalized database (using normalization min-max) 

In [55]:
#I am using a min-max scaling
#Source: https://sebastianraschka.com/Articles/2014_about_feature_scaling.html#standardizing-and-normalizing---how-it-can-be-done-using-scikit-learn
import copy
import numpy as np
df_dummy = df.copy()
df_pre_norm = df_dummy.iloc[:,4:].reset_index()
#df_norm = (df_pre_norm-df_pre_norm.min())/(df_pre_norm.max()-df_pre_norm.min())
df_norm = (df_pre_norm-df_pre_norm.mean())/(df_pre_norm.std())
df_norm = df_dummy.iloc[:,0:4].join(df_norm)
#df_norm.head()

Unnamed: 0.1,Unnamed: 0,square_id,center_lat,center_lng,index,average_hh_ha,average_pers_ha,average_pt_class,average_pt_dis,average_station_dis,average_noise_street,average_number_of_supermarket,average_popularity_percent_normal,average_number_of_migros,average_popularity_percent_normal_migros
0,0,sq1-1,47.360332,8.430295,-1.727375,-0.928319,-0.980478,-1.656045,1.580142,1.910337,-0.033982,-0.430145,,-0.294884,
1,1,sq1-2,47.360081,8.434614,-1.721138,-0.859814,-0.906677,-0.601166,0.571226,1.764492,0.005664,-0.430145,,-0.294884,
2,2,sq1-3,47.361836,8.438194,-1.714902,-0.928319,-0.980478,-1.20297,1.177424,1.619184,1.690905,-0.430145,,-0.294884,
3,3,sq1-4,47.359902,8.44525,-1.708666,-0.549527,-0.520958,-1.481074,-0.566879,1.402138,0.384316,-0.430145,,-0.294884,
4,4,sq1-5,47.35977,8.449631,-1.70243,-0.763627,-0.76683,-0.706746,-0.950319,1.25455,1.278752,-0.430145,,-0.294884,


#### 3.2 Create hypersimplistic model with multiplications and division

In [61]:
#Here the pt_class is adequately sorted (highest value corresponds to the highest quality)
df_num_supermarkt = df_dummy['average_number_of_supermarket']
df_num_migros = df_dummy['average_number_of_migros']
df_num_supermarkt.replace(0,0.5)
df_num_migros.replace(0,0.5)

#df_norm['simple_result'] = (df_norm['average_pers_ha']*df_norm['average_pt_class']/df_num_supermarkt)*(df_num_migros/df_num_supermarkt)
df_norm['simple_result'] = (df_norm['average_pers_ha']*df_norm['average_pt_class'])*(df_num_supermarkt-df_num_migros)/df_num_supermarkt
df_norm.describe()

Unnamed: 0.1,Unnamed: 0,center_lat,center_lng,index,average_hh_ha,average_pers_ha,average_pt_class,average_pt_dis,average_station_dis,average_noise_street,average_number_of_supermarket,average_popularity_percent_normal,average_number_of_migros,average_popularity_percent_normal_migros,simple_result
count,555.0,521.0,521.0,555.0,521.0,521.0,508.0,521.0,521.0,521.0,555.0,122.0,555.0,50.0,133.0
mean,277.0,47.395219,8.519979,0.0,3.409514e-17,-9.546639000000001e-17,-2.098059e-16,-1.3638060000000001e-17,3.545895e-16,8.864737e-16,6.401286e-18,-1.365028e-16,0.0,-2.398082e-16,0.593855
std,160.358972,0.021286,0.052523,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.992558
min,0.0,47.3589,8.428646,-1.727375,-0.9283185,-0.9804783,-1.699195,-0.9848332,-1.977164,-3.490398,-0.4301446,-3.238208,-0.294884,-2.554104,-0.90658
25%,138.5,47.375201,8.475183,-0.863687,-0.8375123,-0.8606152,-0.810502,-0.6189014,-0.7174626,-0.5370787,-0.4301446,-0.55294,-0.294884,-0.3622573,0.0
50%,277.0,47.395079,8.519395,0.0,-0.3548067,-0.358301,0.2083563,-0.435958,-0.1391968,0.1502267,-0.4301446,0.0553273,-0.294884,0.2176526,0.318196
75%,415.5,47.414825,8.564184,0.863687,0.5720158,0.6500206,0.8935998,0.2545745,0.6232814,0.645442,-0.4301446,0.6535155,-0.294884,0.6724301,0.990367
max,554.0,47.430959,8.609374,1.727375,4.760565,4.400994,1.278152,4.455345,2.65255,2.465049,9.353883,2.435426,7.258678,2.027086,4.669923


In [92]:
layer_simple_model = pdk.Layer(
    "HeatmapLayer",
    df_norm,
    opacity=0.9,
    get_position=["center_lng", "center_lat"],
    aggregation=pdk.types.String("MEAN"),
    threshold=1,
    get_weight='simple_result',
    pickable=True,
    )

r = pdk.Deck(layers=[layer_simple_model],
             initial_view_state=view_state,
             map_style ='light')

r.to_html('zurich-simple-model.html')

#### 3.3 Create correlation adjusted model
- Calculate correlation coefficients metrics
- 

In [76]:
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt
#Manipulate the existing number of supermarket column
df_dummy1 = df_norm.copy()
df_dummy1 = df_dummy1[['average_pers_ha','average_number_of_supermarket','average_pt_class','average_popularity_percent_normal']]
df_dummy2 = df_dummy1[['average_pers_ha','average_number_of_supermarket','average_pt_class']].dropna()
df_dummy3 = df_dummy1.dropna()
#df_num_supermarkt = df_dummy2['average_number_of_supermarket']
#df_ppl = df_dummy2['average_pers_ha'].dropna()

model = sm.GLM(df_dummy2['average_number_of_supermarket'],df_dummy2['average_pers_ha'],family=sm.families.Poisson())
gamma_results = model.fit()
print(gamma_results.summary())
print(gamma_results)
#t = np.arange(0, 20, 0.1)
#d = np.exp(-5)*np.power(5, t)/factorial(t)

                       Generalized Linear Model Regression Results                       
Dep. Variable:     average_number_of_supermarket   No. Observations:                  508
Model:                                       GLM   Df Residuals:                      507
Model Family:                            Poisson   Df Model:                            0
Link Function:                               log   Scale:                          1.0000
Method:                                     IRLS   Log-Likelihood:                -718.99
Date:                           Fri, 10 Sep 2021   Deviance:                       12804.
Time:                                   13:21:11   Pearson chi2:                     971.
No. Iterations:                                6                                         
Covariance Type:                       nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
----------------

In [72]:
model = sm.GLM(df_dummy2['average_number_of_supermarket'],df_dummy2['average_pt_class'],family=sm.families.Poisson())
gamma_results = model.fit()
print(gamma_results.summary())
print(gamma_results)

                       Generalized Linear Model Regression Results                       
Dep. Variable:     average_number_of_supermarket   No. Observations:                  508
Model:                                       GLM   Df Residuals:                      507
Model Family:                            Poisson   Df Model:                            0
Link Function:                               log   Scale:                          1.0000
Method:                                     IRLS   Log-Likelihood:                -707.71
Date:                           Fri, 10 Sep 2021   Deviance:                       12793.
Time:                                   12:51:47   Pearson chi2:                     919.
No. Iterations:                                6                                         
Covariance Type:                       nonrobust                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
---------------

In [80]:
model = sm.GLM(df_dummy3['average_number_of_supermarket'],df_dummy3['average_popularity_percent_normal'],family=sm.families.Poisson())
gamma_results = model.fit()
print(gamma_results.summary())
print(gamma_results)

                       Generalized Linear Model Regression Results                       
Dep. Variable:     average_number_of_supermarket   No. Observations:                  122
Model:                                       GLM   Df Residuals:                      121
Model Family:                            Poisson   Df Model:                            0
Link Function:                               log   Scale:                          1.0000
Method:                                     IRLS   Log-Likelihood:                -192.75
Date:                           Fri, 10 Sep 2021   Deviance:                       131.66
Time:                                   13:29:22   Pearson chi2:                     231.
No. Iterations:                                4                                         
Covariance Type:                       nonrobust                                         
                                        coef    std err          z      P>|z|      [0.025      0.975

#### Create a coefficient adjusted model
- Even assignment to all parameters (33-33-33)
- Heavier weights to transport and occupancy rate (40-40-20)
- Heavier occupancy (10-10-80)

In [84]:
#Here the pt_class is adequately sorted (highest value corresponds to the highest quality)
df_num_supermarkt = df_dummy['average_number_of_supermarket']
df_num_migros = df_dummy['average_number_of_migros']
df_num_supermarkt.replace(0,0.5)
df_num_migros.replace(0,0.5)
df_norm['average_popularity_percent_normal'] = df_norm['average_popularity_percent_normal'].fillna(0)

#df_norm['simple_result'] = (df_norm['average_pers_ha']*df_norm['average_pt_class']/df_num_supermarkt)*(df_num_migros/df_num_supermarkt)
df_norm['coeficient_adjusted_result'] = 0.2830*0.333*df_norm['average_pers_ha']+0.3935*0.3333*df_norm['average_pt_class']-0.1371*0.3333*df_norm['average_popularity_percent_normal']
df_norm['coeficient_adjusted_result'] = df_norm['coeficient_adjusted_result']/df_num_supermarkt

In [96]:
layer_simple_model = pdk.Layer(
    "HeatmapLayer",
    df_norm,
    opacity=0.9,
    get_position=["center_lng", "center_lat"],
    aggregation=pdk.types.String("MEAN"),
    threshold=1,
    get_weight='coeficient_adjusted_result',
    pickable=True,
    )

r = pdk.Deck(layers=[layer_simple_model],
             initial_view_state=view_state,
             map_style ='light')

r.to_html('zurich-coeficient_adj-model.html')

In [97]:
df_norm['coeficient_adjusted_result_1'] = 0.2830*0.4*df_norm['average_pers_ha']+0.3935*0.2*df_norm['average_pt_class']-0.1371*0.4*df_norm['average_popularity_percent_normal']
df_norm['coeficient_adjusted_result_1'] = df_norm['coeficient_adjusted_result_1']/df_num_supermarkt
layer_simple_model = pdk.Layer(
    "HeatmapLayer",
    df_norm,
    opacity=0.9,
    get_position=["center_lng", "center_lat"],
    aggregation=pdk.types.String("MEAN"),
    threshold=1,
    get_weight='coeficient_adjusted_result_1',
    pickable=True,
    )

r = pdk.Deck(layers=[layer_simple_model],
             initial_view_state=view_state,
             map_style ='light')

r.to_html('zurich-coeficient_adj-model_1.html')

In [98]:
df_norm['coeficient_adjusted_result_2'] = 0.2830*0.2*df_norm['average_pers_ha']+0.3935*0.2*df_norm['average_pt_class']-0.1371*0.6*df_norm['average_popularity_percent_normal']
df_norm['coeficient_adjusted_result_2'] = df_norm['coeficient_adjusted_result_2']/df_num_supermarkt
layer_simple_model = pdk.Layer(
    "HeatmapLayer",
    df_norm,
    opacity=0.9,
    get_position=["center_lng", "center_lat"],
    aggregation=pdk.types.String("MEAN"),
    threshold=1,
    get_weight='coeficient_adjusted_result_2',
    pickable=True,
    )

r = pdk.Deck(layers=[layer_simple_model],
             initial_view_state=view_state,
             map_style ='light')

r.to_html('zurich-coeficient_adj-model_2.html')