In this workbook we want to merge all data into one final dataframe, that is used as the basis for the heatmap.
The structure should be like:

x | y | dist_educ | dist_... | ... | scaled_dist_educ | scaled_dist_... | ... | final_measure

In [1]:
import pandas as pd

In [68]:
# import the datapoints for the Porto grid
df = pd.read_csv("gridref.csv")

In [10]:
# import green areas
df_green = pd.read_csv("Data/green_areas_df.csv", sep="\t")

In [13]:
# import preschools
df_ed_pre = pd.read_csv("Data/Education/preschools.csv")

In [15]:
# import primary schools
df_ed_prim = pd.read_csv("Data/Education/primaryschools.csv")

In [17]:
# import secondary schools
df_ed_second = pd.read_csv("Data/Education/secondaryschools.csv")

In [19]:
# import superior schools
df_ed_sup = pd.read_csv("Data/Education/superiorschools.csv")

Done with uploading!!!

In [62]:
#function that computes the distance to the closeset point of interest
def calculate_dist(df):
    def calculate(row):
        y = row["long"]
        x = row["lat"]
        temp_list = []
        for i in range(df.shape[0]):
            dist = get_distance(df.loc[i,"y"], df.loc[i, "x"], y, x)
            temp_list.append(dist)
        return min(temp_list)
    return calculate

In [23]:
# credit to: https://stackoverflow.com/questions/44743075/calculate-the-distance-between-two-coordinates-with-python
import math

def get_distance(y_1, x_1, y_2, x_2): 
    d_y = y_2 - y_1
    d_x = x_2 - x_1 

    temp = (  
         math.sin(d_y / 2) ** 2 
       + math.cos(y_1) 
       * math.cos(y_2) 
       * math.sin(d_x / 2) ** 2
    )

    return 6373.0 * (2 * math.atan2(math.sqrt(temp), math.sqrt(1 - temp)))

Merge all dataframes and calculate distances!

In [69]:
df["green"] = df.apply(calculate_dist(df_green), axis=1)

In [70]:
df["ed_pre"] = df.apply(calculate_dist(df_ed_pre), axis=1)

In [71]:
df["ed_prim"] = df.apply(calculate_dist(df_ed_prim), axis=1)

In [72]:
df["ed_second"] = df.apply(calculate_dist(df_ed_second), axis=1)

In [73]:
df["ed_sup"] = df.apply(calculate_dist(df_ed_sup), axis=1)

Scale these badboys"!

In [77]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [78]:
df["green_scaled"] = scaler.fit_transform(df[['green']])

In [81]:
df["ed_pre_scaled"] = scaler.fit_transform(df[['ed_pre']])

In [82]:
df["ed_prim_scaled"] = scaler.fit_transform(df[['ed_prim']])

In [83]:
df["ed_second_scaled"] = scaler.fit_transform(df[['ed_second']])

In [84]:
df["ed_sup_scaled"] = scaler.fit_transform(df[['ed_sup']])

In [85]:
df.describe()

Unnamed: 0,long,lat,green,ed_pre,ed_prim,ed_second,ed_sup,green_scaled,ed_pre_scaled,ed_prim_scaled,ed_second_scaled,ed_sup_scaled
count,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0
mean,41.16375,-8.62375,97.98414,39.978064,62.827738,83.824697,69.311213,0.381497,0.257052,0.331112,0.408263,0.258086
std,0.014422,0.038983,54.064701,27.323371,34.73455,43.026535,50.398427,0.215064,0.181966,0.189804,0.213432,0.192294
min,41.14,-8.69,2.080153,1.379907,2.233591,1.521529,1.669363,0.0,0.0,0.0,0.0,0.0
25%,41.151875,-8.6575,53.811507,19.464778,36.786573,50.427486,30.491485,0.205783,0.12044,0.188812,0.242597,0.10997
50%,41.16375,-8.62375,91.356868,32.283162,56.634541,80.809132,58.539605,0.355135,0.205807,0.297269,0.393305,0.216987
75%,41.175625,-8.59,136.924915,55.378146,82.676931,117.178573,98.395686,0.5364,0.359612,0.439576,0.573715,0.369057
max,41.1875,-8.5575,253.46853,151.53671,185.235744,203.114878,263.759723,1.0,1.0,1.0,1.0,1.0


In [94]:
weight_green = 0.2
weight_ed_pre = 0.2
weight_ed_prim = 0.2
weight_ed_second = 0.2
weight_ed_sup = 0.2

In [95]:
df["QOL"] = weight_green*df["green_scaled"] + \
            weight_ed_pre*df["ed_pre_scaled"] + \
            weight_ed_prim*df["ed_prim_scaled"] + \
            weight_ed_second*df["ed_second_scaled"] + \
            weight_ed_sup*df["ed_sup_scaled"]

In [97]:
df.describe()

Unnamed: 0,long,lat,green,ed_pre,ed_prim,ed_second,ed_sup,green_scaled,ed_pre_scaled,ed_prim_scaled,ed_second_scaled,ed_sup_scaled,QOL
count,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0,1080.0
mean,41.16375,-8.62375,97.98414,39.978064,62.827738,83.824697,69.311213,0.381497,0.257052,0.331112,0.408263,0.258086,0.327202
std,0.014422,0.038983,54.064701,27.323371,34.73455,43.026535,50.398427,0.215064,0.181966,0.189804,0.213432,0.192294,0.15591
min,41.14,-8.69,2.080153,1.379907,2.233591,1.521529,1.669363,0.0,0.0,0.0,0.0,0.0,0.066844
25%,41.151875,-8.6575,53.811507,19.464778,36.786573,50.427486,30.491485,0.205783,0.12044,0.188812,0.242597,0.10997,0.208829
50%,41.16375,-8.62375,91.356868,32.283162,56.634541,80.809132,58.539605,0.355135,0.205807,0.297269,0.393305,0.216987,0.295315
75%,41.175625,-8.59,136.924915,55.378146,82.676931,117.178573,98.395686,0.5364,0.359612,0.439576,0.573715,0.369057,0.437428
max,41.1875,-8.5575,253.46853,151.53671,185.235744,203.114878,263.759723,1.0,1.0,1.0,1.0,1.0,0.878107
