In [1]:
import sys
import pathlib
import pandas as pd
import numpy as np

ROOT = pathlib.Path().absolute().parent.as_posix()
if ROOT not in sys.path:
    sys.path.append(ROOT)
    
from helpers import *

In [2]:
points_df = get_spaced_point_set_in_bbox(0.25, (51.239405, -0.625211), (51.737184, 0.328289))
points_df.to_csv(ROOT + '/Spikes/Dash/data/points_df.csv')

221


100%|██████████| 221/221 [00:00<00:00, 765.15it/s]


In [3]:
points_df

Unnamed: 0,Latitude,Longitude
0,51.737184,-0.620643
1,51.737184,-0.617012
2,51.737184,-0.613382
3,51.737183,-0.609751
4,51.737183,-0.606120
...,...,...
58243,51.238843,0.312049
58244,51.238815,0.315640
58245,51.238786,0.319231
58246,51.238757,0.322822


In [4]:
import pandas as pd
import numpy as np

points_df_filled = pd.read_csv(ROOT + '/Spikes/Dash/data/points_df.csv', index_col = 0)

def apply_aq_functions(points_df_filled):
    #molar mass constants
    co_molar_mass = 28.01
    no2_molar_mass = 46.0055
    o3_molar_mass = 48
    so2_molar_mass = 64.066

    #apply aq functions to each row (using latitude and longitude columns) and multiply by associated molar mass to give g/m2
    #axis = 1, apply function to each row
    points_df_filled['Value_co'] = points_df_filled.apply(lambda row : co_function(row[0], row[1]) * co_molar_mass, axis=1)
    points_df_filled['Value_no2'] = points_df_filled.apply(lambda row : no2_function(row[0], row[1]) * no2_molar_mass, axis=1)
    points_df_filled['Value_o3'] = points_df_filled.apply(lambda row : o3_function(row[0], row[1]) * o3_molar_mass, axis=1)
    points_df_filled['Value_so2'] = points_df_filled.apply(lambda row : so2_function(row[0], row[1]) * so2_molar_mass, axis=1)
    points_df_filled['Value_ai'] = points_df_filled.apply(lambda row : ai_function(row[0], row[1]), axis=1)
    
    return points_df_filled

def normalise(points_df_filled):
    norm_cols = ['Value_co', 'Value_no2', 'Value_o3', 'Value_so2', 'Value_ai']
    #normalise each aq metric value set between 1 and 0 where 0 = 0% and 1 = 20%
    for i in points_df_filled[norm_cols]:   #normalise aq value columns
        points_df_filled['norm_' + i]=(points_df_filled[i]-points_df_filled[i].min())/(points_df_filled[i].max()-points_df_filled[i].min())
    
    return points_df_filled

def aqs_function(aq1, aq2, aq3, aq4, aq5):
    #smaller value = better air quality
    aqs = (aq1 * (20/100)) + (aq2 * (20/100)) + (aq3 * (20/100)) + (aq4 * (20/100)) + (aq5 * (20/100))
    return aqs

def apply_aqs_function(points_df_filled):
    #assumption: each metric is worth 20% of AQS, 100 / 5 metrics
    #apply calculate_aqi function to each row of the 5 aq columns
    points_df_filled['AQ_score'] = points_df_filled.apply(lambda row : aqs_function(row['norm_Value_co'], 
                                                                                            row['norm_Value_no2'], 
                                                                                            row['norm_Value_o3'], 
                                                                                            row['norm_Value_so2'], 
                                                                                            row['norm_Value_ai']), axis=1)
    
    #drop normalised columns (as unuseful now)
    points_df_filled = points_df_filled.drop(['norm_Value_co', 'norm_Value_no2', 'norm_Value_o3', 'norm_Value_so2', 'norm_Value_ai'], axis = 1)
    
    return points_df_filled

def apply_popd_function(points_df_filled):
    #same as above apply aq functions but with...
    #popdensity_function
    points_df_filled['Pop_density'] = points_df_filled.apply(lambda row : popdensity_function(row[0], row[1]), axis=1)
    
    return points_df_filled

def greenspace_score_function(land_type, aq_score, pop_density):
    pass

def apply_greenspace_score_function(points_df_filled):
    points_df_filled['Greenspace_score'] = points_df_filled.apply(lambda row : greenspace_score_function(row['Land_type'], 
                                                                                            row['AQ_score'], 
                                                                                            row['Pop_density']), axis=1)

def fill_df(points_df_filled):
    #increase speed execution using smaller memory data type BUT may lose precision
    #16bit: 0.1235
    #32bit: 0.12345679
    #64bit: 0.12345678912121212
    #points_df_filled = points_df_filled.astype(np.float64)

    points_df_filled = apply_aq_functions(points_df_filled)
    points_df_filled = normalise(points_df_filled)
    points_df_filled = apply_aqs_function(points_df_filled)
    points_df_filled = apply_popd_function(points_df_filled)
    try:
        points_df_filled = apply_greenspace_score_function(points_df_filled)
    except:
        pass   #remove when complete
    
    return points_df_filled

In [5]:
points_df_filled = pd.read_csv(ROOT + '/Spikes/Dash/data/points_df.csv', index_col = 0)

fill_df(points_df_filled)

In [None]:
#save dataframe
points_df_filled.to_csv(ROOT + '/Spikes/Dash/data/final_csv.csv')

In [None]:
points_df_filled

Unnamed: 0,Latitude,Longitude,Value_co,Value_no2,Value_o3,Value_so2,Value_ai
0,51.737184,-0.620643,0.764264,0.004088,7.248996,0.032141,-1.031149
1,51.737184,-0.617012,0.764304,0.004140,7.250456,0.032148,-1.035486
2,51.737184,-0.613382,0.764350,0.004143,7.250998,0.032186,-1.037714
3,51.737183,-0.609751,0.764624,0.004142,7.251659,0.032087,-1.035978
4,51.737183,-0.606120,0.764288,0.004142,7.252279,0.031958,-1.039697
...,...,...,...,...,...,...,...
58243,51.238843,0.312049,0.764161,0.002717,7.310894,0.028964,-0.748909
58244,51.238815,0.315640,0.764524,0.002728,7.310550,0.029058,-0.745427
58245,51.238786,0.319231,0.764969,0.002732,7.310113,0.029139,-0.744029
58246,51.238757,0.322822,0.765131,0.002735,7.311109,0.029128,-0.740657
