# SSML group assignment

### Packages

In [1]:
import numpy as np
import osmnx as ox
import pandas as pd
from scipy import stats
import geopandas as gpd
# Configure OSMnx through its settings module instead of the deprecated
# ox.config() helper: cache HTTP responses on disk and echo log messages
# to the console.
ox.settings.use_cache = True
ox.settings.log_console = True

  ox.config(use_cache=True, log_console=True)


### GHS data

In [2]:
def n_biggest_cities_europe(n:int, csv_path:str="cities.csv") -> list:
    '''
    Return the names of the N most populous European cities.

    n is the number of cities to return.
    csv_path is the path of the semicolon-delimited opendatasoft cities file.
    It must contain the columns Name, Timezone and Population.

    A city is considered European when its Timezone value contains 'Europe'.
    This means that Turkish and Russian cities are often considered European.
    Cities without a Timezone value are treated as non-European.

    The returned list is ordered from the largest to the smallest population and
    holds fewer than N names when fewer European cities are available.
    '''

    cities_df = pd.read_csv(csv_path, delimiter=";")

    # na=False maps a missing timezone to False. Without it the mask would
    # contain NaN and pandas refuses to index a DataFrame with such a mask.
    is_european = cities_df['Timezone'].str.contains('Europe', na=False)

    # Keep the N biggest European cities, largest first
    biggest_df = cities_df[is_european].nlargest(n, 'Population')

    return biggest_df['Name'].tolist()

def get_cities_ghs(df:pd.DataFrame, cities:list) -> pd.DataFrame:
    '''
    Filter the GHS dataset down to the requested cities.

    df is a complete dataset from GHS; it must contain the column UC_NM_MN.
    cities is the list of city names that we want to keep.

    Names are compared case-insensitively (both sides are lower-cased).
    Every GHS row whose name equals a requested city is kept, so a name that
    occurs several times in GHS yields several rows.
    Cities that cannot be found are reported with a message and skipped.

    The returned DataFrame follows the order of the cities list, has a fresh
    0..n-1 index and a lower-cased UC_NM_MN column. It is empty (same columns)
    when none of the cities is found. The df that was passed in is not modified.
    '''

    # Lower-case into a separate Series so the caller's DataFrame stays untouched
    ghs_names = df['UC_NM_MN'].str.lower()

    cities_list = []
    for city in cities:
        city = city.lower()
        city_df = df[ghs_names == city]
        if city_df.empty:
            print(f"Unable to find {city} in GHS DataFrame! We have excluded this city from further use.")
        else:
            cities_list.append(city_df)

    if not cities_list:
        # pd.concat raises a ValueError on an empty list, so return an empty
        # frame with the same columns instead.
        return df.iloc[0:0].copy()

    result = pd.concat(cities_list, ignore_index=True)
    # Callers receive lower-cased names, exactly as before
    result["UC_NM_MN"] = result["UC_NM_MN"].str.lower()
    return result

def get_cities_osmnx(df:pd.DataFrame) -> pd.DataFrame:
    '''
    Add street-orientation measures from OSMnx to every city in df.

    df is the filtered dataset from GHS; the column UC_NM_MN is used as the
    place name that is looked up with OSMnx.

    Two columns are added to df (in place) and the same df is returned:
    orientation_entropy: entropy of the street bearings of the drive network.
    orientation_order: 1 - ((entropy - min) / (max - min)) ** 2, where min is the
    entropy of a perfect grid (4 equally filled bins) and max is the entropy
    of a uniform distribution over all bins.

    When the calculation fails for a city a message is printed and both values
    stay NaN for that row, so a failure cannot be mistaken for a measurement.
    '''

    df["orientation_entropy"] = np.nan
    df["orientation_order"] = np.nan
    entropy_bins = 36

    # The entropy bounds only depend on the number of bins, so compute them once
    perfect_grid = [1] * 4 + [0] * (entropy_bins - 4)
    min_entropy = stats.entropy(perfect_grid)
    max_entropy = np.log(entropy_bins)

    # enumerate numbers every city, also the ones that fail
    for counter, (index, city) in enumerate(df['UC_NM_MN'].items(), start=1):
        print(f"Currently we are calculating city number {counter}, {city}")
        try:
            Gu = ox.add_edge_bearings(ox.get_undirected(ox.graph_from_place(city, network_type='drive')))
            # Use the same number of bins as the entropy bounds above
            orientation_entropy = ox.bearing.orientation_entropy(Gu, num_bins=entropy_bins)
        except Exception as error:
            # Exception (not a bare except) keeps Ctrl+C working during long runs.
            # The row keeps its NaN values.
            print(f"We have failed to make the calculations for {city}: {error}")
            continue

        orientation_order = 1 - ((orientation_entropy - min_entropy) / (max_entropy - min_entropy)) ** 2

        df.at[index, "orientation_entropy"] = orientation_entropy
        df.at[index, "orientation_order"] = orientation_order

    return df

In [3]:
# Load the complete GHS dataset from the GeoPackage file
ghs_full_df = gpd.read_file("ghs/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg")

# Names of the 50 biggest European cities
cities = n_biggest_cities_europe(n=50)

# Keep only those cities from GHS and add the OSMnx orientation values
df = get_cities_osmnx(df=get_cities_ghs(df=ghs_full_df, cities=cities))



Unable to find i̇zmir in GHS DataFrame! We have excluded this city from further use.
Unable to find diyarbakır in GHS DataFrame! We have excluded this city from further use.
Unable to find nizhniy novgorod in GHS DataFrame! We have excluded this city from further use.
Unable to find rostov-na-donu in GHS DataFrame! We have excluded this city from further use.
Unable to find köln in GHS DataFrame! We have excluded this city from further use.
Unable to find eskişehir in GHS DataFrame! We have excluded this city from further use.
Currently we are calculating city number 1, istanbul
Currently we are calculating city number 2, moscow
Currently we are calculating city number 3, london
Currently we are calculating city number 4, london
Currently we are calculating city number 5, saint petersburg
Currently we are calculating city number 6, ankara


  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


Currently we are calculating city number 7, berlin
Currently we are calculating city number 8, madrid
Currently we are calculating city number 9, bursa
Currently we are calculating city number 10, bursa
Currently we are calculating city number 11, kyiv
Currently we are calculating city number 12, rome
Currently we are calculating city number 13, paris
Currently we are calculating city number 14, bucharest
Currently we are calculating city number 15, hamburg
Currently we are calculating city number 16, gaziantep
Currently we are calculating city number 17, adana
Currently we are calculating city number 18, minsk
Currently we are calculating city number 19, budapest
Currently we are calculating city number 20, warsaw
Currently we are calculating city number 21, vienna
Currently we are calculating city number 22, barcelona
Currently we are calculating city number 23, barcelona
Currently we are calculating city number 24, stockholm
Currently we are calculating city number 25, kharkiv
Curre

  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


Currently we are calculating city number 27, milan
Currently we are calculating city number 28, antalya


  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


Currently we are calculating city number 29, belgrade
Currently we are calculating city number 30, munich
Currently we are calculating city number 31, kazan
Currently we are calculating city number 32, kayseri
Currently we are calculating city number 33, prague
Currently we are calculating city number 34, samara
Currently we are calculating city number 35, copenhagen
Currently we are calculating city number 36, sofia
Currently we are calculating city number 37, birmingham
Currently we are calculating city number 38, birmingham
Currently we are calculating city number 39, voronezh
Currently we are calculating city number 40, dublin
Currently we are calculating city number 41, brussels
Currently we are calculating city number 42, odesa
Currently we are calculating city number 43, volgograd
Currently we are calculating city number 44, dnipro
Currently we are calculating city number 45, naples
Currently we are calculating city number 46, naples
Currently we are calculating city number 47, 

In [4]:
# Show the orientation entropy that was calculated for every city
print(df.loc[:, "orientation_entropy"])

0     3.576373
1     3.580096
2     3.566809
3     3.566809
4     3.563382
5     3.573773
6     3.571679
7     3.561082
8     3.532571
9     3.532571
10    3.565109
11    3.578502
12    3.570617
13    3.572389
14    3.562467
15    3.559815
16    3.522194
17    3.569592
18    3.528182
19    3.547383
20    3.527261
21    3.460493
22    3.460493
23    3.577206
24    3.570279
25    3.564995
26    3.553154
27    3.553697
28    3.563658
29    3.497077
30    3.503014
31    3.571473
32    3.529855
33    3.485791
34    2.955305
35    3.559401
36    3.581984
37    3.581984
38    3.545326
39    3.554272
40    3.541381
41    3.551962
42    3.533449
43    3.548593
44    3.575805
45    3.575805
46    3.451656
47    3.343622
48    3.569339
Name: orientation_entropy, dtype: float64
