In [9]:
# All necessary imports for this notebook

import math
import pandas as pd
import geohash

In [10]:
# Load the Air Tube measurements from CSV.
# read_csv already returns a DataFrame, so no pd.DataFrame(...) wrapper is needed.

df = pd.read_csv('data_bg_2017.csv')

In [11]:
# Extract the value stored in the 'longitude' column from a geohash
def get_longitude(ghash):
    """Return element 0 of ``geohash.decode(ghash)``, or -1 if decoding fails.

    A ``ValueError`` or ``TypeError`` raised while decoding is reported on
    stdout together with the offending value, and the sentinel -1 is returned
    instead of propagating the exception.

    NOTE(review): if ``geohash`` is the python-geohash package, ``decode``
    returns (latitude, longitude), so element 0 would be the latitude despite
    this function's name (the sample output shows values around 42.6 here).
    Confirm before renaming anything: later cells rely on the current column
    contents.
    """
    try:
        return geohash.decode(ghash)[0]
    except (ValueError, TypeError) as err:
        # Both failure modes are handled the same way: log and return the sentinel.
        message = "value: {}, error: {}".format(ghash, err)
        print(message)
        return -1

# Extract the value stored in the 'latitude' column from a geohash
def get_lattitude(ghash):
    """Return element 1 of ``geohash.decode(ghash)``, or -1 if decoding fails.

    A ``ValueError`` or ``TypeError`` raised while decoding is reported on
    stdout together with the offending geohash, and the sentinel -1 is
    returned instead of propagating the exception.

    NOTE(review): if ``geohash`` is the python-geohash package, ``decode``
    returns (latitude, longitude), so element 1 would be the longitude despite
    this function's name (the sample output shows values around 23.3 here).
    Confirm before renaming anything: later cells rely on the current column
    contents.
    """
    try:
        return geohash.decode(ghash)[1]
    except (ValueError, TypeError) as err:
        # Both failure modes are handled the same way: log and return the sentinel.
        message = "geohash: {}, error: {}".format(ghash, err)
        print(message)
        return -1

# Decode every geohash into the two coordinate columns.
# Undecodable geohashes end up as -1 in both columns (see the helper functions).
geohash_values = df['geohash']
df['longitude'] = geohash_values.map(get_longitude)
df['latitude'] = geohash_values.map(get_lattitude)

value: m-2105171, error: geohash code is [0123456789bcdefghjkmnpqrstuvwxyz]+
geohash: m-2105171, error: geohash code is [0123456789bcdefghjkmnpqrstuvwxyz]+


In [12]:
# Keep only rows whose geohash decoded successfully.
# -1 is the sentinel written by the decoding helpers when a geohash is invalid.

has_valid_coordinates = df['longitude'].ne(-1) & df['latitude'].ne(-1)
df = df.loc[has_valid_coordinates]

In [13]:
# Filter data only for Sofia

def distance(x1, x2, y1, y2):
    """Return the straight-line distance between points (x1, x2) and (y1, y2).

    Note the argument order: the first two arguments are the two coordinates
    of one point and the last two are the coordinates of the other, so the
    function can be called as ``distance(*point_a, *point_b)``. The result is
    a plain Euclidean distance expressed in the same units as the inputs.
    """
    delta_first = y1 - x1
    delta_second = y2 - x2
    return math.sqrt(delta_first ** 2 + delta_second ** 2)

# Reference points. Each pair is compared element-wise against the
# ('longitude', 'latitude') columns of df, in that order.
sofia_center = [42.697708, 23.321867]
german_village = [42.609620, 23.410710]

# Every point closer to sofia_center than german_village is treated as "in Sofia".
# NOTE(review): distance() works on the raw coordinate values (presumably degrees),
# so this is a circle in coordinate space, not a great-circle radius -- confirm that
# this approximation is acceptable.
sofia_radius = distance(*sofia_center, *german_village)

# Flag each row with 1 (inside the radius) or 0 (outside).
# Iterating over the two columns directly avoids DataFrame.apply(axis=1), which
# builds a Series object per row, and it also works when df is empty.
in_sofia_flags = [
    1 if distance(first, second, *sofia_center) < sofia_radius else 0
    for first, second in zip(df['longitude'], df['latitude'])
]

# assign() returns a new frame instead of writing into df in place, so no
# SettingWithCopyWarning is raised even though df is itself a filtered frame.
df = df.assign(in_sofia=in_sofia_flags)

# unique_geohashes = df['geohash'].unique()

# Save to CSV
# df[df.in_sofia == 1][['geohash', 'longitude', 'latitude']].to_csv('geohash_mapping_sofia_2017.csv', index=False)

sofia_df = df[df.in_sofia == 1]

sofia_df.head()

Unnamed: 0,time,geohash,P1,P2,temperature,humidity,pressure,longitude,latitude,in_sofia
0,2017-09-06T20:00:00Z,sx8d5r7wmxr,9,8,14,55,0,42.58,23.351,1
1,2017-09-06T20:00:00Z,sx8d6zjg5h8,9,8,0,0,0,42.622,23.332,1
6,2017-09-06T20:00:00Z,sx8dem6pp1h,15,11,14,54,94841,42.657,23.349,1
9,2017-09-06T20:00:00Z,sx8devjgkk2,14,13,18,10,0,42.655,23.375999,1
12,2017-09-06T20:00:00Z,sx8d9g29ns5,0,0,17,55,93535,42.645,23.281,1


In [14]:
# Summary statistics for the Sofia-only rows.
# NOTE(review): the recorded output shows a temperature minimum of -148 and zero
# minima for humidity and pressure -- presumably faulty or missing sensor
# readings; confirm and clean before relying on these columns.
sofia_df.describe()

Unnamed: 0,P1,P2,temperature,humidity,pressure,longitude,latitude,in_sofia
count,419642.0,419642.0,419642.0,419642.0,419642.0,419642.0,419642.0,419642.0
mean,43.176872,23.397098,8.939606,61.674442,81517.876693,42.678531,23.323406,1.0
std,78.353541,39.862942,12.704712,22.505932,33257.032365,0.029506,0.041544,0.0
min,0.0,0.0,-148.0,0.0,0.0,42.58,23.229,1.0
25%,10.0,7.0,4.0,50.0,93982.0,42.661,23.291,1.0
50%,22.0,13.0,9.0,66.0,94915.0,42.68,23.323999,1.0
75%,45.0,26.0,14.0,78.0,95513.0,42.694,23.355,1.0
max,2000.0,1000.0,63.0,100.0,162962.0,42.804,23.419001,1.0
