In [1]:
#IMPORTS
from kaggle_secrets import UserSecretsClient
from IPython.display import clear_output
from geopy.geocoders import Nominatim
from bokeh.io import output_notebook
from datetime import date, timedelta
from tqdm import tqdm
import geopy.distance
import pandas as pd
import numpy as np
import requests
import re

In [2]:
#API KEY
user_secrets = UserSecretsClient()
aqicn_key = user_secrets.get_secret("aqicn_token")
telraam_key = user_secrets.get_secret("telraam_key")
tomtom_key = user_secrets.get_secret("tomtom_key")

In [None]:
url = f'https://api.tomtom.com/traffic/trafficstats/trafficdensity/1?key={tomtom_key}'

payload = {
  "jobName":"Test job",
  "distanceUnit":"KILOMETERS",
  "network": {
    "name": "test",
      "geometry" : {
      "type": "MultiPolygon",
      "coordinates": [
        [
          [
            [19.44305, 51.75612],
            [19.44992, 51.75612],
            [19.44992, 51.75947],
            [19.44305, 51.75947],
            [19.44305, 51.75612]
          ]
        ],
        [
          [
            [19.45011, 51.75789],
            [19.45687, 51.75789],
            [19.45687, 51.75946],
            [19.45011, 51.75946],
            [19.45011, 51.75789]
          ]
        ]
      ]
    },
    "timeZoneId": "Europe/Warsaw",
    "frcs": [
      "0",
      "1",
      "2",
      "3",
      "4",
      "5",
      "6",
      "7",
      "8"
    ]
},
  "dateRange":
    {
      "name":"Last working week of January",
      "from":"2021-01-25",
      "to":"2021-01-29",
      "exclusions":[
        "2021-01-26",
        "2021-01-27"
      ]
    },
  "timeSets":[
    {
      "name":"Monday morning hour",
      "timeGroups":[
        {
          "days":[
            "MON"
          ],
          "times":[
            "7:00-8:00"
          ]
        }
      ]
    }
  ]
}

response = requests.request("POST", url, data=payload)
response.content

In [None]:
#TELRAAM
streets = {
    'AbramJoffe_straße': 9000002109,
    'AlteJakob_straße': 9000002582,
    'Blumenthal_straße': 9000004110,
    'Böckh_straße': 9000002445,
    'Dörpfeldstrasse': 9000002074,
    'Emser_straße': 9000002685,
    'Friesen_straße': 9000003088,
    'Fanninger_straße': 9000003271,
    'Friedel_straße': 9000004118,
    'Gemeinschafts_straße': 9000002204,
    'Genossenschafts_straße': 9000003642,
    'Gleim_straße': 9000003874,
    'Galenus_straße': 9000004233,
    'Grenz_straße': 9000003520,
    'Hentig_straße': 9000003620,
    'Heiligenberger_straße': 9000004336,
    'Heidelberger_straße': 9000004336,
    'Hertzberg_straße': 9000002889,
    'Jessner_straße': 9000004132,
    'Karlsgarten_straße': 9000003783,
    'Lück_straße': 9000003747,
    'Leonhardyweg_straße': 9000003334,
    'Langenscheidt_straße': 9000003997,
    'Lück_straße': 9000003740,
    'Gutenberg_straße': 9000003306,
    'Rolandseck_straße': 9000003991,
    'Radicke_straße': 9000003318,
    'Rheinstein_straße': 9000004284,
    'Tegeler_straße': 9000003144,
    'Matthias_straße': 9000003702,
    'Niederbarnim_straße': 9000003000,
    'Platanen_straße': 9000003006,
    'Proskauer_straße': 9000003072,
    'Rathaus_straße': 9000003738,
    'Schott_straße': 9000004116,
    'Schönlein_straße': 9000004089,
    'Seelower_straße': 9000003786,
    'Simplon_straße': 9000003076,
    'SalvadorAllende-straße': 9000003274,
    'Tuchoslky_straße': 9000004033,
    'Uhland_straße': 9000003439,
    'Vineta_straße': 9000003731,
    'Waldenser_straße': 9000003760,
    'Waldowallee_straße': 9000003172,
    'WernerVossDamm_straße': 9000003312,
    'Weimarer_straße': 9000003267,
    'Wühlisch_straße': 9000004286,
    'Württembergische_straße': 9000003561,
    'Wilski_straße': 9000003084,
    'Xantener_straße': 9000004035,
    'Zossener_Straße': 9000003905,
}

In [None]:
#LIVE DATA FROM ALL CAMERAS  
url = "https://telraam-api.net/v1/reports/traffic_snapshot_live"

payload = {}

headers = {
  'X-Api-Key': telraam_key
}

response = requests.request("GET", url, headers=headers, data=payload)
response.content
json = response.json()
data = pd.DataFrame(json['features'])

In [5]:
#ALL TELRAAM DEVICES FROM BERLIN AREA
url = "https://telraam-api.net/v1/reports/traffic_snapshot"

payload = {
    'time':'live',
    'contents':'minimal',
    'area':'13.115715454463032, 52.67212298591935, 13.735069652205851, 52.33524268936387'
    }

headers = {
  'X-Api-Key': telraam_key
}

response = requests.request("POST", url, headers=headers, data=str(payload))
json = response.json()
telraam_data = pd.DataFrame(json['features'])

#SETTING COORDINATES
coordinates = [str(point['coordinates'][0][len(point['coordinates'])//2]) for point in telraam_data['geometry']]
telraam_data.drop(['type', 'geometry'], axis=1, inplace=True)
telraam_data['coordinates'] = coordinates
telraam_data.set_index('coordinates', inplace=True)

#SETTING VALUES
values = [list(all_values.values()) for all_values in telraam_data['properties']]
telraam_data[list(telraam_data['properties'][0].keys())] = values
telraam_data.drop('properties', axis=1, inplace=True)

#CHECK WHICH SEGMENTS ARE REGULARY ONLINE
online_stations = telraam_data.loc[telraam_data['uptime'].str.isalpha() != False]

#TAKE SEGMENTS AND COORDINATES FROM ONLINE STATIONS
segments = list(online_stations['segment_id'])
coordinates = list(online_stations.index)
segment_coord = dict(zip(segments, coordinates))

In [25]:
#FETCH PAST DATA FROM ALL DEVICES - TELRAAM
telraam_history = pd.DataFrame()
errors = []

url = "https://telraam-api.net/v1/reports/traffic"

today = date.today().strftime("%Y-%m-%d 00:00:00")
from_time = (date.today() - timedelta(days = 30)).strftime("%Y-%m-%d 00:00:00")

for i, [segment, coordinate] in enumerate(segment_coord.items()):
    try:
        clear_output(wait=True)
        print(f'{i+1}/{len(segment_coord.items())}')

        payload  = {
        "level": "segments",
        "format": "per-hour",
        "id": segment,
        "time_start": from_time,
        "time_end": today
        }

        headers = {
          'X-Api-Key': telraam_key
        }

        response = requests.request("POST", url, headers=headers, data=str(payload))
        json = response.json()
        df_temp = pd.DataFrame(json['report'])
        df_temp['coordinates'] = coordinate
        telraam_history = pd.concat([telraam_history, df_temp[['segment_id', 'date', 'heavy', 'car', 'coordinates']]])
    except:
        errors.append(segment)
        print(pd.DataFrame(json))

telraam_history
telraam_history = telraam_history[(telraam_history['heavy']!= 0) & (telraam_history['car'] != 0)]
telraam_history['date'] = telraam_history['date'].map(lambda x: x.split('T')[0] + " " + x.split('T')[1][0:5])

54/54


Unnamed: 0,segment_id,date,heavy,car,coordinates
0,9000001661,2022-10-21T00:00:00.000Z,0.0,0.0,"[13.3095203936159, 52.5088793031756]"
1,9000001661,2022-10-21T01:00:00.000Z,0.0,0.0,"[13.3095203936159, 52.5088793031756]"
2,9000001661,2022-10-21T02:00:00.000Z,0.0,0.0,"[13.3095203936159, 52.5088793031756]"
3,9000001661,2022-10-21T03:00:00.000Z,0.0,0.0,"[13.3095203936159, 52.5088793031756]"
4,9000001661,2022-10-21T04:00:00.000Z,0.0,0.0,"[13.3095203936159, 52.5088793031756]"
...,...,...,...,...,...
152,9000004439,2022-11-19T19:00:00.000Z,0.0,0.0,"[13.5337637935435, 52.4790705032192]"
153,9000004439,2022-11-19T20:00:00.000Z,0.0,0.0,"[13.5337637935435, 52.4790705032192]"
154,9000004439,2022-11-19T21:00:00.000Z,0.0,0.0,"[13.5337637935435, 52.4790705032192]"
155,9000004439,2022-11-19T22:00:00.000Z,0.0,0.0,"[13.5337637935435, 52.4790705032192]"


In [None]:
# COMMUNITY SENSOR DATA
lat_lang = '52.67212298591935,13.115715454463032,52.33524268936387,13.735069652205851'
url = f'https://data.sensor.community/airrohr/v1/filter/box={lat_lang}&type=SDS011'
data = requests.get(url)
json = data.json()
data = pd.DataFrame(json)

In [139]:
telraam_history_v2

Unnamed: 0,segment_id,date,heavy,car,coordinates
5,9000001661,2022-10-21 05:00,11.111111,33.333333,"[13.3095203936159, 52.5088793031756]"
35,9000001661,2022-10-22 11:00,1.425743,11.405941,"[13.3095203936159, 52.5088793031756]"
79,9000001661,2022-10-24 07:00,1.242236,7.453416,"[13.3095203936159, 52.5088793031756]"
183,9000001661,2022-10-28 15:00,1.531263,6.125053,"[13.3095203936159, 52.5088793031756]"
248,9000001661,2022-10-31 08:00,1.261388,11.352488,"[13.3095203936159, 52.5088793031756]"
...,...,...,...,...,...
288,9000004394,2022-11-19 10:00,15.286624,157.961783,"[13.6200191935049, 52.3739569032671]"
289,9000004394,2022-11-19 11:00,9.003215,110.610932,"[13.6200191935049, 52.3739569032671]"
290,9000004394,2022-11-19 12:00,10.249110,80.711744,"[13.6200191935049, 52.3739569032671]"
291,9000004394,2022-11-19 13:00,6.252171,57.519972,"[13.6200191935049, 52.3739569032671]"


In [None]:
#FETCHING ALL DATA FROM COMMUNITY TABLE
url = f'https://public.opendatasoft.com/api/records/1.0/search/?dataset=api-luftdateninfo&q=Berlin&rows=1000&facet=timestamp&facet=land&facet=value_type&facet=is_indoor&refine.is_indoor=0'
data = requests.get(url)
json = data.json()
df = pd.DataFrame(json['records'])
df = pd.DataFrame(list(df['fields']))

In [None]:
#ONLY 18 STATIONS
lat_lang = '52.67212298591935,13.115715454463032,52.33524268936387,13.735069652205851'
url = f'https://api.waqi.info/v2/map/bounds?latlng={lat_lang}&networks=all&token={aqicn_key}'
stations = requests.get(url)
json = stations.json()
df = pd.DataFrame(json)
df = pd.DataFrame(list(df['data']))

In [None]:
#BERLIN AIR QUALITY AQICN
lat = 52.5088793031756
lng = 13.3095203936159
url = f'https://api.waqi.info/feed/geo:{lat};{lng}/?token={aqicn_key}'
air_quality = requests.get(url)
json = air_quality.json()
df = pd.DataFrame(json)

In [127]:
#BERLIN AIR QUALITY 
berlin_air = pd.DataFrame()
today = date.today().strftime("%d.%m.%Y")
from_time = (date.today() - timedelta(days = 30)).strftime("%d.%m.%Y")

station_link = 'https://luftdaten.berlin.de/api/stations?active=true&include_hidden=false'
station_request = requests.get(station_link)
all_stations = pd.read_json(station_request.content)
stations_code = all_stations['code']

for i, code in enumerate(stations_code):
    clear_output(wait=True)
    print(f'{i+1}/{len(stations_code)}')

    data_link = f'https://luftdaten.berlin.de/api/stations/{code}/data?period=1h&timespan=custom&start={from_time}%2000%3A00&end={today}%2000%3A00'
    data_request = requests.get(data_link)
    temp_df = pd.read_json(data_request.content)
    temp_df['datetime'] = temp_df['datetime'].map(lambda x: x.split('T')[0] + " " + x.split('T')[1][0:5])
    temp_df.set_index('datetime', inplace=True)
    temp_df = pd.pivot(temp_df, columns='core', values='value')
    temp_df['station'] = code
    berlin_air = pd.concat([berlin_air, temp_df])

berlin_air.drop(['chb', 'cht', 'co'], axis=1, inplace=True)

18/18


core,no,no2,nox,o3,pm10,pm2,station,chb,cht,co
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-10-21 00:00,2.0,19.0,22.0,25.0,16.0,11.0,mc010,,,
2022-10-21 01:00,2.0,17.0,19.0,25.0,16.0,11.0,mc010,,,
2022-10-21 02:00,2.0,12.0,14.0,27.0,15.0,11.0,mc010,,,
2022-10-21 03:00,2.0,12.0,13.0,26.0,15.0,11.0,mc010,,,
2022-10-21 04:00,2.0,14.0,16.0,21.0,15.0,11.0,mc010,,,
...,...,...,...,...,...,...,...,...,...,...
2022-11-19 20:00,35.0,37.0,91.0,,20.0,16.0,MC221,,,
2022-11-19 21:00,22.0,32.0,65.0,,18.0,15.0,MC221,,,
2022-11-19 22:00,15.0,27.0,50.0,,18.0,14.0,MC221,,,
2022-11-19 23:00,11.0,26.0,43.0,,19.0,15.0,MC221,,,


In [138]:
berlin_air

core,no,no2,nox,o3,pm10,pm2,station
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-10-21 00:00,2.0,19.0,22.0,25.0,16.0,11.0,mc010
2022-10-21 01:00,2.0,17.0,19.0,25.0,16.0,11.0,mc010
2022-10-21 02:00,2.0,12.0,14.0,27.0,15.0,11.0,mc010
2022-10-21 03:00,2.0,12.0,13.0,26.0,15.0,11.0,mc010
2022-10-21 04:00,2.0,14.0,16.0,21.0,15.0,11.0,mc010
...,...,...,...,...,...,...,...
2022-11-19 20:00,35.0,37.0,91.0,,20.0,16.0,MC221
2022-11-19 21:00,22.0,32.0,65.0,,18.0,15.0,MC221
2022-11-19 22:00,15.0,27.0,50.0,,18.0,14.0,MC221
2022-11-19 23:00,11.0,26.0,43.0,,19.0,15.0,MC221


In [None]:
#CALCULATING SMALLEST DISTANCE FROM AIR QUALITY STATION TO TELRAAM DEVICE
station_coord = list(all_stations.apply(lambda x: [x.lat, x.lng], axis=1))
cord_dist = {}
smallest_dist = {}
for coord in online_stations.index:
    coord = re.sub("[^0-9 .]", '', coord)
    lng, lat = coord.split()
    coord = [float(lat), float(lng)]
    for coord2 in station_coord:
        cord_dist[geopy.distance.geodesic(coord, coord2).km] = [lat, lng, coord2[0], coord2[1]]
    smallest_dist[min(cord_dist)] = cord_dist[min(cord_dist)]
    cord_dist = {}

In [None]:
#FORMATING DATA
df = pd.DataFrame(smallest_dist)
df = df.transpose()
df.columns = ['lat1', 'long1', 'lat2', 'long2']
df[['lat1','lat2', 'long1', 'long2']] = df[['lat1','lat2', 'long1', 'long2']].astype(float)
df.sort_index(inplace=True)

#ADDING STATIONS THAT ARE TOO FAR FROM POINT
lat3 = []
long3 = []
for cord in station_coord:
    flag = False
    for cord2 in smallest_dist.values():
        if cord[0] in cord2:
            flag = True
    if flag == False:
        lat3.append(cord[0])
        long3.append(cord[1])

In [None]:
km = [1,2,3,4,5,6,7,8,9]
count = np.array(sum(i < df.index for i in km))
a, b = np.unique(count, return_counts=True)
dict(zip(a, b))

In [None]:
#CREAING POINTS ON MAP
import plotly.express as px
import pandas as pd

source = ['telraam'] * len(df['lat1']) + ['closer'] * len(df['lat2']) + ['further'] * len(lat3)
size = [0.25] * len(df['lat1']) + [0.5] * len(df['lat2']) + [0.5] * len(lat3)
lat = [*list(df['lat1']), *list(df['lat2']), *lat3]
long = [*list(df['long1']), *list(df['long2']), *long3]
new_df = pd.DataFrame([lat, long, source])
new_df = new_df.transpose()
new_df.columns = ['lat', 'long', 'source']

color_map  = {'telraam': 'blue', 'closer': 'yellow', 'further': 'red'}


fig = px.scatter_mapbox(new_df,
                        lat="lat", 
                        lon="long",
                        color='source',
                        color_discrete_map=color_map,
                        mapbox_style="open-street-map",
                        size = size,
                        zoom=9,
                        size_max = 15,
                        height=800,
                        width=1600)

fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
output_notebook()
fig.show()

In [None]:
#TOM TOM - 2500 querries na dzień

link = f'https://api.tomtom.com/traffic/services/4/flowSegmentData/absolute/10/json?key={tomtom_key}&point=52.524934,13.410133'
traffic_request = requests.get(link)
print(traffic_request.status_code)
traffic_data = pd.read_json(traffic_request.content)
traffic_flow = np.nan
if traffic_data.loc['roadClosure'][0] == False:
    traffic_flow = float(traffic_data.loc['freeFlowSpeed'] * 100 / traffic_data.loc['freeFlowTravelTime'])

In [None]:
#FIND NAME OF THE STREET
locator = Nominatim(user_agent='myGeocoder')
coordinates = '52.5090895031756, 13.3094958936159'
location = locator.reverse(coordinates)
location.raw['address']['road']

In [None]:
#UK
GROUPS = 'https://api.erg.ic.ac.uk/AirQuality/Information/Groups/Json' 
GROUPS = requests.get(GROUPS)

SITES = 'https://api.erg.ic.ac.uk/AirQuality/Information/MonitoringSites/GroupName=London'
SITES = requests.get(SITES)

In [None]:
sites = pd.read_xml(SITES.content)
open_sites = sites[sites['DateClosed'].isna()]
site_codes = list(open_sites['SiteCode'])
open_sites

In [None]:
#UK AIR QUALITY

df = pd.DataFrame()
today = date.today().strftime("%Y-%m-%d")
yesterday = (date.today() - timedelta(days = 1)).strftime("%Y-%m-%d")

for i in tqdm(range(len(site_codes))):
    try:
        querry = f'https://api.erg.ic.ac.uk/AirQuality/Data/Site/SiteCode={site_codes[i]}/StartDate={yesterday}/EndDate={today}'
        data = requests.get(querry)
        temp_df = pd.read_xml(data.content)
        temp_df = temp_df.pivot_table(index=['MeasurementDateGMT'], columns=['SpeciesCode'], values="Value")
        temp_df.insert(0, 'SITE_CODE', site_codes[i])
        df = pd.concat([df, temp_df])
    except:
        print(site_codes[i])
        continue

In [None]:
df

In [None]:
#UK AIR QUALITY

all_sites = list(df['SITE_CODE'].unique())
check_sites = list(set(site_codes) - set(all_sites))

errors = pd.DataFrame()
today = date.today().strftime("%Y-%m-%d")
yesterday = (date.today() - timedelta(days = 1)).strftime("%Y-%m-%d")

for site in check_sites:
        querry = f'https://api.erg.ic.ac.uk/AirQuality/Data/Site/SiteCode={site}/StartDate={yesterday}/EndDate={today}'
        print(querry)
        data = requests.get(querry)
        print(pd.read_xml(data.content))

In [None]:
#SEOUL POLLUTION DATA

CO2_PATH = "../input/seoul-co2/Seoul co2.csv"
POLLUTION_PATH = "../input/air-pollution-in-seoul/AirPollutionSeoul/Measurement_summary.csv"

co2_data = pd.read_csv(CO2_PATH).drop('City', axis=1)
co2_data['Date'] = pd.date_range(start="2019-01-01",end="2019-12-31").date
co2_data['Date'] = co2_data['Date'].astype(str)
co2_data.set_index('Date', inplace=True)

pollution_data = pd.read_csv(POLLUTION_PATH).drop(['Station code', 'Address', 'Latitude', 'Longitude'], axis=1)
pollution_data = pollution_data[pollution_data['Measurement date'].str.contains('2019')].set_index("Measurement date")
pollution_data = pollution_data[pollution_data > 0]

In [None]:
nan = pollution_data.isna().sum().sum()
notnan = pollution_data.notna().sum().sum()
print(f'NAN Percentage {round(nan*100/notnan, 2)}%')

In [None]:
dates = pd.date_range(start="2019-01-01",end="2019-12-31")
values = []
for date in dates:
    day = str(date.date())
    df_temp = pollution_data[pollution_data.index.str.contains(day)]
    value = df_temp.mean().values
    values.append([day, *value])

df = pd.DataFrame(values, columns=pollution_data.columns.insert(0, 'DATE'))
df.dropna(inplace=True)
df.set_index('DATE', inplace=True)
df.sort_index(inplace=True)
df = df.merge(co2_data, left_index=True, right_index=True)