# Title
## Subtitle

Author name

# First slide

Content of the first slide

In [1]:
import numpy as np

In [2]:
np.random.randint(low=1, high=7, size=15)

array([6, 2, 3, 5, 5, 3, 2, 4, 4, 4, 4, 3, 1, 1, 1])

# Second slide

Example of ipyleaflet showing maps

In [3]:
from ipyleaflet import Map, Marker

In [4]:
center = (52.204793, 360.121558)
m = Map(center=center, zoom=15)
marker = Marker(location=center, draggable=True)
m.add_layer(marker);
display(m)

Map(center=[52.204793, 360.121558], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title'…

## Example of heatmap on top of map

In [5]:
from ipyleaflet import Map, Heatmap
from random import uniform
m = Map(center=(0, 0), zoom=2)

heatmap = Heatmap(
    locations=[[uniform(-80, 80), uniform(-180, 180), uniform(0, 1000)] for i in range(2000)],
    radius=20,
    gradient={0.4: 'blue', 0.6: 'cyan', 0.7: 'lime', 0.8: 'yellow', 1.0: 'red'}
)

m.add_layer(heatmap);
display(m)

Map(center=[0, 0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_text'…

# Opendata Bristol

Example of OpenData Bristol on top of Bristol's map

In [6]:
air_quality_bristol_query = 'https://opendata.bristol.gov.uk/api/records/1.0/search/?dataset=air-quality-data-continuous&q=&rows=1000&sort=date_time&facet=date_time&facet=coordinates&facet=temp'

import urllib, json

response = urllib.request.urlopen(air_quality_bristol_query)

data = json.loads(response.read())

print('Json root keys')
print(data.keys())
print('3 records')
data['records'][:3]

Json root keys
dict_keys(['nhits', 'parameters', 'records', 'facet_groups'])
3 records


[{'datasetid': 'air-quality-data-continuous',
  'recordid': 'ab1ad5fd789cfc7afef424f411c2e206a7bcb433',
  'fields': {'datestart': '2002-02-01T00:00:00+00:00',
   'temp': 9.924299,
   'date_time': '2021-02-15T17:00:00+00:00',
   'no': 24.25415,
   'geo_point_2d': [51.432675707, -2.60495665673],
   'current': 'True',
   'nox': 87.210001,
   'location': 'Parson Street School',
   'siteid': 215,
   'no2': 50.059687,
   'rh': 24.55922,
   'instrumenttype': 'Continuous (Reference)',
   'pm25': 7.2222231},
  'geometry': {'type': 'Point', 'coordinates': [-2.60495665673, 51.432675707]},
  'record_timestamp': '2021-02-15T17:15:11.183000+00:00'},
 {'datasetid': 'air-quality-data-continuous',
  'recordid': '8382f7cf019c6cec0950f5c729a0d4f46fc1e48f',
  'fields': {'datestart': '2003-05-23T00:00:00+00:00',
   'date_time': '2021-02-15T17:00:00+00:00',
   'no': 55.4915,
   'geo_point_2d': [51.4278638883, -2.56374153315],
   'current': 'True',
   'nox': 127.850628,
   'location': 'Wells Road',
   'sitei

In [7]:
def get_records(data, fields=None):
    '''
    data: json object
    fields: list of strings
        Each dictionary entry with the field name is retrieved in the given order
    return
    list of list of values
    '''
    for record in data['records']:
        record_fields = record['fields']
        if fields is None:
            yield record_fields
        else:
            for key in fields:
                yield [record_fields.get(key, np.nan) for key in fields]

fields = ['date_time', 'geo_point_2d', 'temp', 'no', 'no2', 'nox']
                
record_values = list(get_records(data, fields=fields))

record_values[:5]

[['2021-02-15T17:00:00+00:00',
  [51.432675707, -2.60495665673],
  9.924299,
  24.25415,
  50.059687,
  87.210001],
 ['2021-02-15T17:00:00+00:00',
  [51.432675707, -2.60495665673],
  9.924299,
  24.25415,
  50.059687,
  87.210001],
 ['2021-02-15T17:00:00+00:00',
  [51.432675707, -2.60495665673],
  9.924299,
  24.25415,
  50.059687,
  87.210001],
 ['2021-02-15T17:00:00+00:00',
  [51.432675707, -2.60495665673],
  9.924299,
  24.25415,
  50.059687,
  87.210001],
 ['2021-02-15T17:00:00+00:00',
  [51.432675707, -2.60495665673],
  9.924299,
  24.25415,
  50.059687,
  87.210001]]

In [8]:
m = Map(center=(51.454500, -2.587900), zoom=12)
gradient={0.4: 'blue', 0.6: 'cyan', 0.7: 'lime', 0.8: 'yellow', 1.0: 'red'}

position_id = []

field_map = {field: i for i, field in enumerate(fields)}
heatmap = Heatmap(
    locations=[[values[field_map['geo_point_2d']][0],
                values[field_map['geo_point_2d']][1],
                values[field_map['no']]] for values in record_values],
    radius=20,
    gradient=gradient
)

m.add_layer(heatmap);
display(m)

Map(center=[51.4545, -2.5879], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zo…

# Pandas dataframe

We will use pandas to analyse the data

In [9]:
import pandas as pd
df_records = pd.DataFrame(record_values, columns=fields)
df_records[['Longitude', 'Latitude']] = pd.DataFrame(df_records['geo_point_2d'].tolist(), index= df_records.index)
df_records = df_records.sort_values(by='date_time')
df_records = df_records.drop_duplicates(subset=['Longitude', 'Latitude'], keep='first')
df_records

Unnamed: 0,date_time,geo_point_2d,temp,no,no2,nox,Longitude,Latitude
5999,2021-02-09T15:00:00+00:00,"[51.4552693825, -2.59664882861]",,23.26001,44.932065,80.601245,51.455269,-2.596649
5988,2021-02-09T15:00:00+00:00,"[51.4278638883, -2.56374153315]",,15.77455,18.599062,42.84,51.427864,-2.563742
5964,2021-02-09T16:00:00+00:00,"[51.432675707, -2.60495665673]",0.319902,42.08625,60.578438,125.173126,51.432676,-2.604957
5963,2021-02-09T16:00:00+00:00,"[51.4579497129, -2.58398909033]",,36.546,58.523,114.559,51.45795,-2.583989
5951,2021-02-09T16:00:00+00:00,"[51.4417471802, -2.55995583224]",,3.460425,13.3875,18.646875,51.441747,-2.559956
5986,2021-02-09T16:00:00+00:00,"[51.4628294172, -2.58454081635]",,,,,51.462829,-2.584541
5981,2021-02-09T16:00:00+00:00,"[51.4780449714, -2.53523027459]",,27.309301,37.67625,79.607812,51.478045,-2.53523


# Binary probabilistic classifier

Simulation of a probabilistic classifier with a linear transformation to convert raw values into the interval [0, 1]

In [10]:
df_records = df_records.dropna(subset=['no'])
df_records['no_scaled'] = (df_records['no'] - df_records['no'].min())/(df_records['no'].max() - df_records['no'].min())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_records['no_scaled'] = (df_records['no'] - df_records['no'].min())/(df_records['no'].max() - df_records['no'].min())


In [11]:
locations = df_records[['Longitude', 'Latitude', 'no_scaled']].values
locations = [list(row) for row in locations]

In [12]:
m = Map(center=(51.454500, -2.587900), zoom=12)
gradient={0.0: 'blue', 1.0: 'red'}

heatmap = Heatmap(
    locations=locations,
    radius=40,
    gradient=gradient,
    min_opacity=0.8,
)

m.add_layer(heatmap);
display(m)

Map(center=[51.4545, -2.5879], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zo…