In [1]:
from google.colab import drive
import pandas as pd
import numpy as np
import os
import glob
from datetime import *
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
class display(object):
    """Display HTML representation of multiple objects"""
    template = """<div style="float: left; padding: 10px;">
    <p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
    </div>"""
    def __init__(self, *args):
        self.args = args

    def _repr_html_(self):
        return '\n'.join(self.template.format(a, eval(a)._repr_html_())
                         for a in self.args)

    def __repr__(self):
        return '\n\n'.join(a + '\n' + repr(eval(a))
                           for a in self.args)

In [3]:
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
PATH = "drive/MyDrive/Hackathon - Air Quality Notebooks/Data"

dfs = []
for i in range(0, 6):
  tmp_df = pd.read_csv(f'{PATH}/data{i:02d}.csv', sep=',')
  dfs.append(tmp_df)
df = pd.concat(dfs)

# Convert Timestamp String to DateTime

In [5]:
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.sort_values("timestamp")

In [6]:
print(df.dtypes)
df

timestamp          datetime64[ns, pytz.FixedOffset(60)]
value                                           float64
parameter                                        object
device_id                                         int64
chip_id                                          object
sensor_type                                       int64
sensor_id                                         int64
location_id                                       int64
location                                         object
street_name                                      object
city                                             object
country                                          object
latitude                                        float64
longitude                                       float64
deployment_date                                  object
dtype: object


Unnamed: 0,timestamp,value,parameter,device_id,chip_id,sensor_type,sensor_id,location_id,location,street_name,city,country,latitude,longitude,deployment_date
153157,2022-01-01 00:00:18.085000+01:00,27.00,PM 2.5,97,esp8266-1534596,9,158,7,Code for Kenya,"Nairobi Garage, 8th Floor, Pinetree Plaza, Kab...",Nairobi,Kenya,-1.298294,36.790870,2020-01-21 12:12:46.533 +0100
153158,2022-01-01 00:00:18.085000+01:00,32.00,PM 10,97,esp8266-1534596,9,158,7,Code for Kenya,"Nairobi Garage, 8th Floor, Pinetree Plaza, Kab...",Nairobi,Kenya,-1.298294,36.790870,2020-01-21 12:12:46.533 +0100
153159,2022-01-01 00:00:18.085000+01:00,17.67,PM 1,97,esp8266-1534596,9,158,7,Code for Kenya,"Nairobi Garage, 8th Floor, Pinetree Plaza, Kab...",Nairobi,Kenya,-1.298294,36.790870,2020-01-21 12:12:46.533 +0100
153160,2022-01-01 00:00:24.050000+01:00,94.40,Humidity,97,esp8266-1534596,2,159,7,Code for Kenya,"Nairobi Garage, 8th Floor, Pinetree Plaza, Kab...",Nairobi,Kenya,-1.298294,36.790870,2020-01-21 12:13:17.252 +0100
153161,2022-01-01 00:00:24.050000+01:00,16.50,Temperature,97,esp8266-1534596,2,159,7,Code for Kenya,"Nairobi Garage, 8th Floor, Pinetree Plaza, Kab...",Nairobi,Kenya,-1.298294,36.790870,2020-01-21 12:13:17.252 +0100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
985859,2022-09-10 09:19:51.687000+01:00,22.70,Temperature,105,esp8266-1532917,2,177,3612,Redcliffe Gardens,Muringa Road,Nairobi,Kenya,-1.296000,36.776000,2020-08-12 11:06:59.812 +0100
985860,2022-09-10 09:19:53.711000+01:00,61.20,Humidity,99,esp8266-1530897,2,163,7,Code for Kenya,"Nairobi Garage, 8th Floor, Pinetree Plaza, Kab...",Nairobi,Kenya,-1.298294,36.790870,2020-01-21 12:47:32.742 +0100
985861,2022-09-10 09:19:53.711000+01:00,22.20,Temperature,99,esp8266-1530897,2,163,7,Code for Kenya,"Nairobi Garage, 8th Floor, Pinetree Plaza, Kab...",Nairobi,Kenya,-1.298294,36.790870,2020-01-21 12:47:32.742 +0100
985863,2022-09-10 09:20:02.183000+01:00,22.70,Temperature,72,esp8266-2609194,2,139,3576,"Mathare Social Justice Center, Juja Rd",Juja Rd,Nairobi,Kenya,-1.265495,36.856859,2019-03-27 10:03:36.464 +0100


In [17]:
# Grouping by 'device_id' and similar timestamps, aggregating 'value_type' and 'value' into lists
grouped = (df.groupby(['device_id', pd.Grouper(key='timestamp', freq='10s')])
           .agg({'parameter': list, 'value': list, 'latitude': max, 'longitude': max})
           .reset_index())

In [22]:
grouped['parameter'] = grouped['parameter'].apply(np.array)
grouped['value'] = grouped['value'].apply(np.array)

In [23]:
df27 = df[df['device_id'] == 27].head(25)
grouped27 = grouped[grouped['device_id'] == 27].head(25)
display('grouped27', 'df27')

Unnamed: 0,device_id,timestamp,parameter,value,latitude,longitude
0,27,2022-05-25 08:58:20+01:00,"[PM 10, PM 2.5, PM 1, Humidity, Temperature]","[59.5, 46.5, 32.0, 54.4, 24.1]",-1.288985,36.824679
1,27,2022-05-25 08:58:50+01:00,"[PM 1, PM 10, PM 2.5, Temperature, Humidity]","[29.0, 48.0, 41.0, 24.0, 54.8]",-1.288985,36.824679
2,27,2022-05-25 08:59:20+01:00,"[PM 2.5, PM 10, PM 1, Temperature, Humidity]","[49.0, 61.5, 33.0, 24.1, 55.2]",-1.288985,36.824679
3,27,2022-05-25 08:59:50+01:00,"[PM 10, PM 2.5, PM 1, Temperature, Humidity]","[62.0, 48.4, 34.0, 24.1, 56.0]",-1.288985,36.824679
4,27,2022-05-25 09:00:30+01:00,"[PM 2.5, PM 10, PM 1, Humidity, Temperature]","[45.5, 55.5, 32.0, 54.7, 24.2]",-1.288985,36.824679
5,27,2022-06-14 08:08:50+01:00,"[PM 2.5, PM 1, PM 10, Temperature, Humidity]","[28.4, 20.6, 35.4, 20.9, 62.5]",-1.288985,36.824679
6,27,2022-06-14 08:09:20+01:00,"[PM 2.5, PM 10, PM 1, Humidity, Temperature]","[23.6, 27.6, 16.4, 62.0, 20.9]",-1.288985,36.824679
7,27,2022-06-14 08:09:50+01:00,"[PM 2.5, PM 10, PM 1, Humidity, Temperature]","[30.2, 34.0, 19.8, 61.5, 20.9]",-1.288985,36.824679
8,27,2022-06-14 08:10:20+01:00,"[PM 1, PM 10, PM 2.5]","[17.5, 30.0, 26.0]",-1.288985,36.824679
9,27,2022-06-14 08:10:30+01:00,"[Humidity, Temperature]","[61.3, 20.9]",-1.288985,36.824679

Unnamed: 0,timestamp,value,parameter,device_id,chip_id,sensor_type,sensor_id,location_id,location,street_name,city,country,latitude,longitude,deployment_date
334708,2022-05-25 08:58:21.831000+01:00,59.5,PM 10,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
334707,2022-05-25 08:58:21.831000+01:00,46.5,PM 2.5,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
334709,2022-05-25 08:58:21.831000+01:00,32.0,PM 1,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
334710,2022-05-25 08:58:22.622000+01:00,54.4,Humidity,27,esp8266-11639153,2,50,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:38:12.408 +0100
334711,2022-05-25 08:58:22.622000+01:00,24.1,Temperature,27,esp8266-11639153,2,50,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:38:12.408 +0100
334719,2022-05-25 08:58:54.043000+01:00,29.0,PM 1,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
334718,2022-05-25 08:58:54.043000+01:00,48.0,PM 10,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
334717,2022-05-25 08:58:54.043000+01:00,41.0,PM 2.5,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
334721,2022-05-25 08:58:55.115000+01:00,24.0,Temperature,27,esp8266-11639153,2,50,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:38:12.408 +0100
334720,2022-05-25 08:58:55.115000+01:00,54.8,Humidity,27,esp8266-11639153,2,50,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:38:12.408 +0100


# Time Gap
These are the ones with massive time gaps between them (3o minutes or more). We could theoretically use this set as our test set.

In [24]:
massiveTimeGap = grouped[grouped['parameter'].str.len() < 5].copy()

In [25]:
massiveTimeGap

Unnamed: 0,device_id,timestamp,parameter,value,latitude,longitude
8,27,2022-06-14 08:10:20+01:00,"[PM 1, PM 10, PM 2.5]","[17.5, 30.0, 26.0]",-1.288985,36.824679
9,27,2022-06-14 08:10:30+01:00,"[Humidity, Temperature]","[61.3, 20.9]",-1.288985,36.824679
14,27,2022-06-14 08:14:20+01:00,"[PM 1, PM 10, PM 2.5]","[14.2, 22.8, 21.2]",-1.288985,36.824679
15,27,2022-06-14 08:14:30+01:00,"[Humidity, Temperature]","[60.4, 21.4]",-1.288985,36.824679
39,27,2022-06-14 08:27:10+01:00,"[PM 10, PM 1, PM 2.5]","[19.0, 12.4, 17.0]",-1.288985,36.824679
...,...,...,...,...,...,...
1435035,3774,2022-03-30 13:28:40+01:00,"[Temperature, Humidity, PM 10, PM 2.5]","[25.0, 52.0, 14.0, 21.0]",-1.298294,36.790870
1435036,3774,2022-03-30 13:31:20+01:00,"[Humidity, Temperature, PM 2.5, PM 10]","[52.0, 25.0, 21.0, 14.0]",-1.298294,36.790870
1435037,3774,2022-03-30 13:34:00+01:00,"[Temperature, Humidity, PM 10, PM 2.5]","[25.0, 52.0, 14.0, 21.0]",-1.298294,36.790870
1435038,3774,2022-03-30 13:36:40+01:00,"[PM 2.5, PM 10, Humidity, Temperature]","[21.0, 14.0, 52.0, 25.0]",-1.298294,36.790870


## Removed Massive Time Gap from Training Data

In [26]:
grouped.drop(massiveTimeGap.index, inplace=True)

In [27]:
grouped[grouped['parameter'].str.len() < 5]

Unnamed: 0,device_id,timestamp,parameter,value,latitude,longitude


## Label-Data Split

In [39]:
valueNumpy = np.vstack(grouped['value'].to_numpy())

ValueError: ignored

In [43]:
grouped[grouped['value'].str.len() > 5]

Unnamed: 0,device_id,timestamp,parameter,value,latitude,longitude
24881,27,2022-06-22 23:09:50+01:00,"[PM 1, PM 2.5, PM 10, PM 1, PM 2.5, PM 10, Hum...","[10.6, 18.6, 20.6, 11.2, 17.2, 17.8, 52.5, 21.6]",-1.288985,36.824679
38665,27,2022-06-29 16:30:10+01:00,"[PM 2.5, PM 10, PM 1, Humidity, Temperature, P...","[38.75, 46.75, 26.75, 56.2, 19.9, 41.8, 51.2, ...",-1.288985,36.824679
44392,27,2022-07-01 17:10:10+01:00,"[PM 1, PM 10, PM 2.5, PM 2.5, PM 10, PM 1, Hum...","[29.0, 56.8, 44.4, 60.8, 72.6, 44.2, 63.9, 18.6]",-1.288985,36.824679
56598,27,2022-07-08 18:49:50+01:00,"[PM 2.5, PM 10, PM 1, Humidity, Temperature, H...","[49.0, 64.2, 32.0, 84.7, 16.2, 84.8, 16.2]",-1.288985,36.824679
70558,27,2022-07-13 17:09:00+01:00,"[PM 2.5, PM 10, PM 1, Humidity, Temperature, T...","[16.75, 19.5, 12.75, 38.4, 20.4, 20.5, 38.3]",-1.288985,36.824679
...,...,...,...,...,...,...
1415120,105,2022-07-22 16:08:40+01:00,"[Humidity, Temperature, PM 10, PM 1, PM 2.5, T...","[60.5, 21.9, 15.0, 10.0, 15.0, 21.9, 60.5]",-1.296000,36.776000
1416693,105,2022-07-23 05:03:20+01:00,"[Temperature, Humidity, PM 1, PM 10, PM 2.5, H...","[20.2, 67.0, 3.0, 4.0, 4.0, 67.0, 20.2]",-1.296000,36.776000
1418424,105,2022-07-23 19:25:10+01:00,"[PM 10, PM 1, PM 2.5, PM 2.5, PM 10, PM 1, Hum...","[31.0, 18.25, 28.25, 27.6, 27.6, 18.0, 62.0, 2...",-1.296000,36.776000
1423677,105,2022-07-25 15:08:10+01:00,"[Humidity, Temperature, PM 1, PM 10, PM 2.5, H...","[55.9, 23.4, 8.2, 11.6, 11.6, 56.0, 23.4]",-1.296000,36.776000


In [46]:
df[(df['device_id'] == 27) & (df['value'] == 10.6) ]

Unnamed: 0,timestamp,value,parameter,device_id,chip_id,sensor_type,sensor_id,location_id,location,street_name,city,country,latitude,longitude,deployment_date
677196,2022-06-14 08:26:47.694000+01:00,10.6,PM 1,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
677357,2022-06-14 08:30:31.503000+01:00,10.6,PM 1,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
679929,2022-06-14 09:12:53.063000+01:00,10.6,PM 1,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
679775,2022-06-14 09:25:41.475000+01:00,10.6,PM 1,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
679609,2022-06-14 09:33:09.150000+01:00,10.6,PM 1,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145323,2022-09-10 02:43:11.329000+01:00,10.6,PM 1,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
148442,2022-09-10 03:12:23.566000+01:00,10.6,PM 1,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
148590,2022-09-10 03:58:30.311000+01:00,10.6,PM 1,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
148944,2022-09-10 04:21:00.983000+01:00,10.6,PM 2.5,27,esp8266-11639153,9,49,3573,"August 7th Memorial Park, Haile Selassie Ave",Haile Selassie Ave,Nairobi,Kenya,-1.288985,36.824679,2018-07-17 12:37:58.501 +0100
