In [1]:
import thingspeak
import pandas as pd
import json
import ssl
import plotly.express as px
import numpy as np
import plotly.io as pio
import datetime

ssl._create_default_https_context = ssl._create_unverified_context



In [2]:
API_KEY = "ZRDS32VNQEEFSOF4"
CHANNEL_ID = "1361623"

fields = {
    "field2": "TEMP [C]", 
    "field3":"Relative humidity", 
    "field4":"PM1 [ug/m3]", 
    "field5":"PM2.5 [ug/m3]", 
    "field6":"PM10 [ug/m3]",
    "created_at" : "Date/time"
}

new_fields = {v : k for (k,v) in zip(fields.keys(), fields.values())}

In [3]:
def setup_plotly():
    pd.options.plotting.backend = 'plotly'
    pio.templates.default = "plotly_dark"

setup_plotly()


In [42]:
def get_thingspeak_data(id: str, api_key: str, nb_of_results: int = 8000, drop_entry_id: bool = True, save: bool = False) -> pd.DataFrame:
    time_zone = "UTC"
    url = f"https://thingspeak.com/channels/{id}/feed.csv?apikey={api_key}&results={nb_of_results}&timezone={time_zone}"
    
    df = pd.read_csv(url)
    df.replace(['None'], np.nan, inplace=True)
    df.rename(columns=fields, inplace=True)
    df["Date/time"] = pd.to_datetime(df["Date/time"])
    df["TEMP [C]"] = df["TEMP [C]"].astype("float64")
    df["Relative humidity"] = df["TEMP [C]"].astype("float64")
    df.set_index("Date/time", inplace=True)
    df.index = df.index.tz_convert("Europe/Paris")
    df = df.assign(missing= np.nan)
    df.drop("missing", inplace=True, axis=1)
    
    if drop_entry_id:
        df.drop('entry_id', axis=1, inplace=True)

    if save:
        df.to_csv('df.csv')

    return df

In [43]:
def get_zue_data():
    df = pd.read_csv('ZUE.csv', delimiter=";")
    df["Date/time"] = pd.to_datetime(df["Date/time"], format='%d.%m.%Y %H:%M')
    df["TEMP [C]"] = df["TEMP [C]"].astype("float64")
    df.set_index("Date/time", inplace=True)
    df.index = df.index.tz_localize("Europe/Paris", ambiguous="NaT", nonexistent="shift_backward")
    df = df.assign(missing= np.nan)
    df.drop("missing", inplace=True, axis=1)

    return df


In [40]:
def extract_time_series(df: pd.DataFrame):
    pm10_series = df.loc[:, "PM10 [ug/m3]"]
    pm25_series = df.loc[:, "PM2.5 [ug/m3]"]
    temperature_series = df.loc[:, "TEMP [C]"]
    humidity_series = df.loc[:, "PREC [mm]"]

    return temperature_series, humidity_series, pm25_series, pm10_series

In [7]:
def merge_datasets(df_zue: pd.DataFrame, df_ts: pd.DataFrame) -> pd.DataFrame:
    return pd.concat([df_zue, df_ts])


In [44]:
df_zue = get_zue_data()
df_ts = get_thingspeak_data(id=CHANNEL_ID, api_key=API_KEY, save=True)

In [45]:
df_ts.columns

Index(['TEMP [C]', 'Relative humidity', 'PM1 [ug/m3]', 'PM2.5 [ug/m3]',
       'PM10 [ug/m3]'],
      dtype='object')

In [46]:
df = merge_datasets(df_ts=df_ts, df_zue=df_zue)
df.head()

Unnamed: 0_level_0,PM10 [ug/m3],PM2.5 [ug/m3],TEMP [C],PREC [mm],Relative humidity,PM1 [ug/m3]
Date/time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-09-01 01:00:00+02:00,2.9,2.1,14.4,0.0,,
2020-09-01 02:00:00+02:00,2.8,1.9,14.5,0.0,,
2020-09-01 03:00:00+02:00,2.9,1.6,14.4,0.0,,
2020-09-01 04:00:00+02:00,2.6,1.7,14.3,0.0,,
2020-09-01 05:00:00+02:00,2.7,1.7,14.0,0.0,,


In [55]:
df.plot()

In [23]:
temperature_series, humidity_series, pm25_series, pm10_series = extract_time_series(df)

In [24]:
temperature_series.plot()

In [48]:
humidity_series.plot()

In [49]:
pm25_series.plot()

In [50]:
pm10_series.plot()

In [51]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 7439 entries, 2020-09-01 01:00:00+02:00 to 2021-05-05 06:10:15+02:00
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   PM10 [ug/m3]       6141 non-null   float64
 1   PM2.5 [ug/m3]      7437 non-null   float64
 2   TEMP [C]           7428 non-null   float64
 3   PREC [mm]          5831 non-null   float64
 4   Relative humidity  1597 non-null   float64
 5   PM1 [ug/m3]        1607 non-null   float64
dtypes: float64(6)
memory usage: 406.8 KB


In [None]:
def compare_imputation_methods()

In [53]:
def calc_percent_NAs(df):
    nans = pd.DataFrame(df.isnull().sum().sort_values(ascending=False)/len(df), columns=['percent']) 
    idx = nans['percent'] > 0
    return nans[idx]

In [54]:
calc_percent_NAs(df)

Unnamed: 0,percent
Relative humidity,0.785321
PM1 [ug/m3],0.783976
PREC [mm],0.216158
PM10 [ug/m3],0.174486
TEMP [C],0.001479
PM2.5 [ug/m3],0.000269


In [16]:
def main():
    df = get_data(id=CHANNEL_ID, api_key=API_KEY)
    temperature_series, humidity_series, pm1_series, pm25_series, pm10_series = extract_time_series(df)
    temperature_series.plot()

if __name__ == "__main__":
    main()

NameError: name 'main' is not defined