In [15]:
#----------------------------------------------------------------------
import pandas as pd 
import numpy as np 
#----------------------------------------------------------------------
import matplotlib.pyplot as plt 
import seaborn as sns 
import pandas_bokeh
pandas_bokeh.output_notebook()
pd.set_option('plotting.backend', 'pandas_bokeh')
# Create Bokeh-Table with DataFrame:
from bokeh.models.widgets import DataTable, TableColumn
from bokeh.models import ColumnDataSource
#----------------------------------------------------------------------
import datetime 
import requests
#----------------------------------------------------------------------
from sklearn.metrics import mean_absolute_error 
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import r2_score
#---------------------------------------------------------------------
from functools import reduce

In [16]:
def collect_data(url):
   feeds_ = requests.get(url).json()['feeds']
   feeds_data = pd.DataFrame(feeds_)
   # drop columns which will be unused: Latitude, longitude, gps, battery, pm10 values 
   feeds_data.drop(columns = ['field2','field4','field5','field6','field8','field7','entry_id'],inplace = True)
   #Rename fields correspomnding to pm2.5
   feeds_data.rename(columns ={'field1': 'sensor1_PM2.5','field3':'sensor2_PM2.5','created_at':'timestamp'},inplace = True)
   # converting the date field to timestamp 
   feeds_data['timestamp'] = pd.to_datetime(feeds_data['timestamp'])
   # setting time stamp as the index 
   feeds_data.set_index('timestamp',inplace = True)
   feeds_data = feeds_data.astype({'sensor1_PM2.5':'float','sensor2_PM2.5':'float'}).round(2)
   #feeds_data[['sensor1_PM2.5'],['sensor2_PM2.5']] = pd.to_numeric([['sensor1_PM2.5'],['sensor2_PM2.5']],errors = 'coerce')
   feeds_data = feeds_data.assign(average_PM2_5 = lambda x : (feeds_data['sensor1_PM2.5'] + feeds_data['sensor2_PM2.5'])/2)
   feeds_data.drop(columns = ['sensor1_PM2.5','sensor2_PM2.5'],inplace = True)
   return feeds_data 
   

In [17]:
def offtime_sampling(dataframe_ , on_time,off_time):# dataframe to get the samples from. Y- off time we are looking at 
    track_length = 0
    add_to_dataframe = 1
    sampled_dataframe = pd.DataFrame()
    while(track_length < 60):
        if (add_to_dataframe):
            new_sampled_dataframe = dataframe_[(( (track_length <= dataframe_.index.minute)&(dataframe_.index.minute < (track_length + on_time))))]
            sampled_dataframe = pd.concat([sampled_dataframe,new_sampled_dataframe])
            add_to_dataframe = 0
            track_length = track_length + on_time
        else:
            add_to_dataframe = 1 
            track_length = track_length + off_time 
    print(track_length) 
    sampled_dataframe.sort_index(ascending =True)
    sampled_dataframe.rename(columns ={"average_PM2_5":"\"average_PM2_5\"+ offtime "})
    return sampled_dataframe

In [18]:
def convert_frequency(dataframe_,frequency): 
    if(frequency == 'D'):
        df = dataframe_.resample('D')['average_PM2_5'].mean()
    elif (frequency == 'H'):
        df = dataframe_.resample('H')['average_PM2_5'].mean()
    return pd.DataFrame(df)  

In [19]:
def data_frame_visuals(merged_dataframe):
    #merged_dataframe.plot(title='Title',xlabel='Values',figsize =(10,6))
    print(merged_dataframe.plot_blokeh(kind="line"))

In [20]:
def parformance_comparisons(dataframe1,dataframe2):
    #combined_dataset = pd.merge(dataframe1,dataframe2,on='TimeStamp')
    mae = mean_absolute_error( dataframe1['average_PM2_5'], dataframe2['average_PM2_5'])
    rmse = mean_squared_error(dataframe1['average_PM2_5'], dataframe2['average_PM2_5'])
    rsquared = r2_score(dataframe1['average_PM2_5'], dataframe2['average_PM2_5'])
    metric = ['mae','rmse','rsquared']
    values= [mae,rmse,rsquared]
    comparison_df = pd.DataFrame(list(zip(metric,values)),columns=['metris','value'])
    # get the list of tuples from two lists 
    # merge them using zip()
    return comparison_df
    

In [21]:
# data for analysis using the device AQ_G5133
AQ_G5133 = collect_data('https://thingspeak.com/channels/1962719/feeds.json?start=2023-02-18T00:00:00Z&end=2023-02-20T23:59:59Z&api_key=0IBG2XN6MIRMUMQ17')

In [22]:
five_mins_data = offtime_sampling(AQ_G5133,5,5)
fifteen_mins_off_data = offtime_sampling(AQ_G5133,5,15)
twenty_mins_off_data = offtime_sampling(AQ_G5133,5,20)

60
60
75


In [23]:
five_mins_data_hourly = convert_frequency(five_mins_data,'H')
five_mins_data_daily = convert_frequency(five_mins_data,'D')
fifteen_mins_off_data_hourly = convert_frequency(fifteen_mins_off_data,'H')
fifteen_mins_off_data_daily = convert_frequency(fifteen_mins_off_data,'D')
twenty_mins_off_data_hourly = convert_frequency(twenty_mins_off_data,'H')
twenty_mins_off_data_daily = convert_frequency(twenty_mins_off_data,'D')
AQ_G5133_hourly = convert_frequency(AQ_G5133,'H')
AQ_G5133_daily = convert_frequency(AQ_G5133,'D')

In [24]:
no_off_vs_five_off = parformance_comparisons(AQ_G5133_hourly,five_mins_data_hourly)
no_off_vs_five_off 

Unnamed: 0,metris,value
0,mae,1.228654
1,rmse,3.75442
2,rsquared,0.997307


In [25]:
no_off_vs_fifteen_off = parformance_comparisons(AQ_G5133_hourly,fifteen_mins_off_data_hourly)
no_off_vs_fifteen_off 

Unnamed: 0,metris,value
0,mae,2.709738
1,rmse,18.585511
2,rsquared,0.986668


In [26]:
df_hourly = [AQ_G5133_hourly,five_mins_data_hourly,fifteen_mins_off_data_hourly]#,twenty_mins_off_data_hourly]
def plot_relations(df): #df should be a list of dataframes 
    data_merge = reduce(lambda left, right:pd.merge(left, right,on = ["timestamp"]),df)
    name={'average_PM2_5_x':'row_data_average','average_PM2_5_y':'5mins_off_average','average_PM2_5':'15mins_off_average'}
    data_merge.rename(columns=name,inplace=True)
    return data_merge.plot()

In [27]:
hourly_relations = plot_relations(df_hourly)
hourly_relations

In [28]:
df_daily = [AQ_G5133_daily,five_mins_data_daily,fifteen_mins_off_data_daily]
daily_relations = plot_relations(df_daily)
daily_relations

In [29]:
AQ_G5133_daily


Unnamed: 0_level_0,average_PM2_5
timestamp,Unnamed: 1_level_1
2023-02-18 00:00:00+00:00,87.540273
2023-02-19 00:00:00+00:00,62.535588
2023-02-20 00:00:00+00:00,51.826339
