In [1]:
#----------------------------------------------------------------------
import pandas as pd 
import numpy as np 
#----------------------------------------------------------------------
import matplotlib.pyplot as plt 
import seaborn as sns 
import pandas_bokeh
pandas_bokeh.output_notebook()
pd.set_option('plotting.backend', 'pandas_bokeh')
# Create Bokeh-Table with DataFrame:
from bokeh.models.widgets import DataTable, TableColumn
from bokeh.models import ColumnDataSource
#----------------------------------------------------------------------
import datetime 
import requests
#----------------------------------------------------------------------
from sklearn.metrics import mean_absolute_error 
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import r2_score

In [2]:
def collect_data(url):
   feeds_ = requests.get(url).json()['feeds']
   feeds_data = pd.DataFrame(feeds_)
   # drop columns which will be unused: Latitude, longitude, gps, battery, pm10 values 
   feeds_data.drop(columns = ['field2','field4','field5','field6','field8','field7','entry_id'],inplace = True)
   #Rename fields correspomnding to pm2.5
   feeds_data.rename(columns ={'field1': 'sensor1_PM2.5','field3':'sensor2_PM2.5','created_at':'timestamp'},inplace = True)
   # converting the date field to timestamp 
   feeds_data['timestamp'] = pd.to_datetime(feeds_data['timestamp'])
   # setting time stamp as the index 
   feeds_data.set_index('timestamp',inplace = True)
   feeds_data = feeds_data.astype({'sensor1_PM2.5':'float','sensor2_PM2.5':'float'}).round(2)
   #feeds_data[['sensor1_PM2.5'],['sensor2_PM2.5']] = pd.to_numeric([['sensor1_PM2.5'],['sensor2_PM2.5']],errors = 'coerce')
   feeds_data = feeds_data.assign(average_PM2_5 = lambda x : (feeds_data['sensor1_PM2.5'] + feeds_data['sensor2_PM2.5'])/2)
   feeds_data.drop(columns = ['sensor1_PM2.5','sensor2_PM2.5'],inplace = True)
   return feeds_data 
   

In [3]:
def offtime_sampling(dataframe_ , on_time,off_time):# dataframe to get the samples from. Y- off time we are looking at 
    track_length = 0
    add_to_dataframe = 1
    sampled_dataframe = pd.DataFrame()
    while(track_length < 60):
        if (add_to_dataframe):
            new_sampled_dataframe = dataframe_[(( (track_length <= dataframe_.index.minute)&(dataframe_.index.minute < (track_length + on_time))))]
            sampled_dataframe = pd.concat([sampled_dataframe,new_sampled_dataframe])
            add_to_dataframe = 0
            track_length = track_length + on_time
        else:
            add_to_dataframe = 1 
            track_length = track_length + off_time 
    print(track_length) 
    sampled_dataframe.sort_index(ascending =True)
    sampled_dataframe.rename(columns ={"average_PM2_5":"\"average_PM2_5\"+ offtime "})
    return sampled_dataframe

In [4]:
def convert_frequency(dataframe_,frequency): 
    if(frequency == 'D'):
        df = dataframe_.resample('D')['average_PM2_5'].mean()
    elif (frequency == 'H'):
        df = dataframe_.resample('H')['average_PM2_5'].mean()
    return df  

In [5]:
def data_frame_visuals(merged_dataframe):
    #merged_dataframe.plot(title='Title',xlabel='Values',figsize =(10,6))
    print(merged_dataframe.plot_blokeh(kind="line"))

In [6]:
def parformance_comparisons(dataframe1,dataframe2):
    combined_dataset = pd.merge(dataframe1,dataframe2,on='TimeStamp')
    mae = mean_absolute_error( dataframe1["value_x"], dataframe2['value_y'])
    rmse = mean_squared_error(dataframe1["value_x"], dataframe2['value_y'])
    rsquared = r2_score(dataframe1["value_x"], dataframe2['value_y'])
    metric = ['mae','rmse','rsquared']
    values= [mae,rmse,rsquared]
    comparison_df = pd.DataFrame(list(zip(metric,values)),columns=['metris','value'])
    # get the list of tuples from two lists 
    # merge them using zip()
    return comparison_df
    

In [7]:
# data for analysis using the device AQ_G5133
AQ_G5133 = collect_data('https://thingspeak.com/channels/1962719/feeds.json?start=2023-02-18T00:00:00Z&end=2023-02-20T23:59:59Z&api_key=0IBG2XN6MIRMUMQ17')

In [8]:
five_mins_data = offtime_sampling(AQ_G5133,5,5)
fifteen_mins_off_data = offtime_sampling(AQ_G5133,5,15)
twenty_mins_off_data = offtime_sampling(AQ_G5133,5,20)

60
60
75


In [9]:
five_mins_data_hourly = convert_frequency(five_mins_data,'H')
five_mins_data_daily = convert_frequency(five_mins_data,'D')
fifteen_mins_off_data_hourly = convert_frequency(fifteen_mins_off_data,'H')
fifteen_mins_off_data_daily = convert_frequency(fifteen_mins_off_data,'D')
twenty_mins_off_data_hourly = convert_frequency(twenty_mins_off_data,'H')
twenty_mins_off_data_daily = convert_frequency(twenty_mins_off_data,'D')

                           average_PM2_5
timestamp                               
2023-02-18 00:01:54+00:00         85.440
2023-02-18 00:02:54+00:00         88.755
                           average_PM2_5
timestamp                               
2023-02-18 00:01:54+00:00         85.440
2023-02-18 00:02:54+00:00         88.755
                           average_PM2_5
timestamp                               
2023-02-18 00:01:54+00:00         85.440
2023-02-18 00:02:54+00:00         88.755


In [10]:
data_for_analysis = [AQ_G5133,five_mins_data,fifteen_mins_off_data,twenty_mins_off_data]
daily_data = convert_frequency(data_for_analysis,'D')
hourly_data = convert_frequency(data_for_analysis,'H') 
hourly_data.head()

Unnamed: 0_level_0,average_PM2_5,average_PM2_5,average_PM2_5,average_PM2_5
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-02-18 00:00:00+00:00,137.757692,129.921562,123.5995,118.901875
2023-02-18 01:00:00+00:00,142.230119,143.614545,147.658889,140.806
2023-02-18 02:00:00+00:00,138.364231,137.006522,136.446667,141.5765
2023-02-18 03:00:00+00:00,140.094111,139.291818,139.98375,141.1155
2023-02-18 04:00:00+00:00,156.6615,159.265,171.996667,151.349375


In [11]:
AQ_G5133.head(2)

Unnamed: 0_level_0,average_PM2_5
timestamp,Unnamed: 1_level_1
2023-02-18 00:01:54+00:00,85.44
2023-02-18 00:02:54+00:00,88.755
