In [128]:
import pandas as pd
import re

In [129]:
# Source CSV holding the raw AIT sensor readings
DATA_PATH = "https://raw.githubusercontent.com/RThaweewat/KDAI-2022/main/AIT-Sensors.csv"

# Lookup table: sensor node id -> zone label (one zone per row for readability)
mapping_list = {
    "AIT10": "1MS", "AIT11": "1MS", "AIT12": "1MS", "AIT13": "1MS", "AIT14": "1MS",
    "AIT15": "3TS", "AIT16": "3TS", "AIT17": "3TS",
    "AIT18": "4MG",
    "AIT19": "2TS2",
}

# PM2.5 class boundaries: values up to low_threshold are the lowest class,
# values above high_threshold the highest (units not stated here -- confirm
# against the data source).
low_threshold = 30
high_threshold = 120

In [131]:
# Load the raw readings once; the raw frame is never mutated, so the
# transform below can be re-run on its own.
sensor_raw = pd.read_csv(DATA_PATH)

# Build the hourly-per-node table: clean column names, average every numeric
# measurement per (date, hour, node), then attach the zone and PM2.5 class.
sensor_processed = (
    sensor_raw
    # Collapse each run of non-word characters in a column name into "_".
    # Raw string: "\W" must reach the regex engine as-is; in a plain string
    # literal it is an invalid escape sequence (warning on modern Python).
    .rename(columns=lambda x: re.sub(r'\W+', '_', x))
    .rename(str.lower, axis='columns')
    .rename(columns={'datetime_utc_7_': 'date_time'})
    .assign(date_time=lambda x: pd.to_datetime(x['date_time']))
    .assign(date=lambda x: x['date_time'].dt.date, hour=lambda x: x['date_time'].dt.hour)
    # numeric_only=True makes the aggregation explicit: only numeric columns
    # are averaged, so date_time (and any text column) is left out on every
    # pandas version instead of depending on the version's default.
    .groupby(['date', 'hour', 'node'], as_index=False).mean(numeric_only=True)
    # Drop unused columns
    .drop(columns=['timestamp', 'gpslat', 'gpsalt', 'gpslng'])
    # Change date type back to datetime64 (dt.date yields Python date objects)
    .assign(date=lambda x: pd.to_datetime(x['date']))
    # Zero-pad hour to two digits, e.g. 1 -> "01" (hour becomes a string)
    .assign(hour=lambda x: x['hour'].astype(str).str.zfill(2))
    # Assign zone based on mapping list; nodes missing from it get NaN
    .assign(zone=lambda x: x['node'].map(mapping_list))
    # Assign PM2.5 class: (-1, low] -> Low, (low, high] -> Medium, above -> High
    .assign(pm2_5_class=lambda x: pd.cut(
        x['pm2_5'],
        bins=[-1, low_threshold, high_threshold, float("inf")],
        labels=['Low', 'Medium', 'High'],
    ))
)

sensor_processed

Unnamed: 0,date,hour,node,temperature,humidity,airpressure,pm2_5,pm10,pm1_0,co,co2,hcho,zone,pm2_5_class
0,2018-03-17,00,AIT11,30.941176,57.250000,987.373529,107.088235,115.000000,70.058824,21.744118,423.411765,0.382353,1MS,Medium
1,2018-03-17,00,AIT12,27.394118,53.661765,986.361765,65.676471,70.470588,44.970588,15.558824,953.941176,173.117647,1MS,Medium
2,2018-03-17,00,AIT13,26.642857,57.537143,988.322857,69.485714,74.085714,47.457143,14.702857,1026.885714,154.257143,1MS,Medium
3,2018-03-17,01,AIT11,30.567647,57.770588,986.970588,101.617647,108.411765,66.676471,21.676471,427.411765,0.000000,1MS,Medium
4,2018-03-17,01,AIT12,27.488235,57.520588,986.002941,61.735294,65.588235,42.235294,15.350000,970.323529,150.911765,1MS,Medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5815,2018-04-17,23,AIT13,33.300000,49.326471,576.323529,90.294118,94.970588,64.235294,10.505882,426.676471,0.000000,1MS,Medium
5816,2018-04-17,23,AIT14,31.165714,55.002857,989.054286,77.571429,82.885714,51.914286,29.582857,425.742857,0.000000,1MS,Medium
5817,2018-04-17,23,AIT15,29.196970,56.739394,986.732353,136.941176,145.147059,96.000000,16.176471,457.941176,26.516129,3TS,High
5818,2018-04-17,23,AIT17,31.067647,51.508824,987.152941,84.029412,89.882353,59.264706,13.858824,460.294118,0.000000,3TS,Medium
