# Sensor Calibration

### Calibration of six stationary and two mobile sensors during a period of colocation

In [1]:
##Adapted from Andrew Bates' code at https://github.com/specknet/airspeck-comparison/blob/master/6%20Airspeck%20S.ipynb
import os
from data_download import DataDownloader

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd

from datetime import datetime
import time

from bokeh.io import show, output_notebook, output_file, reset_output
from bokeh.plotting import figure

from bokeh.models import DatetimeTickFormatter

In [2]:
start_date = 20180601
end_date = 20180615

uuids = ["02E5F77764B873DA",
        "200A7CED9D597407",
        "E5FD8C55EAA37555",
        "AA0E63CF5118F98F",
        "B61241EF668DBC2C",
        "E786F1568F65C296" ]

sids = ['XXG161', 'XXG162']

factors = [[1.0 ,        1.0 ,       1.0 , 1.    ,     1.            ],
 [2.0100042 , 1.54961648 ,1.5126218 , 1.00494929, 1.00478554],
 [1.59078671, 1.21618292, 1.19189916, 1.01233884, 0.9918236],
 [2.93987177 ,2.3760729 , 2.31180713, 0.98098509, 1.03301718],
 [2.75341775 ,2.34367823, 2.21772871, 0.98226178, 1.01829024],
 [8.11136564, 7.08428589, 7.04657879, 0.96830023, 1.03546691],
 [5.05004303, 4.86456945, 4.72626118, 1.16005363, 0.74392267],
 [5.09431241, 4.90857996, 4.6303786, 1.18617219, 0.7362692  ]]

def toTimestamp(date):
    date = datetime.strptime(date.split(".")[0], "%Y-%m-%d %H:%M:%S")
    return date

data_dir = "/Users/zoepetard/Google Drive/Edinburgh/MscProj/FillingTheGaps/data/"+str(start_date)+"-"+str(end_date)+"/"

if not os.path.exists(data_dir):
    os.makedirs(data_dir)

## Download Data

In [None]:
#Download data
dataDownloader = DataDownloader()
#dataDownloader.loadAirSpeckS(start_date, end_date, data_dir)
#Once this data is downloaded, it can be retrieved from: data_dir + "/" + str(uuid) + '.csv'

#dataDownloader.loadAirSpeckP(start_date, end_date, sids, data_dir)
#Once this data is downloaded, it can be retrieved from: data_dir + "/" + str(sid) + '_' + str(date) + '.csv'


## Read Static CSV

In [3]:
sdata = []
#sdates = []
for i in range(len(uuids)):
    sdata.append(pd.read_csv(data_dir + uuids[i]+".csv"))
    absoluteTime = []
    for j in range(len(sdata[i])):
        absoluteTime.append(toTimestamp(sdata[i]["Timestamp"].values[j]))
    sdata[i]["absoluteTime"] = absoluteTime

## Read Mobile CSV

In [4]:
pdata = []
for i in range(len(sids)):
    pdata_by_day = []
    for j in range(20180605, 20180607): 
        #Only plot these two days because they cover the colocation period
        pdata_by_day.append(pd.read_csv(data_dir + "/" + str(sids[i]) + '_' + str(j) + '.csv'))
        
    pdata_by_sid = pd.concat(pdata_by_day)
    pdata.append(pdata_by_sid)

    absoluteTime = []
    for k in range(len(pdata[i])):
        absoluteTime.append(toTimestamp(pdata[i]["Timestamp"].values[k]))
    pdata[i]["absoluteTime"] = absoluteTime


## Plot Calibrated Data

In [6]:
plot_names = ["PM1","PM2.5", "PM10", "temperature", "humidity"]
CB_color_cycle = ['#377eb8', '#ff7f00', '#4daf4a', '#f781bf', '#a65628', '#984ea3', '#999999', '#e41a1c', '#dede00']

for pidx, plot_name in enumerate(plot_names):
    reset_output()
    output_file(data_dir + "/{}.html".format("ave_" + plot_name + "_" + str(start_date)))

    fig = figure(plot_width=1200, plot_height=600,x_axis_type="datetime", title="Average " + plot_name + " (" + str(start_date) + ")")    

    for i in range(len(sids)):
        # Plot uncalibrated data 
        #fig.line(pdata[i]["absoluteTime"].values, pdata[i][plot_name].values, legend='Personal: ' + sids[i], color=CB_color_cycle[len(uuids) + i])
        
        #Plot calibrated data
        fig.line(pdata[i]["absoluteTime"].values, pdata[i][plot_name].values / factors[i + len(uuids)][pidx], legend='Personal: ' + sids[i], color=CB_color_cycle[len(uuids) + i])

    for i in range(len(uuids)):
        # Plot uncalibrated data
        #fig.line(sdata[i]["absoluteTime"].values, sdata[i][plot_name].values, legend='Static: ' + uuids[i], color=CB_color_cycle[i])
        
        #Plot calibrated data
        fig.line(sdata[i]["absoluteTime"].values, sdata[i][plot_name].values / factors[i][pidx], legend='Static: ' + uuids[i], color=CB_color_cycle[i])

            
    fig.legend.location = "top_left"
    show(fig)

## Calibration calculations

In [8]:
#From  1528202000.0
#To    1528275000.0
# They were all happily colocated

def getColocatedSlice(data, ids):
    calibrationData = []
    for i in range(len(ids)):
        #print(data[i].shape)
        condition1 = data[i]["absoluteTime"] > 1528202100.0 
        condition2 = data[i]["absoluteTime"] < 1528275000.0
        condition = condition1 & condition2
        calibrationData.append(data[i][condition])
        print(calibrationData[i].shape)
        
    return calibrationData

sCalibrationData = getColocatedSlice(sdata, uuids)
pCalibrationData = getColocatedSlice(pdata, sids)
print(sCalibrationData[0].head(5))
print(sCalibrationData[0].tail(5))


TypeError: invalid type comparison

In [None]:
#Downsample pdata to every 5 mins
pDownsampledData = []
pCalibrationCopy = pCalibrationData

for i in range(len(sids)):
    newP = pCalibrationCopy[i].set_index(["Timestamp"])
    newP.index = pd.to_datetime(newP.index)
    #NOTE THIS IS DELETING LATITUDE IN PDATA[0] POSSIBLY BECAUSE OF A NAN
    pDownsampledData.append(newP.resample('5Min').mean())

In [None]:
PM_strs = ["PM1","PM2.5", "PM10", "temperature", "humidity"]
factors = np.empty((8,5))
for i in range(len(uuids)):
    for indx, PM_str in enumerate(PM_strs):
        data = sCalibrationData
        reference = sCalibrationData[0][PM_str].values
        reference = reference[~np.isnan(reference)]
        current = sCalibrationData[i][PM_str].values
        current = current[~np.isnan(current)]
        factor_array = current / reference
        factor = factor_array.mean()
        print("Sensor ID " + uuids[i] + " " + PM_str + " factor: " + str(factor))
        sCalibrationData[i][PM_str] /= factor
        factors[i][indx] = factor

for i in range(len(sids)):
    for indx, PM_str in enumerate(PM_strs):
        data = sCalibrationData
        reference = sCalibrationData[0][PM_str].values
        reference = reference[~np.isnan(reference)]
        current = pDownsampledData[i][PM_str].values
        current = current[~np.isnan(current)]
        factor_array = current / reference
        factor = factor_array.mean()
        print("Sensor ID " + sids[i] + " " + PM_str + " factor: " + str(factor))
        pDownsampledData[i][PM_str] /= factor
        factors[len(uuids) + i][indx] = factor
