# Compare Calibrations

**Import modules**

In [None]:
from SOAI.handler.SOAIDiskHandler import SOAIDiskHandler
from SOAI.handler.SOAIDBHandler import SOAIDBHandler
from SOAI.sensors.SOAISensorNetwork import SOAISensorNetwork
  
import os    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import subplots
    
import logging
logger = logging.getLogger()

# $\color{red}{\text{Set up everything}}$

**Load data from disk**

In [None]:
# Single handler instance used for every read from disk in this notebook.
diskHandler = SOAIDiskHandler()

# OpenAir: measurement frame without the pm10/pm25/rssi columns (they are not
# referenced by the cells below), plus the sensor table.
dfOpenAir = diskHandler.fGetOpenAir().drop(columns=["pm10", "pm25", "rssi"])
dfOpenAirSensors = diskHandler.fGetOpenAirSensors()

# Lanuv: measurement frame without the NO/OZON columns, plus the sensor table.
dfLanuv = diskHandler.fGetLanuv().drop(columns=["NO", "OZON"])
dfLanuvSensors = diskHandler.fGetLanuvSensors()

**Start sensor network**

This will read the given config file and start each sensor. All calibration models will be loaded as well.

In [None]:
# The config path is built from the SOAI environment variable. Fail early with
# a readable message when it is missing; otherwise the string concatenation
# below would raise an opaque "NoneType + str" TypeError.
soaiRoot = os.environ.get("SOAI")
if soaiRoot is None:
    raise EnvironmentError(
        "Environment variable 'SOAI' is not set; it is needed to locate "
        "configs/sensorNetwork.conf."
    )

network = SOAISensorNetwork(soaiRoot + "/configs/sensorNetwork.conf")
network.fCheckNetwork()  # Checks if everything is consistent

# $\color{red}{\text{Examine the calibrations}}$

$\color{red}{\text{In the following we will work with SOAISensor objects and the classes derived from SOAISensor.}}$

**Select OpenAir and Lanuv sensors that are close to each other.**

Since the distance between an OpenAir and a Lanuv sensor will have a strong impact on the calibration, it is convenient to start with the best-case scenarios.

In [None]:
# Ask the network for the N=5 closest OpenAirCologne/Lanuv pairs. The call
# returns three parallel sequences (OpenAir sensors, Lanuv sensors, distances),
# each with N elements.
closestPairs = network.fGetSmallestSensorPairs("OpenAirCologne", "Lanuv", N=5)
sensorsOpenAir, sensorsLanuv, distances = closestPairs

# The pair with the smallest distance is picked explicitly for later use.
idxClosest = np.argmin(distances)
closestOpenAirSensor, closestLanuvSensor = sensorsOpenAir[idxClosest], sensorsLanuv[idxClosest]

**For each element filter the initial data (retrieved from the SOAIDiskHandler).**

Save each data frame as an element in a list.

In [None]:
# One data frame per selected sensor, in the same order as sensorsOpenAir /
# sensorsLanuv. Each selection is copied explicitly: later cells add columns
# to these frames, and writing into a boolean-mask selection of dfOpenAir /
# dfLanuv would otherwise trigger pandas' SettingWithCopyWarning.
listOpenAirData = [dfOpenAir[dfOpenAir["sensorID"] == sensor.fGetID()].copy() for sensor in sensorsOpenAir]
listLanuvData = [dfLanuv[dfLanuv["sensorID"] == sensor.fGetID()].copy() for sensor in sensorsLanuv]

**For each sensor and its data, apply the calibration model.**

The calibration models are loaded by the SOAISensorNetwork. The resulting NO2 values are stored in the data frame as well.

In [None]:
# Run every OpenAir sensor's own calibration model over its data and store the
# result in a new "no2" column of the corresponding frame.
for sensor, data in zip(sensorsOpenAir, listOpenAirData):
    # Sensors that are not active (e.g. no calibration model assigned) are
    # skipped; their frame gets no "no2" column.
    if sensor.fIsActive() is False:
        logger.warning(f"Can't process unactive sensor with feed {sensor.fGetID()}.")
        continue

    # Buffer for the calibrated values, pre-filled with -1
    no2 = -np.ones(len(data))

    # The model is applied row by row
    for pos, (_, row) in enumerate(data.iterrows()):
        no2[pos] = sensor.fDataToNO2(row)

    # Attach the calibrated values to the frame
    data.loc[:,"no2"] = no2

    print(sensor.fGetID())
    print(data)

**Do the very same again, but always use the calibration model of the closest sensor pair.**

The closest sensor pair is already determined above.

In [None]:
# Re-run the calibration for every OpenAir sensor, this time with the model of
# the closest OpenAir/Lanuv pair, and store the result in "no2_bestCalib".
#
# The borrowed model is installed only temporarily: each sensor's own model is
# saved first and put back afterwards. Otherwise this cell would permanently
# overwrite the models held by the network, and re-running the "own
# calibration" cell would silently produce best-calibration values.
bestCalibration = closestOpenAirSensor.fGetCalibration()

for sensor, data in zip(sensorsOpenAir, listOpenAirData):
    # Check if sensor is active (e.g. has calibration model assigned)
    if sensor.fIsActive() is False:
        logger.warning(f"Can't process unactive sensor with feed {sensor.fGetID()}.")
        continue

    # Buffer for the calibrated values, pre-filled with -1
    no2 = -np.ones(len(data))

    ownCalibration = sensor.fGetCalibration()
    sensor.fSetCalibration(bestCalibration)
    try:
        # Apply the (borrowed) calibration model row by row
        for pos, (_, row) in enumerate(data.iterrows()):
            no2[pos] = sensor.fDataToNO2(row)
    finally:
        # Restore the sensor's own model even if the conversion failed
        sensor.fSetCalibration(ownCalibration)

    # Set the no2 values as a column in the data frame
    data.loc[:,"no2_bestCalib"] = no2

**The calibrated data is now combined with the Lanuv data.**

This is needed for a good comparison later on.

In [None]:
# Build one combined frame per sensor pair holding the OpenAir readings, both
# calibrated NO2 series and the Lanuv NO2 reference on a common hourly index.
openAirColumns = ["r2", "temp", "hum", "no2", "no2_bestCalib"]

listCombinedData = []
for dataOpen, dataLanuv in zip(listOpenAirData, listLanuvData):
    # Resample both frames to hourly means so their time indices line up.
    # resample() does not work in place, so the result has to be assigned.
    # Only the needed columns are selected first, which keeps non-numeric
    # columns out of the mean.
    dataOpen = dataOpen[openAirColumns].resample("1h").mean()
    dataLanuv = dataLanuv[["no2"]].resample("1h").mean()

    # Since one sensor could have measured longer/shorter, restrict both
    # frames to the time span covered by both.
    timeMin = max(dataOpen.index.min(), dataLanuv.index.min())
    timeMax = min(dataOpen.index.max(), dataLanuv.index.max())
    dataLanuv = dataLanuv.loc[(dataLanuv.index >= timeMin) & (dataLanuv.index <= timeMax)]
    dataOpen = dataOpen.loc[(dataOpen.index >= timeMin) & (dataOpen.index <= timeMax)]

    # Fill all the important values in a new data frame. The Lanuv series is
    # aligned on the OpenAir index; hours without a Lanuv value become NaN.
    dataCombined = dataOpen.copy()
    dataCombined["no2_lanuv"] = dataLanuv["no2"]

    # Append to the list
    listCombinedData.append(dataCombined)

# $\color{red}{\text{Visualization}}$

**Comparison of the r2, temperature and humidity values of the intrinsic sensor data.**

In [None]:
# One figure per intrinsic quantity, each overlaying all sensors.
# Entries: (column name, which is also the y-label; figure title; legend position)
intrinsicPanels = (
    ("r2", "Comparison of the r2 value", 'upper right'),
    ("temp", "Comparison of the temperature", 'upper right'),
    ("hum", "Comparison of the humidity", 'upper left'),
)

for column, title, legendLoc in intrinsicPanels:
    fig, ax = subplots()
    for frame in listCombinedData:
        frame[column].plot(figsize=(20,10), fontsize=20, ax=ax)

    ax.set_title(title, fontsize=25)
    ax.set_ylabel(column, fontsize=20)
    ax.set_xlabel("")
    # Legend entries are the sensor IDs, in the order the frames were plotted
    ax.legend([sensor.fGetID() for sensor in sensorsOpenAir], loc=legendLoc, fontsize=20)
    plt.show()

**Comparison of the calibrations to the Lanuv data.**

In [None]:
# One figure per sensor pair: own calibration, best-pair calibration and the
# Lanuv reference in a single plot, titled with the distance of the pair.
no2Columns = ("no2", "no2_bestCalib", "no2_lanuv")
no2Labels = ["NO2 own calibration", "NO2 best calibration", "NO2 Lanuv"]

for pairIdx, frame in enumerate(listCombinedData):
    fig, ax = subplots()
    for column in no2Columns:
        frame[column].plot(figsize=(20,10), fontsize=20, ax=ax)
    ax.set_ylabel("NO2", fontsize=20)
    ax.legend(no2Labels, loc='upper left', fontsize=20)
    ax.set_title(distances[pairIdx], fontsize=20)
    plt.show()