**Import statements**


In [1]:
import os
from datetime import datetime
from datetime import timedelta

import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pykrige.kriging_tools as kt
from pykrige.ok import OrdinaryKriging
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

import constants
import functions2 as functions

%matplotlib inline

**Load Data**

In [2]:
# Load each data set and immediately convert it with the project's
# `transform_to_grid_coordinates` helper: static sensors first, then mobile.
all_static_data = functions.transform_to_grid_coordinates(
    functions.load_static_sensors_calibrated()
)
mobile_sensor_data = functions.transform_to_grid_coordinates(
    functions.load_mobile_sensors()
)

**Compute predictions for stationary sensors**

In [3]:
# Search space for the hyperparameter search.

# Candidate values for the `c` argument, spaced one decade apart.
cs = [2e-5, 2e-4, 2e-3, 2e-2, 2e-1, 2]

# Candidate values for the `epsilon` argument.
epsilons = [0.1, 0.5, 1, 2.5, 5]

# Only 'epsilon_insensitive' is searched: it was verified to work better than
# 'squared_epsilon_insensitive', and leaving the latter out saves time.
losses = ['epsilon_insensitive']

# Whether to include temperature/humidity (presumably as extra inputs -- TODO confirm).
include_temp_hum = [False, True]

# Candidate input window sizes: 8, 16, ..., 48.
input_window_sizes = range(8, 50, 8)

In [4]:
def test_on_data(c, epsilon, loss, temp_hum, window_size, all_static_data):
    mae = []
    mse = []
    squares = []
    pred=[]
    values_read=[]


    start_time = '2018-07-03 00:00:00'
    start_window = '2018-07-05 12:00:00'
    end_window = '2018-07-05 12:15:00'
    offset_timeint = 1

    number_of_windows = 100
    window = 15
    par_grid = functions.create_par(c, epsilon, loss)
    timeint_on_first_window = 0

    for i in range(1, number_of_windows+1):
        end_time = start_window
        # treinar sem dados moveis
        # testar em dados fixos em vez de móveis
        timeint, par_grid = functions.train(all_static_data, None, start_time, end_time, par_grid, window, timeint_on_first_window)
        to_test = functions.test_mobile(par_grid, timeint, all_static_data, start_window, end_window, window)
        squares.append(to_test.shape[0])
        mae.append(mean_absolute_error(to_test['PM2.5'], to_test['pred_PM2.5']))
        mse.append(mean_squared_error(to_test['PM2.5'], to_test['pred_PM2.5']))
        pred.append(to_test['pred_PM2.5'].values[0])
        values_read.append(to_test['PM2.5'].values[0])


        start_time = end_time
        start_window = (datetime.strptime(start_window, '%Y-%m-%d %H:%M:%S') + timedelta(minutes=window)).strftime("%Y-%m-%d %H:%M:%S")
        end_window = (datetime.strptime(end_window, '%Y-%m-%d %H:%M:%S') + timedelta(minutes=window)).strftime("%Y-%m-%d %H:%M:%S")
        timeint_on_first_window = timeint+1
    return np.mean(mae), np.mean(mse)

In [5]:
# Grid search over every (C, epsilon, loss) combination.
# `temp_hum` and `window_size` are held fixed rather than searched over
# `include_temp_hum` / `input_window_sizes`; `test_on_data` does not use
# them at the moment, so looping over them would only repeat identical runs.
temp_hum = False
window_size = 0

# Collect one record per combination and build the DataFrame once at the end:
# `DataFrame.append` was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
records = []
for c in cs:
    for epsilon in epsilons:
        for loss in losses:
            mae, mse = test_on_data(c, epsilon, loss, temp_hum, window_size, all_static_data)
            records.append({'C': c,
                            'epsilon': epsilon,
                            'loss': loss,
                            'temp_hum': temp_hum,
                            'window_size': window_size,
                            'mae': mae,
                            'mse': mse})
            # Progress indicator: one MAE per evaluated combination.
            print(mae)

# Passing `columns` fixes the column order and keeps the header even if the
# search space is empty.
results = pd.DataFrame(records, columns=["C", "epsilon", "loss", "temp_hum", "window_size", "mae", "mse"])

0.7350746200961072
0.7350746200961072
0.8902839406243604
1.023627802890817
1.023627802890817
0.28449873219274785
0.2750604878246632
0.8581866812076332
1.023627802890817
1.023627802890817
0.30792917667046754
0.2663017008102313
0.8583822226434843
1.023627802890817
1.023627802890817
0.3080129186534631
0.2666573299489804
0.8583822226434843
1.023627802890817
1.023627802890817
0.3080129186534631
0.2666573299489804
0.8583822226434843
1.023627802890817
1.023627802890817
0.3080129186534631
0.2666573299489804
0.8583822226434843
1.023627802890817
1.023627802890817


In [6]:
# Persist the search results so they can be inspected without re-running the
# search. `to_csv` does not create missing parent directories, so make sure
# the output folder exists first (a fresh checkout may not contain it).
os.makedirs('results', exist_ok=True)
results.to_csv('results/hyperparameter_search.csv', index=False)
results

Unnamed: 0,C,epsilon,loss,temp_hum,window_size,mae,mse
0,2e-05,0.1,epsilon_insensitive,False,0,0.735075,0.679732
1,2e-05,0.5,epsilon_insensitive,False,0,0.735075,0.679732
2,2e-05,1.0,epsilon_insensitive,False,0,0.890284,0.944505
3,2e-05,2.5,epsilon_insensitive,False,0,1.023628,1.211358
4,2e-05,5.0,epsilon_insensitive,False,0,1.023628,1.211358
5,0.0002,0.1,epsilon_insensitive,False,0,0.284499,0.101955
6,0.0002,0.5,epsilon_insensitive,False,0,0.27506,0.135833
7,0.0002,1.0,epsilon_insensitive,False,0,0.858187,0.885466
8,0.0002,2.5,epsilon_insensitive,False,0,1.023628,1.211358
9,0.0002,5.0,epsilon_insensitive,False,0,1.023628,1.211358
