**Import statements**


In [6]:
import pandas as pd
from datetime import datetime
from datetime import timedelta
import numpy as np
import pykrige.kriging_tools as kt
from pykrige.ok import OrdinaryKriging
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pickle

import constants
import functions5 as functions

%matplotlib inline

**Load Data**

In [7]:
# Static (calibrated) sensor readings, passed through the grid-coordinate transform.
all_static_data = functions.transform_to_grid_coordinates(
    functions.load_static_sensors_calibrated_2()
)
# Mobile sensor readings, passed through the same grid-coordinate transform.
mobile_sensor_data = functions.transform_to_grid_coordinates(
    functions.load_mobile_sensors_2()
)

**Compute predictions for stationary sensors**

In [8]:
def correct(pred_value, error_dict, time=None, min_errors=10):
    """Shift a prediction by the mean of previously recorded errors.

    Parameters
    ----------
    pred_value : float
        The raw prediction to adjust.
    error_dict : sequence of (key, error) pairs
        Despite its name, this is iterated as a sequence of 2-tuples; only
        the second element (the error) of each pair is used.
    time : optional
        Unused. Kept so existing callers that pass it keep working.
    min_errors : int, default 10
        Minimum number of recorded pairs required before any correction is
        applied. With fewer pairs, ``pred_value`` is returned untouched.

    Returns
    -------
    float
        ``pred_value`` unchanged when there are too few errors; otherwise
        ``pred_value + mean(errors)``, clamped to ``0.0`` when that sum is
        negative.
    """
    # max(..., 1) keeps np.mean from ever being evaluated on an empty list.
    if len(error_dict) < max(min_errors, 1):
        return pred_value

    # Alternative the author experimented with: use the error stored under
    # the largest key instead of the mean of all errors.
    corrected = pred_value + np.mean([error for _, error in error_dict])
    if corrected < 0:
        return 0.0
    return corrected

In [19]:
# Per-window evaluation results (one entry appended per processed window).
mae = []
mse = []
squares = []  # number of rows in `to_test` for each window

start_time = '2018-07-23 00:00:00'
start_window = '2018-07-23 13:45:00'
end_window = '2018-07-23 14:00:00'

number_of_windows = 4
window = 15
par_grid = functions.create_par(c=0.00002, epsilon=0.001)

# Load the 1st dataset pickle file
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load error grids produced by a trusted run of this project.
with open("tmp_error_grid/grid_of_errors.p", "rb") as grid_file:
    grid_of_errors = pickle.load(grid_file)
# Empty error grid
#grid_of_errors = [[[] for x in range(20)] for y in range(20)]

timeint_on_first_window = 0
tmp_to_test_filename = 'tmp_to_test/with_mobile_data_{}_{}.csv'.format(start_window, number_of_windows)

time_format = '%Y-%m-%d %H:%M:%S'
# NOTE(review): assumes `window` is expressed in minutes, matching the
# 15-minute gap between start_window and end_window - confirm against
# functions.train.
window_step = timedelta(minutes=window)
grid_size = 20            # the error grid is read as grid_size x grid_size cells
max_errors_per_cell = 10  # each cell keeps only its most recent errors

for window_number in range(1, number_of_windows + 1):
    end_time = start_window
    # Author's notes: train without mobile data;
    # test on static data instead of mobile data.
    timeint, par_grid, grid_of_errors = functions.train(all_static_data, mobile_sensor_data, start_time, end_time, par_grid, window, grid_of_errors, timeint_on_first_window)
    to_test = functions.test_mobile(par_grid, timeint, mobile_sensor_data, start_window, end_window, 1)

    # Error of the raw (uncorrected) prediction, stored as a column of to_test.
    to_test['error_PM2.5'] = to_test['PM2.5'] - to_test['pred_PM2.5']
    to_test['start_window'] = start_window

    # Keep the raw predictions: 'pred_PM2.5' is overwritten with the
    # bias-corrected values below.
    to_test['pred_PM2.5_raw'] = to_test['pred_PM2.5']

    # Record the time at which the window started.
    to_test['Timestamp'] = start_window

    # Correct the predictions with the recorded errors. The grid is not
    # modified while correcting, so the flattened list of (timestamp, error)
    # pairs is built once per window instead of once per row.
    all_errors = [
        entry
        for lat_idx in range(grid_size)
        for long_idx in range(grid_size)
        for entry in grid_of_errors[lat_idx][long_idx]
    ]
    # Positional assignment: does not depend on to_test having a unique index.
    to_test['pred_PM2.5'] = [correct(pred, all_errors) for pred in to_test['pred_PM2.5']]

    # Store this window's raw errors in the grid, keyed by the window start.
    for lat, lon, error in zip(to_test['lat_grid'], to_test['long_grid'], to_test['error_PM2.5']):
        cell_errors = grid_of_errors[int(lat)][int(lon)]
        cell_errors.append((start_window, error))
        if len(cell_errors) > max_errors_per_cell:
            cell_errors.pop(0)  # drop the oldest entry

    squares.append(to_test.shape[0])
    mae.append(mean_absolute_error(to_test['PM2.5'], to_test['pred_PM2.5']))
    mse.append(mean_squared_error(to_test['PM2.5'], to_test['pred_PM2.5']))

    # Slide forward: the next training interval starts where this one ended,
    # and the test window moves on by one step.
    start_time = end_time
    start_window = (datetime.strptime(start_window, time_format) + window_step).strftime(time_format)
    end_window = (datetime.strptime(end_window, time_format) + window_step).strftime(time_format)
    timeint_on_first_window = timeint+1
    

In [20]:
#grid_of_errors[5][12]

In [21]:
# Per-window mean absolute error, then the number of test rows per window.
print(mae, squares, sep="\n")

[1.3871021426252597, 0.6128872154349126, 0.7070788578019068, 0.3986216386637037]
[1, 39, 45, 2]
