In [42]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle

In [43]:
def build_sequences(local_data, window=28, stride=28):
    """Cut ``local_data`` into fixed-length windows along its first axis.

    Windows start at offsets ``0, stride, 2*stride, ...`` and only windows that
    fit completely inside the data are kept; a trailing remainder shorter than
    ``window`` is dropped.

    Parameters
    ----------
    local_data : array-like
        Data to slice; anything ``np.asarray`` accepts. Windows are taken along
        axis 0, any further axes are carried through unchanged.
    window : int, default 28
        Number of consecutive rows in each window. Must be >= 1.
    stride : int, default 28
        Offset between the starts of two consecutive windows. Must be >= 1.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, window, *local_data.shape[1:])``. When no
        full window fits, ``n_windows`` is 0 and the trailing dimensions are
        still present, so callers can rely on ``.shape``.

    Raises
    ------
    ValueError
        If ``window`` or ``stride`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")
    # A non-positive stride would never advance the start offset.
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")

    data = np.asarray(local_data)
    # Start offsets of every window that fits entirely inside the data.
    starts = range(0, data.shape[0] - window + 1, stride)
    if len(starts) == 0:
        # np.array([]) would collapse to shape (0,); keep the window axes.
        return np.empty((0, window) + data.shape[1:], dtype=data.dtype)
    return np.array([data[start:start + window] for start in starts])

In [44]:
def _build_split(data_dict, residents, rows, window, stride, n_inputs, target_labels):
    """Window every series over ``rows`` and pack them into (inputs, outputs)."""
    if not data_dict:
        raise ValueError("data_dict must contain at least one series")

    # Any series fixes the number of windows for the split.
    # NOTE(review): assumes all series in data_dict share the same length.
    reference = next(iter(data_dict.values()))
    n_windows = build_sequences(reference[rows], window=window, stride=stride).shape[0]

    # Pre-filled with ones: a column that never receives a province keeps
    # the value 1.
    inputs = np.ones((n_windows, window, n_inputs))
    outputs = np.ones((n_windows, window, len(target_labels)))

    in_col = 0
    out_col = 0
    for sigla, series in data_dict.items():
        # Scale by the province's 'Residenti' entry, times 100000.
        scaled = series[rows] / residents[sigla] * 100000
        windows = build_sequences(scaled, window=window, stride=stride)
        if sigla in target_labels:
            outputs[:, :, out_col] = windows
            out_col += 1
        else:
            inputs[:, :, in_col] = windows
            in_col += 1
    return inputs, outputs


def build_dataset(data_dict, T, window=28, stride=28):
    """Build windowed train/test arrays from per-province series.

    Each series is divided by the province's ``'Residenti'`` value read from
    ``../data/Popolazione_province.csv``, multiplied by 100000, and cut into
    windows with ``build_sequences``. Rows ``[0:T]`` form the training split,
    rows ``[T:]`` the test split.

    Provinces listed in ``target_labels`` below fill the output arrays; every
    other province fills the input arrays. Columns follow the iteration order
    of ``data_dict``.

    Parameters
    ----------
    data_dict : dict
        Maps a province code (``sigla``) to its series. Every code must also be
        present in the index of the population CSV.
        NOTE(review): series are assumed to be 1-D and of equal length.
    T : int
        Row index separating the training rows from the test rows.
    window : int, default 28
        Window length, forwarded to ``build_sequences``; also the size of the
        second axis of every returned array.
    stride : int, default 28
        Window stride, forwarded to ``build_sequences``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(input_train, output_train, input_test, output_test)`` with shapes
        ``(n_windows, window, 40)`` for inputs and ``(n_windows, window, 7)``
        for outputs, where ``n_windows`` is computed per split.

    Raises
    ------
    ValueError
        If ``data_dict`` is empty.
    IndexError
        If ``data_dict`` holds more than 40 non-target provinces.
    """
    # Number of input columns; kept fixed so the array shapes stay unchanged.
    n_inputs = 40
    # Provinces whose series populate the output arrays.
    target_labels = ['MO', 'RO', 'VI', 'AL', 'BI', 'PN', 'MB']

    # Forward slashes: the former '..\data\...' literal relied on invalid escape
    # sequences and only resolved on Windows.
    population = pd.read_csv('../data/Popolazione_province.csv', index_col=0)
    residents = population['Residenti']

    input_train, output_train = _build_split(
        data_dict, residents, slice(0, T), window, stride, n_inputs, target_labels)
    input_test, output_test = _build_split(
        data_dict, residents, slice(T, None), window, stride, n_inputs, target_labels)

    return input_train, output_train, input_test, output_test

In [45]:
# Load the dictionary of fitted per-province objects from the file.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only open pickles produced by this project.
# Forward slashes: the former '..\data\...' literal relied on invalid escape
# sequences and only resolved on Windows.
with open('../data/provinces_fit.pickle', 'rb') as file:
    provinces_fitted = pickle.load(file)

# Each fitted object is evaluated on the integer grid 0 .. N_POINTS - 1.
N_POINTS = 1173
fit_data_dict = {
    sigla: fitted_model.predict(np.arange(0, N_POINTS))
    for sigla, fitted_model in provinces_fitted.items()
}

In [46]:
# Split the evaluated series at row 800: rows [0:800] feed the training
# arrays, rows [800:] the test arrays.
(input_train, output_train,
 input_test, output_test) = build_dataset(data_dict=fit_data_dict, T=800)

In [47]:
# # Load the dictionary from the file
# with open('..\data\mat_dict.pickle', 'rb') as file:
#     mat_dict = pickle.load(file)
# n_windows = mat_dict['BO'].shape[0]

# INPUTDIM = (28,40)
# OUTPUTDIM = (28,7)
# test_labels = ['MO','RO', 'VI', 'AL', 'BI', 'PN', 'MB']


In [48]:
# # Create training set

# input_train = np.ones((n_windows,INPUTDIM[0],INPUTDIM[1]))
# output_train = np.ones((n_windows,OUTPUTDIM[0], OUTPUTDIM[1]))
# i=0
# j=0
# for sigla, local_data in mat_dict.items():
#     if not sigla in test_labels:
#         input_train[:,:,i]=local_data
#         i += 1
#     else:
#         output_train[:,:,j]=local_data
#         j += 1

In [49]:
# # creating trimmed dataset for test set

# # Load the dictionary from the file
# with open('../data/new_pos_dict.pickle', 'rb') as file:
#     new_pos_dict = pickle.load(file)

# T = 1200
# t0 = 801
# mat_dict_test = build_sequences(new_pos_dict, t0, T, stride=14)
# n_windows_test = mat_dict_test['BO'].shape[0]

In [50]:
# # Create test set
# input_test = np.ones((n_windows_test,INPUTDIM[0],INPUTDIM[1]))
# output_test = np.ones((n_windows_test,OUTPUTDIM[0], OUTPUTDIM[1]))
# i=0
# j=0
# for sigla, local_data in mat_dict_test.items():
#     if not sigla in test_labels:
#         input_test[:,:,i]=local_data
#         i += 1
#     else:
#         output_test[:,:,j]=local_data
#         j += 1

In [53]:
# Optionally write the four dataset arrays to ../data as .npy files.
# Saving is opt-in: switch the flag to True to overwrite the files on disk.
save_flag = False
#save_flag = True
if not save_flag:
    print('Dataset NOT saved')
else:
    for npy_path, array in (
        ('../data/input_train.npy', input_train),
        ('../data/output_train.npy', output_train),
        ('../data/input_test.npy', input_test),
        ('../data/output_test.npy', output_test),
    ):
        np.save(npy_path, array)
    print('Dataset saved')

Dataset saved
