In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
import sys
import tensorflow as tf
from tensorflow.keras import layers, Model

sys.path.append('..')
from src.data.make_dataset import make_dataset




In [2]:
# Relative path of the CSV file that is read into `df` by the loading cell.
DATA_PATH = '../data/processed/EEA-SK-Ba-trend.csv'
# Number of past time steps per input window; passed to make_dataset as `n_past`
# and used as the second axis when the scaled inputs are reshaped back to 3-D.
N_PAST = 48
# Number of future time steps per label window; passed to make_dataset as
# `n_future` and used as the second axis when the scaled labels are reshaped.
N_FUTURE = 1

In [3]:
# Read the CSV and parse the timestamp column so it can be compared with a date.
raw = pd.read_csv(DATA_PATH)
raw['DatetimeBegin'] = pd.to_datetime(raw['DatetimeBegin'])

# Keep only the rows that begin before 2020-01-01 (i.e. data up to the end of
# 2019) and only the three pollutant concentration columns, in this order.
pollutant_columns = ['PM10 Concentration', 'PM2.5 Concentration', 'NO2 Concentration']
begins_before_2020 = raw['DatetimeBegin'] < '2020-01-01'
df = raw.loc[begins_before_2020, pollutant_columns]

In [4]:
# Build the train / validation / test arrays from the filtered frame.
# The helper's result is unpacked into five arrays: inputs and labels for the
# training and validation splits, and inputs only for the test split.
splits = make_dataset(data_src=df, n_past=N_PAST, n_future=N_FUTURE)
train_X, train_Y, val_X, val_Y, test_X = splits

In [5]:
# Report the shape of every split, one line per split.
print(
    f'Train set shape: {train_X.shape}         Train labels shape: {train_Y.shape}',
    f'Validation set shape: {val_X.shape}     Validation labels shape: {val_Y.shape}',
    f'Test set shape: {test_X.shape}',
    sep='\n',
)

Train set shape: (12214, 48, 3)         Train labels shape: (12214, 1, 3)
Validation set shape: (2580, 48, 3)     Validation labels shape: (2580, 1, 3)
Test set shape: (2580, 48, 3)


In [6]:
# Positions on the last (feature) axis: 0-1 are taken as PM10 / PM2.5 and
# 2 as NO2, matching the column order selected when `df` was built.
pm_features = slice(0, 2)
no2_features = slice(2, 3)

# Slice every split along the feature axis; the order of the targets on the
# left matches the order of the source arrays in the generator.
(train_X_PM10_PM25, train_Y_PM10_PM25,
 val_X_PM10_PM25, val_Y_PM10_PM25,
 test_X_PM10_PM25) = (
    split[:, :, pm_features] for split in (train_X, train_Y, val_X, val_Y, test_X)
)
(train_X_NO2, train_Y_NO2,
 val_X_NO2, val_Y_NO2,
 test_X_NO2) = (
    split[:, :, no2_features] for split in (train_X, train_Y, val_X, val_Y, test_X)
)

# Report the shapes of both feature groups.
print(
    f'PM10 & PM2.5\nTrain set shape: {train_X_PM10_PM25.shape}         Train labels shape: {train_Y_PM10_PM25.shape}',
    f'Validation set shape: {val_X_PM10_PM25.shape}     Validation labels shape: {val_Y_PM10_PM25.shape}',
    f'Test set shape: {test_X_PM10_PM25.shape}',
    sep='\n',
)
print(
    f'\nNO2\nTrain set shape: {train_X_NO2.shape}         Train labels shape: {train_Y_NO2.shape}',
    f'Validation set shape: {val_X_NO2.shape}     Validation labels shape: {val_Y_NO2.shape}',
    f'Test set shape: {test_X_NO2.shape}',
    sep='\n',
)

PM10 & PM2.5
Train set shape: (12214, 48, 2)         Train labels shape: (12214, 1, 2)
Validation set shape: (2580, 48, 2)     Validation labels shape: (2580, 1, 2)
Test set shape: (2580, 48, 2)

NO2
Train set shape: (12214, 48, 1)         Train labels shape: (12214, 1, 1)
Validation set shape: (2580, 48, 1)     Validation labels shape: (2580, 1, 1)
Test set shape: (2580, 48, 1)


In [None]:
def _scale_windows(windows, scaler, n_steps, fit=False):
    """Scale a windowed array feature-wise and restore its 3-D layout.

    The array is flattened to 2-D with the last axis kept as columns, passed
    through ``scaler``, and reshaped to ``(-1, n_steps, n_features)``.

    Args:
        windows: Array whose last axis indexes the features.
        scaler: Object exposing ``fit_transform`` and ``transform``.
        n_steps: Length of the second axis of the returned array.
        fit: When True the scaler is fitted on this array before transforming;
            otherwise the already-fitted scaler is applied as is.

    Returns:
        The scaled array with shape ``(-1, n_steps, n_features)``.
    """
    n_features = windows.shape[-1]
    flat = windows.reshape(-1, n_features)
    scaled = scaler.fit_transform(flat) if fit else scaler.transform(flat)
    return scaled.reshape(-1, n_steps, n_features)


# NOTE: this cell rebinds train_X / train_Y / val_X / val_Y / test_X to their
# scaled versions, so it is meant to run once per kernel session; running it
# again would scale already-scaled data.
# NOTE(review): the per-pollutant slices created in the previous cell were
# taken before scaling and therefore stay unscaled - confirm this is intended.
in_scaler = StandardScaler()
out_scaler = StandardScaler()

# Keep the unscaled last time step of every test window before test_X is
# replaced by its scaled version.
real_values = test_X[:, -1, :].copy()

# Both scalers are fitted on the training split only and then reused, so the
# validation and test splits are transformed with training statistics.
train_X = _scale_windows(train_X, in_scaler, N_PAST, fit=True)
train_Y = _scale_windows(train_Y, out_scaler, N_FUTURE, fit=True)
val_X = _scale_windows(val_X, in_scaler, N_PAST)
val_Y = _scale_windows(val_Y, out_scaler, N_FUTURE)
test_X = _scale_windows(test_X, in_scaler, N_PAST)

In [None]:
# Report the shape of every split again, one line per split, to confirm that
# the arrays kept their layout after scaling.
print(
    f'Train set shape: {train_X.shape}         Train labels shape: {train_Y.shape}',
    f'Validation set shape: {val_X.shape}     Validation labels shape: {val_Y.shape}',
    f'Test set shape: {test_X.shape}',
    sep='\n',
)