In [3]:
import torch
from torch import nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')

import datetime
import sys
# Add the parent directory to the module search path before importing the
# project-local modules below (presumably where `models` and `utils` live).
sys.path.append('..')
import models
from utils.modeling_utils import SequenceGeneratorCV

# Auto-reload external scripts so that edits to them are picked up without
# restarting the notebook kernel.
%reload_ext autoreload
%autoreload 2

# Device identifier passed to SequenceGeneratorCV via its `device` argument.
DEVICE = "cpu"

# Data prep

In [4]:
# Load the six-cities dataset, parse the `dt` column as datetimes, keep only
# the rows for Warszawa and renumber the index from zero.
df = (
    pd.read_csv('../../data/csv/six_cities.csv')
    .assign(dt=lambda frame: pd.to_datetime(frame['dt']))
    .loc[lambda frame: frame['city'].isin(['Warszawa'])]
    .reset_index(drop=True)
)

In [5]:
# Window sizes: predict the next two days (48 hours) from 96 hours of history.
# NOTE(review): neither value is passed to SequenceGeneratorCV in this cell --
# confirm where they are actually consumed.
input_width = 96
output_width = 48

# Features handed to the generator as `normalize_features`.
# Original author's note: this list could just as well live somewhere as a
# constant; these features have very non-normal distributions.
normalize_features = [
    'humidity',
    'clouds.all',
    'rain.1h',
    'snow.1h',
    'co',
    'no',
    'no2',
    'so2',
    'pm2_5',
    'pm10',
    'nh3',
]

# Numeric variables we want to use in the model.
numeric_features = [
    'day_sin',
    'day_cos',
    'week_sin',
    'week_cos',
    'month_sin',
    'month_cos',
    'co',
    'no',
]

# Categorical variables we want to use in the model.
categorical_features = ['state']

# Variables we want to predict.
output_columns = ['co', 'no']

seq_gen = SequenceGeneratorCV(
    numeric_features=numeric_features,
    categorical_features=categorical_features,
    output_features=output_columns,
    normalize_features=normalize_features,
    device=DEVICE,
)

# Initialise the preprocessor from the first ten rows only.
# NOTE(review): presumably this only needs the column layout, not the data
# statistics -- confirm against SequenceGeneratorCV.init_preprocessor.
seq_gen.init_preprocessor(df.iloc[:10])

In [6]:
# Split the data into a collection of frames (`cities_dfs`) and an iterable of
# (train indices, validation indices) pairs (`cv_indices`).
# NOTE(review): the positional arguments 5 and False are presumably the number
# of folds and a shuffle-like flag -- confirm against
# SequenceGeneratorCV.split_data.
cities_dfs, cv_indices = seq_gen.split_data(df, 5, False)

# Only the first fold is used in this notebook. Take it explicitly instead of a
# for-loop that always breaks: with an empty `cv_indices` the loop body would
# never run, leaving the variables below undefined (or silently stale from an
# earlier run of this cell in the same kernel).
first_fold = next(iter(cv_indices), None)
if first_fold is None:
    raise ValueError("seq_gen.split_data returned no cross-validation folds")
idx_train, idx_val = first_fold

# Select the fold's rows from every frame and stack them vertically.
df_train = pd.concat([city.loc[idx_train, :] for city in cities_dfs], axis=0)
df_val = pd.concat([city.loc[idx_val, :] for city in cities_dfs], axis=0)

# Fit the preprocessor on the training rows only, then apply the fitted
# transformation to the validation rows.
df_train = seq_gen.preprocessor.fit_transform(df_train)
df_val = seq_gen.preprocessor.transform(df_val)

dataloader_train, dataloader_val = seq_gen.get_dataloaders(df_train, df_val)

In [7]:
# Build the seq2seq RNN on top of the sequence generator.
# NOTE(review): the meaning of the positional arguments 50, 1, 1 is not visible
# here (presumably hidden size and layer counts) -- confirm against
# models.RNN_S2S.
rnn_s2s = models.RNN_S2S(seq_gen, 50, 1, 1)

# Smoke test: push one training batch through the model and print the shapes
# of the input, the target and the prediction.
for X, y in dataloader_train:
    # NOTE(review): the third argument (0.5) is presumably a teacher-forcing
    # ratio -- confirm against RNN_S2S.forward.
    y_p = rnn_s2s(X, y, 0.5)
    print(*(tensor.shape for tensor in (X, y, y_p)))
    break

torch.Size([144, 96, 9]) torch.Size([144, 48, 2]) torch.Size([144, 48, 2])
