## Libs

In [20]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
#from sklearn.metrics import 

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [21]:
from functools import partial
from itertools import chain

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

%matplotlib inline
np.random.seed(1)

-----
## Train, valid split

In [22]:
pars_smp_train = np.load('data/pars_smp_train.npy')
y_smp_train = np.load('data/y_smp_train.npy')

In [23]:
pars_smp_train.shape, y_smp_train.shape

((1000000, 15, 1), (1000000, 200, 3))

In [24]:
test_data_size = 1000
small_pars_smp_train = pars_smp_train[:test_data_size].copy()
small_y_smp_train = y_smp_train[:test_data_size].copy()

In [25]:
if test_data_size != 1000000:
    X_train, X_valid, y_train, y_valid = train_test_split(small_pars_smp_train, small_y_smp_train, test_size=0.2, shuffle=False, random_state=178)
else:
   X_train=small_pars_smp_train
   y_train=small_y_smp_train
   X_valid = np.array(0)
   y_valid = np.array(0)

In [26]:
X_train.shape, y_train.shape, X_valid.shape, y_valid.shape

((800, 15, 1), (800, 200, 3), (200, 15, 1), (200, 200, 3))

In [27]:
type(small_y_smp_train)

numpy.ndarray

In [28]:
type(X_train)

numpy.ndarray

In [29]:
use_cuda = True
if use_cuda:
    device = 'cuda'
else:
    device = 'cpu'

In [30]:
torch.cuda.is_available()

True

## Build the Model

In [31]:
class q_model(nn.Module):
    def __init__(self, 
                 quantiles, 
                 in_shape=1,  
                 dropout=0.5):     
        super().__init__()
        self.quantiles = quantiles
        self.num_quantiles = len(quantiles)
        
        self.in_shape = in_shape
        self.out_shape = len(quantiles)
        self.dropout = dropout
        self.build_model()
        self.init_weights()
        
    def build_model(self): 
        # self.base_model = nn.Sequential(
        #     nn.Linear(self.in_shape, 64),
        #     nn.ReLU(),
        #     # nn.BatchNorm1d(64),
        #     nn.Dropout(self.dropout),
        #     nn.Linear(64, 64),
        #     nn.ReLU(),
        #     # nn.BatchNorm1d(64),
        #     nn.Dropout(self.dropout),
        # )
        self.base_model = nn.LSTM(3, 128, 2, batch_first=True)#input in 3 
        final_layers = [
            nn.Linear(128, 15) for _ in range(len(self.quantiles))#output in 15 
        ]
        self.final_layers = nn.ModuleList(final_layers)
        
    def init_weights(self):
        for m in chain(self.final_layers):
            if isinstance(m, nn.Linear):
                nn.init.orthogonal_(m.weight)
                nn.init.constant_(m.bias, 0)        
        
    # def forward(self, x):
    #     tmp_ = self.base_model(x)
    #     return torch.cat([layer(tmp_) for layer in self.final_layers], dim=1)
    
    def forward(self, x):
        out, _ = self.base_model(x)
        #out = self.final_layers(out[:, -1, :])  # Используйте только последний временной шаг для предсказания
        return torch.stack([layer(out[:, -1, :]) for layer in self.final_layers], dim=1)

In [32]:
class QuantileLoss(nn.Module):
    def __init__(self, quantiles):
        super().__init__()
        self.quantiles = quantiles
        
    def forward(self, preds, target):
        assert not target.requires_grad
        assert preds.size(0) == target.size(0)
        losses = []
        for i, q in enumerate(self.quantiles):
            errors = target - preds[:, i]
            losses.append(torch.max((q-1) * errors, q * errors).unsqueeze(1))
        loss = torch.mean(torch.sum(torch.cat(losses, dim=1), dim=1))
        return loss

In [33]:
import tqdm
class Learner:
    def __init__(self, model, optimizer_class, loss_func, device='cpu'):
        self.model = model.to(device)
        self.optimizer = optimizer_class(self.model.parameters())
        self.loss_func = loss_func.to(device)
        self.device = device
        self.loss_history = []
        
    def fit(self, x, y, epochs, batch_size):
        self.model.train()
        for e in tqdm.tqdm(range(epochs)):
            shuffle_idx = np.arange(x.shape[0])
            np.random.shuffle(shuffle_idx)
            x = x[shuffle_idx]
            y = y[shuffle_idx]
            epoch_losses = []
            for idx in range(0, x.shape[0], batch_size):
                self.optimizer.zero_grad()
                batch_x = torch.from_numpy(
                    x[idx : min(idx + batch_size, x.shape[0]),:]
                ).float().to(self.device).requires_grad_(False)
                #print(batch_x)
                #print(type(batch_x))
                batch_y = torch.from_numpy(
                    y[idx : min(idx + batch_size, y.shape[0])]
                ).float().to(self.device).requires_grad_(False)
                preds = self.model(batch_x)
                loss = self.loss_func(preds, batch_y)
                loss.backward()
                self.optimizer.step()
                epoch_losses.append(loss.cpu().detach().numpy())                                
            epoch_loss =  np.mean(epoch_losses)
            self.loss_history.append(epoch_loss)
            print("Epoch {}: {}".format(e+1, epoch_loss))
                
    def predict(self, x, mc=False):
        if mc:
            self.model.train()
        else:
            self.model.eval()
        return self.model(torch.from_numpy(x).float().to(self.device).requires_grad_(False)).cpu().detach().numpy()

## Setup Learner class

In [34]:
# Instantiate model
quantiles = [.1, .25, .50,.75,.90]
model = q_model(quantiles, dropout=0.1)
loss_func = QuantileLoss(quantiles)
learner = Learner(model, partial(torch.optim.Adam, weight_decay=1e-6), loss_func, device=device).to(device=device)

AttributeError: 'Learner' object has no attribute 'to'

## Train the Model

In [None]:
print(X_train.shape, y_train.shape)

(1000000, 15, 1) (1000000, 200, 3)


In [None]:
X_train.squeeze(2).shape

(1000000, 15)

In [None]:
# Run training
epochs = 10
learner.fit(y_train, X_train.squeeze(2), epochs, batch_size=100)

  0%|          | 0/10 [03:23<?, ?it/s]


KeyboardInterrupt: 

In [None]:
y_valid.shape

(20000, 200, 3)

In [None]:
y_train.shape

(80000, 200, 3)

In [None]:
learner

<__main__.Learner at 0x1389e1969d0>

In [None]:
import torch
torch.cuda.empty_cache()

In [None]:
import pickle
filename = "learner.class"
file = open(filename, 'wb') 
pickle.dump(learner, file=file)


## Predict

## Other

In [None]:
tmp.shape

In [None]:
1

In [None]:
# Make the prediction on the meshed x-axis
tmp = learner.predict(xx)
y_lower, y_pred, y_upper = tmp[:, 0], tmp[:, 2], tmp[:, 4]

# Plot the function, the prediction and the 90% confidence interval based on
# the MSE
fig = plt.figure()
plt.plot(xx, f(xx), 'g:', label=u'$f(x) = x\,\sin(x)$')
plt.plot(X, y, 'b.', markersize=10, label=u'Observations')
plt.plot(xx, y_pred, 'r-', label=u'Prediction')
plt.plot(xx, y_upper, 'k-')
plt.plot(xx, y_lower, 'k-')
plt.fill(np.concatenate([xx, xx[::-1]]),
         np.concatenate([y_upper, y_lower[::-1]]),
         alpha=.5, fc='b', ec='None', label='90% prediction interval')
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
plt.ylim(-10, 20)
plt.legend(loc='upper left')
plt.show()

In [None]:
tmp[0]

In [None]:
predictions = learner.predict(X)
np.mean(predictions[:, 0]), np.mean(predictions[:, 2]), np.mean(predictions[:, 4])

In [None]:
in_the_range = np.sum((y >= predictions[:, 0]) & (y <= predictions[:, 4]))
print("Percentage in the range (expecting 90%):", in_the_range / len(y) * 100)

In [None]:
out_of_the_range = np.sum((y < predictions[:, 0]) | (y > predictions[:, 4]))
print("Percentage out of the range (expecting 10%):", out_of_the_range / len(y)  * 100)

## Legacy

In [None]:
class MacroEconomicModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(MacroEconomicModel, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = self.fc(out[:, -1, :])  # Используйте только последний временной шаг для предсказания
        return out

# Определение размеров входных и выходных данных
input_size = 3  # Размерность данных о приросте ВВП, инфляции и процентной ставке
hidden_size = 64  # Размер скрытого состояния RNN
output_size = 15  # Размер параметров модели
num_layers = 2  # Количество слоев LSTM     
                                
# Создание экземпляра модели
model = MacroEconomicModel(input_size, hidden_size, output_size, num_layers).to(device=device)

# Определение функции потерь и оптимизатора
criterion = nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# CNN BiLSTM
class CNN_BiLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, bidirectional, cnn_out_channels, cnn_kernel_size):
        super(CNN_BiLSTM, self).__init__()
        
        # 1D Convolutional Layer
        self.cnn = nn.Sequential(
            nn.Conv1d(input_dim, cnn_out_channels, kernel_size=cnn_kernel_size, stride=1),
            nn.MaxPool1d(kernel_size=2),
            nn.ReLU()
        )
        
        # BiLSTM Layer
        self.bilstm = nn.LSTM(input_size=cnn_out_channels, 
                              hidden_size=hidden_dim, 
                              num_layers=num_layers, 
                              bidirectional=bidirectional, 
                              batch_first=True)
        
        # Fully Connected Layer
        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        
    def forward(self, x):
        # Input x: (batch_size, sequence_length, input_dim)
        x = x.permute(0, 2, 1)  # Reshape for Conv1d: (batch_size, input_dim, sequence_length)
        x = self.cnn(x)
        x = x.permute(0, 2, 1)  # Reshape for BiLSTM: (batch_size, sequence_length, cnn_out_channels)
        output, (hidden, cell) = self.bilstm(x)
        # Take the output of the last time step
        if self.bilstm.bidirectional:
            hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        else:
            hidden = hidden[-1, :, :]
        out = self.fc(hidden)
        return out


# Define the model with your specific parameters
input_dim = 3  # Input dimension
hidden_dim = 64  # Hidden dimension for BiLSTM
output_dim = 15  # Output dimension
num_layers = 2  # Number of BiLSTM layers
bidirectional = True  # Use bidirectional BiLSTM
cnn_out_channels = 64  # Number of CNN output channels
cnn_kernel_size = 3  # Kernel size for CNN

model = CNN_BiLSTM(input_dim, hidden_dim, output_dim, num_layers, bidirectional, cnn_out_channels, cnn_kernel_size).to(device=device)

# Loss & optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Преобразование данных в тензоры PyTorch
y_train = torch.Tensor(y_train)
X_train = torch.Tensor(X_train)

y_valid = torch.Tensor(y_valid)
X_valid = torch.Tensor(X_valid)


batch_size = 180
kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}
train_dataset = TensorDataset(y_train, X_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, **kwargs)

valid_dataset = TensorDataset(y_valid, X_valid)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, **kwargs)

sss = int(X_train.shape[0]/batch_size+1)
# Обучение модели
num_epochs = 10  # Количество эпох обучения
history = {
    'training_loss':[],
    'validation_loss':[]
}
for epoch in range(num_epochs):
    model.train()
    batch_losses = []#.to(device)
    for batch in train_loader:
        inputs, targets = batch
        inputs, targets = inputs.to(device), targets.to(device)
        # Обнуляем градиенты
        optimizer.zero_grad()

        # Проход вперед (forward pass)
        outputs = model(inputs)

        # Вычисление функции потерь
        loss = torch.sqrt(criterion(outputs, targets.squeeze()))
        #batch_losses.append(loss.detach().numpy())
        batch_losses.append(loss.item())
        loss.backward()
        optimizer.step()
    #print(len(batch_losses))
    training_loss = np.mean(batch_losses)
    
    with torch.no_grad():
        batch_val_losses = []
        for batch_val in valid_loader:
            inputs_val, targets_val = batch_val
            inputs_val, targets_val = inputs_val.to(device), targets_val.to(device)
            #print(inputs_val.shape, targets_val.shape)
            model.eval()

            # Проход вперед (forward pass)
            outputs_val = model(inputs_val)

            # Вычисление функции потерь
            loss_val = torch.sqrt(criterion(outputs_val, targets_val.squeeze()))
            batch_val_losses.append(loss_val)
            #print(batch_val_losses)
           # print(type(batch_val_losses))
            validation_loss = torch.mean(torch.stack(batch_val_losses))



    # Выводим информацию о процессе обучения
    #print(f'Эпоха [{epoch + 1}/{num_epochs}], Потери train: {loss.item()}')
    print(f'Эпоха [{epoch + 1}/{num_epochs}], Потери train: {training_loss.item()}, Loss valid: {validation_loss.item()}')
    history['training_loss'].append(training_loss.item())
    history['validation_loss'].append(validation_loss.item())

In [None]:
print(history)
from matplotlib import pyplot as plt
#plt.plot(torch.stack(history['training_loss']).cpu())
plt.plot(history['training_loss'])
plt.plot(history['validation_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
torch.save(model.state_dict(), 'trained_model_CNN_BiLSTM.pth')

In [None]:
outputs_val.shape

----
### Prediction

In [None]:
y_smp_test = np.load('data/y_smp_test.npy')
y_test = torch.Tensor(y_smp_test)
y_test.shape

In [None]:
# Create dataloader
test_dataset = TensorDataset(y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Evalute model on test data
with torch.no_grad():
    all_outputs_test = []
    for batch in test_loader:
        inputs_test = batch[0]
        inputs_test = inputs_test.to(device=device)
        model.eval()

        outputs_test = model(inputs_test).unsqueeze(2)
        all_outputs_test.append(outputs_test)

    # Concat to common tensor
    final_outputs_test = torch.cat(all_outputs_test, dim=0)

In [None]:
batch_size = 5  # Window size

# Empty tensor for saving result
result = torch.zeros((final_outputs_test.size(0), final_outputs_test.size(1), 6))

for i in range(0, final_outputs_test.size(0), batch_size):
    batch = final_outputs_test[i:i+batch_size]
    
    # Calculate mean
    batch_mean = torch.mean(batch, dim=0).squeeze(1)

    # Sort batch for calculate quantiles
    sorted_batch, _ = torch.sort(batch, dim=0)
    sorted_batch = sorted_batch.to(device=device)
    quantiles = torch.quantile(sorted_batch, torch.Tensor([0.1, 0.25, 0.5, 0.75, 0.9]).to(device=device), dim=0).to(device=device)

    # Save to final tensor
    result[i:i+batch_size, :, 0] = batch_mean
    result[i:i+batch_size, :, 1:6] = quantiles

result.shape

In [None]:
batch_size = 5  # Window size

# Empty tensor for saving result
result = torch.zeros((final_outputs_test.size(0), final_outputs_test.size(1), 6))

for i in range(0, final_outputs_test.size(0), batch_size):
    batch = final_outputs_test[i:i+batch_size]
    
    # Calculate mean
    batch_mean = torch.mean(batch, dim=0).squeeze(1)

    # Sort batch for calculate quantiles
    sorted_batch, _ = torch.sort(batch, dim=0)
    sorted_batch = sorted_batch.to(device=device)
    quantiles = torch.quantile(sorted_batch, torch.Tensor([0.1, 0.25, 0.5, 0.75, 0.9]).to(device=device), dim=0)

    # Save to final tensor
    result[i:i+batch_size, :, 0] = batch_mean
    result[i:i+batch_size, :, 1:6] = quantiles

result.shape

In [None]:
np.save(file='submission.npy', arr=result)