In [22]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from tqdm.notebook import tqdm, trange
import scipy.stats as sps

In [23]:
data_np_train = np.load("y_smp_train.npy")
data_np_train_labels = np.load("pars_smp_train.npy")
data_np_test = np.load("y_smp_test.npy")

In [24]:
class Series_data(Dataset):
    def __init__(self, data_np, target_np = None):
        super().__init__()
        self.data_np = data_np
        self.target_np = target_np

    def __len__(self):
        return len(self.data_np)

    def __getitem__(self, index):
        if (self.target_np is not None):
            sample = {"data" : torch.from_numpy(self.data_np[index]).float(), "label": torch.from_numpy(self.target_np[index]).float().flatten()}
        else:
            sample = {"data" : torch.from_numpy(self.data_np[index]).float()}
        return sample

In [25]:
train_dataset = Series_data(data_np_train, data_np_train_labels)
val_dataset = Series_data(data_np_train, data_np_train_labels)
test_dataset = Series_data(data_np_test)

In [26]:
loaders = {
    'train': DataLoader(train_dataset, shuffle=True, batch_size=128),
    'val': DataLoader(val_dataset, shuffle=False, batch_size=128),
    'test': DataLoader(test_dataset, shuffle=False, batch_size=128)
}

In [27]:
device = 'cuda'

In [32]:
def training(model, criterion, optimizer, num_epochs, loaders, max_grad_norm=2):
    best_loss = 9999
    for e in trange(num_epochs, leave=False):
        model.train()
        num_iter = 0
        pbar = tqdm(loaders["train"], leave=False)
        for sample in pbar:
            optimizer.zero_grad()
            inputs = sample["data"].to(device)
            labels = sample["label"].to(device)
            prediction = model(inputs)
            loss = criterion(prediction, labels)
            loss.backward()
            if max_grad_norm is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            num_iter += 1
        valid_loss = 0
        num_iter = 0
        model.eval()
        with torch.no_grad():
            pbar = tqdm(loaders["val"], leave=False)
            for sample in pbar:
                inputs = sample["data"].to(device)
                labels = sample["label"].to(device)
                prediction = model(inputs)
                valid_loss += criterion(prediction, labels)
                num_iter += 1
        if ((valid_loss / num_iter) < best_loss):
            best_model_wts = model.state_dict()
            best_loss = valid_loss / num_iter
        print(f"Valid Loss: {valid_loss / num_iter}")
    model.load_state_dict(best_model_wts)
    return model

In [35]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, batch_size=128, output_size=15):
        super().__init__()
        self.batch_size = batch_size
        self.lstm = nn.LSTM(input_size, hidden_size, 3, batch_first=True)
        self.lin1 = nn.Linear(hidden_size * 2, 200)
        self.sigmoid = nn.Sigmoid()
        self.lin2 = nn.Linear(200, output_size)


    def forward(self, x):
        output, (hn, cn) = self.lstm(x)
        output1 = output.mean(dim=1)
        output2 = output.max(dim=1)[0]
        output = self.lin1(torch.cat((output1, output2), dim=1))
        output = self.sigmoid(output)
        output = self.lin2(output)
        return output

In [36]:
model = LSTMModel(3, 100).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 15

In [37]:
model = training(model, criterion, optimizer, num_epochs, loaders)

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.038490477949380875


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.03590776398777962


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.03512037172913551


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.03369101881980896


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.03323784098029137


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.033123865723609924


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.03246855363249779


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.03230319544672966


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.0319226048886776


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.03162214905023575


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.032499637454748154


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.03116694465279579


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.03110741451382637


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.032257918268442154


  0%|          | 0/7813 [00:00<?, ?it/s]

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.031140755861997604


LSTMModel(
  (lstm): LSTM(3, 100, num_layers=3, batch_first=True)
  (lin1): Linear(in_features=200, out_features=200, bias=True)
  (sigmoid): Sigmoid()
  (lin2): Linear(in_features=200, out_features=15, bias=True)
)

In [38]:
best_model_wts = model.state_dict()

In [39]:
path = "best_model.pth"
torch.save(best_model_wts, path)

In [40]:
def validate(model, criterion, loaders):
    with torch.no_grad():
        num_iter = 0
        valid_loss = torch.zeros(15).cuda()
        pbar = tqdm(loaders["val"], leave=False)
        for sample in pbar:
            inputs = sample["data"].to(device)
            labels = sample["label"].to(device)
            prediction = model(inputs)
            valid_loss += criterion(prediction, labels).mean(dim=0)
            num_iter += 1
    print(f"Valid Loss: {(valid_loss.cpu().mean() / num_iter)}")
    return (valid_loss.cpu() / num_iter).tolist()

In [41]:
val_criterion = nn.MSELoss(reduction='none')
val_loss = validate(model, val_criterion, loaders)

  0%|          | 0/7813 [00:00<?, ?it/s]

Valid Loss: 0.031140707433223724


[0.07726964354515076,
 0.27794715762138367,
 0.014929923228919506,
 0.002997052390128374,
 0.010458205826580524,
 0.0009394066873937845,
 0.03106650896370411,
 0.035353176295757294,
 0.00266340677626431,
 0.0009877304546535015,
 0.00803249143064022,
 0.0037162231747061014,
 0.0003479922597762197,
 0.00025515526067465544,
 0.00014657003339380026]

In [48]:
def test_diff_distr(model, loss, loaders):
    with torch.no_grad():
        num_iter = 0
        predictions = []
        pbar = tqdm(loaders["test"], leave=False)
        for sample in pbar:
            inputs = sample["data"].to(device)
            preds = model(inputs).cpu()
            for pred in preds:
                features = []

                distribution = sps.norm(pred[0], loss[0])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[0]), feature), axis=None)
                features.append(feature)
                
                distribution = sps.gamma(loss[1], pred[1] - loss[1])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[1]), feature), axis=None)
                features.append(feature)

                distribution = sps.norm(pred[2], loss[2])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[2]), feature), axis=None)
                features.append(feature)

                distribution = sps.norm(pred[3], loss[3])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[3]), feature), axis=None)
                features.append(feature)

                distribution = sps.norm(pred[4], loss[4])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[4]), feature), axis=None)
                features.append(feature)

                distribution = sps.norm(pred[5], loss[5])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[5]), feature), axis=None)
                features.append(feature)

                distribution = sps.gamma(loss[6], pred[6] - loss[6])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[6]), feature), axis=None)
                features.append(feature)

                distribution = sps.gamma(loss[7], pred[7] - loss[7])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[7]), feature), axis=None)
                features.append(feature)

                distribution = sps.norm(pred[8], loss[8])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[8]), feature), axis=None)
                features.append(feature)

                distribution = sps.norm(pred[9], loss[9])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[9]), feature), axis=None)
                features.append(feature)

                distribution = sps.norm(pred[10], loss[10])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[10]), feature), axis=None)
                features.append(feature)

                distribution = sps.uniform(loc = (pred[11] - 3 * loss[11] ** 0.5), scale = (2 * 3 * loss[11] ** 0.5))
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[11]), feature), axis=None)
                features.append(feature)

                distribution = sps.norm(pred[12], loss[12])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[12]), feature), axis=None)
                features.append(feature)

                distribution = sps.norm(pred[13], loss[13])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[13]), feature), axis=None)
                features.append(feature)

                distribution = sps.norm(pred[14], loss[14])
                feature = distribution.ppf([0.1, 0.25, 0.5, 0.75, 0.9])
                feature = np.concatenate((np.array(pred[14]), feature), axis=None)
                features.append(feature)

                predictions.append(features)
            num_iter += 1
    return predictions

In [44]:
predictions = test_diff_distr(model, val_loss, loaders)

  0%|          | 0/782 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [20]:
predictions = np.array(predictions)
np.save('submit.npy', predictions)