In [1]:
# load src/
import sys
import random
sys.path.append('../')
import src.dataset

import numpy as np
from tqdm.auto import tqdm, trange

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
X_train, X_valid, X_test = src.dataset.load_datasets()

In [3]:
# For final submission:
#import scipy
#X_train = X_train + X_valid
#mean = X_train[X_train.nonzero()].mean()

In [4]:
!nvidia-smi

Fri Jun 12 15:53:39 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.104      Driver Version: 410.104      CUDA Version: 10.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 1080    On   | 00000000:01:00.0 Off |                  N/A |
|  0%   36C    P8    15W / 230W |     42MiB /  8117MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [5]:
X_train.count_nonzero(), X_valid.count_nonzero(), X_test.count_nonzero()
valid_indices = list(set(zip(X_train.nonzero()[0], X_train.nonzero()[1])))
# valid_per_line = [[] for _ in range(10000)]
# for i, j in valid_indices:
#     valid_per_line[i].append((i,j))

In [6]:
# from collections import Counter
# def count_vals(X, valid_indices):
#     counts = {}
#     for i, j in valid_indices:
#         val = X[i, j]
#         counts.setdefault(val, 0)
#         counts[val] += 1
#     return counts
# print(count_vals(X_train, valid_indices))

In [7]:
# import random
# X_unbias = X_train.copy()
# for i, j in valid_indices:
#     val = X_train[i, j]
#     if val >= 3 and random.random() < 0.5:
#         X_unbias[i, j] = 0

In [8]:
# valid_indices_unbias = list(set(zip(X_unbias.nonzero()[0], X_unbias.nonzero()[1])))
# print(count_vals(X_unbias, valid_indices))

In [9]:
def _add_regularization(model):
    l2_reg = None
    for W in model.parameters():
        if l2_reg is None:
            l2_reg = W.norm(2)
        else:
            l2_reg = l2_reg + W.norm(2)
    return l2_reg

In [10]:
class Autoencoder(nn.Module):

    def __init__(self):
        super().__init__()
        self.user_dim = 10000
        self.item_dim = 1000
        self.embed_dim = 150
        self.user_embed = nn.Embedding(self.user_dim, self.embed_dim)
        self.item_embed = nn.Embedding(self.item_dim, self.embed_dim)
        self.layer_1 = nn.Linear(self.embed_dim*2, 300)
        self.layer_1a = nn.Linear(300, 300)
        self.layer_2 = nn.Linear(300, 50)
        self.layer_3 = nn.Linear(50, 50)
        self.layer_3a = nn.Linear(50, 300)
        
        self.cls_layer = nn.Linear(300, 1)
    
    def forward(self, data):
        user_idx, item_idx = torch.split(data, 1, dim=1)
#         print("user",user_idx)
#         print("item",item_idx)
        user_idx = torch.squeeze(user_idx, dim=-1)
        item_idx = torch.squeeze(item_idx, dim=-1)

        user_embedding = self.user_embed(user_idx)
        item_embedding = self.item_embed(item_idx)
        assert user_embedding.shape[-1] == item_embedding.shape[-1] == self.embed_dim

        # Input is concatenation
        net_data = torch.cat([user_embedding, item_embedding], dim=-1)

        # Feedforward layers
        net_data = F.relu(self.layer_1(net_data))
        net_data = F.dropout(net_data)
        
        net_data = F.relu(self.layer_1a(net_data))
        net_data = F.dropout(net_data)
        
        net_data = F.relu(self.layer_2(net_data))
        net_data = F.dropout(net_data)
        
        net_data = F.relu(self.layer_3(net_data))
        net_data = F.dropout(net_data)
        
        net_data = F.relu(self.layer_3a(net_data))
        net_data = F.dropout(net_data)
        
        # Predict score for position A_i_j
        y_score = self.cls_layer(net_data)
#         y_score = (4.0 * torch.sigmoid(y_score)) + 1.0
#         return torch.sigmoid(y_score) * 6.0
        return y_score

In [12]:
#del model
model = Autoencoder().cuda()

In [13]:
device = torch.device('cuda')
opt = torch.optim.Adam(model.parameters(), lr=1e-4)

In [16]:
print(X_train.count_nonzero(), X_valid.count_nonzero())

def get_predictions(A):
    model.eval()
    A_pred = np.zeros((10000, 1000))
    
    valid_indices = list(set(zip(A.nonzero()[0], A.nonzero()[1])))
    for i in range(0, len(valid_indices), 64):
        X = valid_indices[i:i+64]
        X_ = torch.tensor(X, dtype=torch.long, device=device)
        y_preds = model(X_)
        if USE_CLS:
            y_preds = torch.argmax(torch.softmax(y_preds, dim=-1), dim=-1)
            if i == 0:
                print(y_preds)
        for j in range(len(X)):
            pred = y_preds[j]
            A_pred[X[j]] = pred

    print("Stats:", np.mean(A_pred[A_pred.nonzero()]), np.std(A_pred.nonzero()))
    return A_pred, valid_indices

def compute_loss():
    A_pred, valid_indices = get_predictions(X_valid)
    losses = np.square(X_valid - A_pred)
    losses = [losses[i,j] for (i, j) in valid_indices]
    mean_loss = np.mean(losses)
    return mean_loss

925567 251385


In [17]:
loss_avg = []
USE_CLS = False

for i in range(0, 300000):
    # randomly sample 32 points from training matrix
    model.train()
    X = random.sample(valid_indices, k=32) # + [(random.randint(0, 9999), random.randint(0, 999)) for _ in range(8)]
#     line = random.randint(0, 9999)
#     valid__ = valid_per_line[line]
#     X = valid__

    y_true = [[X_train[Xi]] for Xi in X]
    X = torch.tensor(X, dtype=torch.long, device=device)
    y_true = torch.tensor(y_true, dtype=torch.long if USE_CLS else torch.float, device=device)
    # step!
    
    #########
#     X = torch.tensor([[-1, -1], [0, 0], [2, 2]], dtype=torch.float)
#     y_true = torch.tensor([[-1.], [0.], [2.]], dtype=torch.float)
    #########
    
    opt.zero_grad()
    y_preds = model(X)
    if USE_CLS:
        loss_fn = torch.nn.CrossEntropyLoss(
            weight=torch.tensor((0., 3., 3., 1., 1., 1.), device=device))
#         print(y_preds.shape, y_true.shape)
        loss = loss_fn(y_preds, y_true.reshape(-1,))
    else:
        loss = (y_preds - y_true) ** 2
    
    # update avg loss
    loss_avg.insert(0, loss.mean().item())
    loss_avg = loss_avg[:500]
    
    if i % 1000 == 0:
        print("It %d Avg loss: %.4f" % (i, np.mean(loss_avg)))
    if i and i % 10000 == 0:
        mean_loss = compute_loss()
        print("Mean loss: %.4f" % mean_loss)
    
#     loss += _add_regularization(model) * 0.1
    loss.mean().backward()
    opt.step()


It 0 Avg loss: 15.8741
It 1000 Avg loss: 2.1456
It 2000 Avg loss: 1.8474
It 3000 Avg loss: 1.6664
It 4000 Avg loss: 1.5339
It 5000 Avg loss: 1.4134
It 6000 Avg loss: 1.3581
It 7000 Avg loss: 1.3275
It 8000 Avg loss: 1.2856
It 9000 Avg loss: 1.2638
It 10000 Avg loss: 1.2848
Stats: 3.8287998216420642 3104.080220526129
Mean loss: 1.2716
It 11000 Avg loss: 1.2687
It 12000 Avg loss: 1.2612
It 13000 Avg loss: 1.2521
It 14000 Avg loss: 1.2481
It 15000 Avg loss: 1.2474
It 16000 Avg loss: 1.2156
It 17000 Avg loss: 1.2348
It 18000 Avg loss: 1.2217
It 19000 Avg loss: 1.2345
It 20000 Avg loss: 1.2139
Stats: 3.8869121625784446 3104.080220526129
Mean loss: 1.2086
It 21000 Avg loss: 1.2058
It 22000 Avg loss: 1.2215
It 23000 Avg loss: 1.1908
It 24000 Avg loss: 1.1703
It 25000 Avg loss: 1.1876
It 26000 Avg loss: 1.1439
It 27000 Avg loss: 1.2039
It 28000 Avg loss: 1.1833
It 29000 Avg loss: 1.1759
It 30000 Avg loss: 1.1699
Stats: 3.8443906837586446 3104.080220526129
Mean loss: 1.1669
It 31000 Avg loss: 1

It 251000 Avg loss: 1.0315
It 252000 Avg loss: 1.0174
It 253000 Avg loss: 1.0075
It 254000 Avg loss: 1.0264
It 255000 Avg loss: 1.0119
It 256000 Avg loss: 1.0088
It 257000 Avg loss: 1.0182
It 258000 Avg loss: 1.0074
It 259000 Avg loss: 1.0077
It 260000 Avg loss: 1.0131
Stats: 3.8531360406386415 3104.080220526129
Mean loss: 1.0520
It 261000 Avg loss: 1.0264
It 262000 Avg loss: 1.0151
It 263000 Avg loss: 1.0157
It 264000 Avg loss: 1.0170
It 265000 Avg loss: 1.0165
It 266000 Avg loss: 1.0147
It 267000 Avg loss: 0.9997
It 268000 Avg loss: 1.0139
It 269000 Avg loss: 1.0222
It 270000 Avg loss: 1.0040
Stats: 3.854191223211262 3104.080220526129
Mean loss: 1.0477
It 271000 Avg loss: 1.0090
It 272000 Avg loss: 1.0162
It 273000 Avg loss: 1.0234
It 274000 Avg loss: 1.0331
It 275000 Avg loss: 0.9985
It 276000 Avg loss: 0.9918
It 277000 Avg loss: 1.0063
It 278000 Avg loss: 1.0227
It 279000 Avg loss: 1.0225
It 280000 Avg loss: 1.0128
Stats: 3.8903227723901224 3104.080220526129
Mean loss: 1.0492
It 28

In [None]:
# 1. Don't use the classifier, it's not good
# 2. With bs=32, clsweights 22111 adam and 100 neurons it works
# Trained on 1e-4 to 100k, 1.035 valid. loss
# Trained on 1e-5 to 150k, 1.025 valid. loss
# Trained on 1e-5 to 200k, 1.025 valid. loss
# 3. With clsweights 33111 it is the same up to 100k
# 4. With 200 and 300 neurons (150 embedding) the same up to 100k
# 5. With bottleneck of 50 neurons the same up to 100k
## SCORE: 1.05, not good
# Avg loss @100k: 0.95, @200k: 0.92

# With everything, avg loss @300k: 0.87, but worse than before
# Again just 4/5:
# 0.98 and 1.04

In [18]:
mean_loss = compute_loss()
print(mean_loss)

Stats: 3.847531561599506 3104.080220526129
1.0454018920868178


In [27]:
# Submission

In [28]:
import pandas as pd

def export_and_save(target, preds):
    target_rows, target_cols = target.nonzero()
    ids = [f"r{row+1}_c{col+1}" for row, col in zip(target_rows, target_cols)]
    scores = [np.round(preds[row, col]) for row, col in zip(target_rows, target_cols)]
    df = pd.DataFrame({"Id": ids, "Prediction": scores})
    df.to_csv("preds.csv", index=False, float_format='%.4f')

export_and_save(X_test, get_predictions(X_test)[0])

Stats: 3.8356630181218976 3120.383955674073
