In [None]:
import torch
import torch.nn as nn

# Name of the target device.
# NOTE(review): the cells visible below pass the literal 'cuda' (function
# defaults, runner, best_model) rather than reading this variable, so changing
# it here alone does not move the computation to another device.
device = 'cuda'

Using One Iteration Reverse Net

In [None]:
class OneIterationReverseNet(nn.Module):
    """Convolutional block that performs one reverse step on a feature map.

    Three convolutions, each followed by ReLU. The first one uses a 5x5 kernel
    with circular padding of 4, which enlarges every spatial side by 4; the two
    unpadded 3x3 convolutions that follow shrink it by 2 each, so the output
    has the same height and width as the input.

    Args:
        info_ch: number of channels of the input and of the output.
        ch: number of channels of the two hidden feature maps.
    """

    def __init__(self, info_ch, ch):
        super().__init__()
        self.relu = nn.ReLU()
        self.conv1 = nn.Conv2d(
            info_ch, ch, kernel_size=5, padding=4, padding_mode='circular'
        )
        self.conv2 = nn.Conv2d(ch, ch, kernel_size=3)
        self.conv3 = nn.Conv2d(ch, info_ch, kernel_size=3)

    def forward(self, input):
        """Apply conv1 -> conv2 -> conv3 with a ReLU after each convolution."""
        hidden = input
        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = self.relu(conv(hidden))
        return hidden

Reverse Model Net

In [None]:
class ReverseModel(nn.Module):
    """Encode the stop grid, apply the one-step reverse net per sample, decode.

    The stop grid is shifted by -0.5, encoded to `info_ch` channels, passed
    through `reverse_one_iter` as many times as each sample's `delta` says,
    and finally decoded back to a single channel.

    Args:
        info_ch: number of channels of the iterated feature map.
        ch: hidden channel count handed to OneIterationReverseNet.
    """

    def __init__(self, info_ch=64, ch=128):
        super().__init__()
        self.relu = nn.ReLU()
        # Each of the three sub-modules below can be swapped for another model.
        self.encoder = nn.Conv2d(
            1, info_ch, kernel_size=7, padding=3, padding_mode='circular'
        )
        self.reverse_one_iter = OneIterationReverseNet(info_ch, ch)
        self.decoder = nn.Conv2d(
            info_ch, 1, kernel_size=3, padding=1, padding_mode='circular'
        )

    def forward(self, stop, delta):
        """Return the decoder output for `stop` after `delta` reverse steps.

        Args:
            stop: batch of stop grids fed to the encoder.
            delta: integer tensor with one step count per sample.
        """
        state = self.relu(self.encoder(stop - 0.5))

        # Iterate up to the largest delta in the batch.
        n_steps = delta.max().item()
        for step in range(n_steps):
            candidate = self.reverse_one_iter(state)

            # Per-sample gate so one batch can mix different deltas: samples
            # that still have steps left take the new state, the others keep
            # the state they already reached.
            active = (delta > step).reshape(-1, 1, 1, 1)
            state = state * (~active).float() + candidate * active.float()

        return self.decoder(state)

# Load Data

In [None]:
# Notebook setup: inline plotting plus the data-handling libraries.
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
# Delete the logs directory left over from any earlier run; the training cell
# further down writes its output to "./logs".
!rm -rf logs

This cell installs the Kaggle CLI, downloads the competition data, and creates the `train` and `test` directories that hold it. When prompted, upload your `kaggle.json` API token file from Kaggle.

In [None]:
!pip install -q kaggle
from google.colab import files 
files.upload()

! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets list
! kaggle competitions download -c conways-reverse-game-of-life-2020
! mkdir train
! unzip train.csv.zip -d train
! mkdir test
! unzip test.csv.zip -d test

mkdir: cannot create directory ‘/root/.kaggle’: File exists
ref                                                          title                                           size  lastUpdated          downloadCount  
-----------------------------------------------------------  ---------------------------------------------  -----  -------------------  -------------  
alexgude/california-traffic-collision-data-from-switrs       California Traffic Collision Data from SWITRS    1GB  2020-11-22 16:51:55            415  
babyoda/women-entrepreneurship-and-labor-force               Women Entrepreneurship and Labor Force           1KB  2020-11-21 08:38:51           1459  
szymonjanowski/internet-articles-data-with-users-engagement  Internet news data with readers engagement       3MB  2020-11-21 17:09:57            613  
sakshigoyal7/credit-card-customers                           Credit Card customers                          379KB  2020-11-19 07:38:44           2662  
imoore/2020-us-general-elect

Load the training and test data, then split the training data into training and validation sets.

In [None]:
# Read both competition tables, using the `id` column as the index.
train_data = pd.read_csv('/content/train/train.csv', index_col='id')
test = pd.read_csv('/content/test/test.csv', index_col='id')

# Hold out 20% of the labelled rows for validation, stratified on `delta`,
# with a fixed seed so the split is reproducible.
train, val = train_test_split(
    train_data,
    test_size=0.2,
    shuffle=True,
    random_state=42,
    stratify=train_data['delta'],
)

In [None]:
# Print the training split followed by the validation split.
print(train, val, sep='\n')

       delta  start_0  start_1  start_2  ...  stop_621  stop_622  stop_623  stop_624
id                                       ...                                        
20555      5        0        1        1  ...         1         0         0         0
21476      5        0        0        0  ...         0         0         0         0
21220      4        0        1        0  ...         0         0         0         0
38159      5        0        1        1  ...         0         0         0         0
30246      3        1        1        0  ...         0         0         0         0
...      ...      ...      ...      ...  ...       ...       ...       ...       ...
38726      1        0        0        0  ...         0         0         0         0
47315      2        0        0        0  ...         0         1         0         1
42986      1        0        0        0  ...         1         1         0         0
34080      3        0        0        0  ...         0         0 

# Dataset

In [None]:
from torch.utils.data import DataLoader, Dataset
from torch import FloatTensor, LongTensor

In [None]:
def line2grid_tensor(data, device='cuda', grid_size=25):
    """Turn rows of flattened cells into a float tensor of square grids.

    Args:
        data: pandas DataFrame with one board per row and
            ``grid_size * grid_size`` columns.
        device: device the resulting tensor is created on.
        grid_size: side length of the square board; the default of 25 keeps
            the original behaviour.

    Returns:
        float32 tensor of shape ``(len(data), 1, grid_size, grid_size)``.

    Raises:
        ValueError: from ``reshape`` when the column count does not equal
            ``grid_size * grid_size``.
    """
    grid = data.to_numpy().reshape((data.shape[0], 1, grid_size, grid_size))
    # torch.tensor copies the data and converts the dtype in one step; it
    # replaces the legacy typed constructor FloatTensor(...).
    return torch.tensor(grid, dtype=torch.float32, device=device)

In [None]:
class TaskDataset(Dataset):
    """Dataset of stop grids, deltas and (when available) start grids.

    A table with exactly 1251 columns is read as column 0 (skipped here;
    presumably `delta` -- verify against the CSV layout), then 625 start
    columns, then the stop columns. Any other width is treated as unlabeled:
    every column after the first is read as the stop grid and `start` is None.

    Args:
        data: pandas DataFrame containing a `delta` column.
        device: device on which all tensors are stored.
    """

    def __init__(self, data, device='cuda'):
        delta_values = data['delta'].to_numpy()
        self.delta = LongTensor(delta_values).to(device)

        has_start = data.shape[1] == 1251
        if has_start:
            start_cols = data.iloc[:, 1:626]
            stop_cols = data.iloc[:, 626:]
            self.start = line2grid_tensor(start_cols, device)
            self.stop = line2grid_tensor(stop_cols, device)
        else:
            self.start = None
            self.stop = line2grid_tensor(data.iloc[:, 1:], device)

    def __len__(self):
        """Number of samples, i.e. the number of stored deltas."""
        return len(self.delta)

    def __getitem__(self, idx):
        """Return a dict with 'stop' and 'delta', preceded by 'start' if stored."""
        sample = {}
        if self.start is not None:
            sample['start'] = self.start[idx]
        sample['stop'] = self.stop[idx]
        sample['delta'] = self.delta[idx]
        return sample

In [None]:
# Wrap each split in a TaskDataset (tensors are placed on the default device).
dataset_train = TaskDataset(train)
dataset_val = TaskDataset(val)
dataset_test = TaskDataset(test)

# Batches of 128 everywhere; only the training loader shuffles, so validation
# and test predictions come out in dataset order.
dataloader_train = DataLoader(dataset=dataset_train, batch_size=128, shuffle=True)
dataloader_val = DataLoader(dataset=dataset_val, batch_size=128, shuffle=False)
dataloader_test = DataLoader(dataset=dataset_test, batch_size=128, shuffle=False)

In [None]:
# Install a pinned Catalyst release for this notebook.
!pip install catalyst==20.10.1

from catalyst.dl import SupervisedRunner
from catalyst.dl.callbacks import CriterionCallback, EarlyStoppingCallback, AccuracyCallback
from catalyst.contrib.utils import plotly
from catalyst.contrib.nn.optimizers import RAdam, Lookahead

import collections

# NOTE(review): input_key presumably names the batch entries that are passed to
# ReverseModel.forward(stop, delta) -- confirm against the Catalyst docs.
runner = SupervisedRunner(device='cuda', input_key=['stop', 'delta'], )

# Training and validation loaders, keyed by the names 'train' and 'valid'.
loaders = {'train': dataloader_train, 'valid': dataloader_val}#collections.OrderedDict({'train': dataloader_train, 'valid': dataloader_val})

# Model with its default channel sizes.
model = ReverseModel()

# RAdam (learning rate 1e-3) wrapped in Lookahead.
optimizer = Lookahead(RAdam(params=model.parameters(), lr=1e-3))

# Binary cross-entropy on raw logits, registered under the key "bce"; the
# prediction code later applies the sigmoid itself.
criterion = {"bce": nn.BCEWithLogitsLoss()}

# Multiply the learning rate by 0.25 when the monitored metric stops improving
# (patience of 2).
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.25, patience=2)

# NOTE(review): input_key='start' presumably selects the batch entry used as
# the target of the "bce" criterion -- confirm against the Catalyst docs.
# Early stopping uses a patience of 5.
callbacks = [
        CriterionCallback(input_key='start', prefix="loss", criterion_key="bce"),
        EarlyStoppingCallback(patience=5),
    ]

# Output directory for the run and the maximum number of epochs.
logdir = "./logs"
num_epoch = 32

# Train, tracking "loss" as the main metric (lower is better).
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    logdir=logdir,
    num_epochs=num_epoch,
    main_metric="loss",
    minimize_metric=True,
    verbose=True,
)


1/32 * Epoch (train):   0% 0/313 [00:00<?, ?it/s][A
1/32 * Epoch (train):   0% 0/313 [00:00<?, ?it/s, loss=0.700][A
1/32 * Epoch (train):   0% 1/313 [00:00<01:46,  2.93it/s, loss=0.700][A
1/32 * Epoch (train):   0% 1/313 [00:00<01:46,  2.93it/s, loss=0.699][A
1/32 * Epoch (train):   1% 2/313 [00:00<01:41,  3.06it/s, loss=0.699][A
1/32 * Epoch (train):   1% 2/313 [00:00<01:41,  3.06it/s, loss=0.699][A
1/32 * Epoch (train):   1% 3/313 [00:00<01:39,  3.13it/s, loss=0.699][A
1/32 * Epoch (train):   1% 3/313 [00:01<01:39,  3.13it/s, loss=0.699][A
1/32 * Epoch (train):   1% 4/313 [00:01<01:36,  3.21it/s, loss=0.699][A
1/32 * Epoch (train):   1% 4/313 [00:01<01:36,  3.21it/s, loss=0.699][A
1/32 * Epoch (train):   2% 5/313 [00:01<01:34,  3.25it/s, loss=0.699][A
1/32 * Epoch (train):   2% 5/313 [00:01<01:34,  3.25it/s, loss=0.699][A
1/32 * Epoch (train):   2% 6/313 [00:01<01:33,  3.28it/s, loss=0.699][A
1/32 * Epoch (train):   2% 6/313 [00:02<01:33,  3.28it/s, loss=0.699][A
1/32 

In [None]:
# Start TensorBoard inside the notebook, reading from the logs directory.
%load_ext tensorboard
%tensorboard --logdir logs

# NOTE(review): wildcard import -- it is not visible from this cell which of
# the imported names are actually needed.
from catalyst.contrib.utils import *
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
# `import plotly.graph_objs as go` binds only the name `go`, so `plotly` below
# is still the object bound earlier -- presumably catalyst.contrib.utils.plotly
# from the training cell, not the top-level plotly package.
# Uses `logdir` defined in the training cell.
plotly.plot_tensorboard_log(logdir=logdir, step = 'epoch', metrics = "loss")

# Prediction

In [None]:
# Rebuild the network with its default sizes and move it to the GPU.
best_model = ReverseModel()
best_model = best_model.to('cuda')

# Restore the weights stored under 'model_state_dict' in the best checkpoint.
best_model.load_state_dict(
    torch.load('logs/checkpoints/best.pth')['model_state_dict']
)

In [None]:
! mkdir sample_submission
! unzip sample_submission.csv.zip -d sample_submission

In [None]:
def predict_batch(model, batch):
    """Return sigmoid probabilities for one batch as a NumPy array.

    Switches `model` to eval mode, calls it with the batch's 'stop' and
    'delta' entries without tracking gradients, and applies a sigmoid to
    the result.

    Args:
        model: callable module taking (stop, delta).
        batch: mapping with the keys 'stop' and 'delta'.

    Returns:
        numpy.ndarray holding the sigmoid of the model output.
    """
    model.eval()
    with torch.no_grad():
        logits = model(batch['stop'], batch['delta'])
        probabilities = torch.sigmoid(logits)
    return probabilities.detach().cpu().numpy()

    
def predict_loader(model, loader):
    """Run predict_batch on every batch of `loader` and join the results.

    Args:
        model: model handed to predict_batch.
        loader: iterable of batches.

    Returns:
        numpy.ndarray with the per-batch predictions concatenated along axis 0.
    """
    chunks = []
    for batch in loader:
        chunks.append(predict_batch(model, batch))
    return np.concatenate(chunks)


def validate_loader(model, loader, lb_delta=None, threshold=0.5):
    """Print the cell-wise accuracy of `model` on a labelled loader.

    Accuracy is computed per sample as the fraction of cells whose
    thresholded prediction equals the true start grid, then averaged over all
    samples and separately for each delta from 1 to 5.

    Args:
        model: model to evaluate; passed to predict_loader.
        loader: loader whose `dataset` exposes `start` and `delta` tensors.
        lb_delta: optional pandas Series of delta values. Its normalized
            value counts weight the per-delta accuracies to estimate the
            score on data with that delta distribution.
        threshold: probability above which a cell is predicted alive.

    Returns:
        None. All results are printed.
    """
    # Evaluate the model that was passed in; the previous version ignored the
    # argument and always used the global `best_model`.
    prediction_val = predict_loader(model, loader)
    y_val = loader.dataset.start.detach().cpu().numpy()
    delta_val = loader.dataset.delta.detach().cpu().numpy()

    # Per-sample accuracy: mean over the channel and both spatial axes.
    score = ((prediction_val > threshold) == y_val).mean(axis=(1, 2, 3))
    print(f'All data accuracy: {score.mean()}')

    delta_score = {}
    for i in range(1, 6):
        delta_score[i] = score[delta_val == i].mean()
        print(f'delta={i} accuracy: {delta_score[i]}')

    if lb_delta is not None:
        # Share of each delta in the reference distribution.
        lb_delta = lb_delta.value_counts(normalize=True)
        test_score = sum([lb_delta[i] * delta_score[i] for i in range(1, 6)])
        # Both values are reported as error rates (1 - accuracy).
        print(f'VAL score         : {1-score.mean()}')
        print(f'LB  score estimate: {1-test_score}')
    
    
def make_submission(prediction, threshold=0.5, sample_submission_path='/content/sample_submission/sample_submission.csv'):
    """Binarize predictions and place them in the sample-submission table.

    Args:
        prediction: array of probabilities, reshapeable to rows of 625 cells.
        threshold: values strictly above it become 1, all others 0.
        sample_submission_path: CSV with an `id` column; its index and
            columns define the layout of the result.

    Returns:
        pandas DataFrame read from the sample file with every value replaced
        by the binarized prediction.
    """
    alive = prediction > threshold
    flat_cells = alive.astype(int).reshape(-1, 625)

    frame = pd.read_csv(sample_submission_path, index_col='id')
    frame.iloc[:] = flat_cells
    return frame

In [None]:
# Report validation accuracy; the test set's delta column supplies the delta
# distribution used for the leaderboard score estimate.
validate_loader(best_model, dataloader_val, test['delta'])

In [None]:
# Predict the test set, binarize the result into the sample-submission layout
# (default threshold) and write it to submission.csv.
prediction_test = predict_loader(best_model, dataloader_test)
submission = make_submission(prediction_test)
submission.to_csv('submission.csv')
# Show the submission table as the cell output.
submission



In [None]:
# Submit submission.csv to the Kaggle competition through the Kaggle CLI,
# which is installed and given credentials in the data-download cell.
!kaggle competitions submit conways-reverse-game-of-life-2020 -f submission.csv -m "Reverse Game of Life First Submission"