# Deep-Channel-Pytorch

**Deep-Channel** 的 PyTorch 实现，并进行了一定的修改

*By: Roger Zhu*

![图片被吃掉啦！](./images/title_ohne_abs.png)

In [None]:
import math
import random
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as tud

import matplotlib.pyplot as plt
%matplotlib inline

# Compute device: use the GPU whenever CUDA is available, else the CPU.
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda') if USE_CUDA else torch.device('cpu')

# Seed every random number generator used by the notebook.
SEED = 10015
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if USE_CUDA:
    torch.cuda.manual_seed(SEED)

# ---- Hyperparameters ----
# Data: number of consecutive samples that form one time batch (sequence).
time_batch = 500000

# 1D-CNN: number of output channels of the convolution.
OUT_DIM = 64

# LSTM: time steps per DataLoader batch, hidden width, stacked layers.
N_STEP, N_HIDDEN, N_LAYER = 100, 256, 3

# Training: number of epochs and initial learning rate.
EPOCHS, LR = 50, 0.01

In [None]:
# Load the training set from CSV. The bare name on the last line makes the
# notebook render the DataFrame.
train_df = pd.read_csv('./data/train.csv')
train_df

In [None]:
# Print the column dtypes / non-null counts, then show summary statistics
# for every column (numeric and non-numeric alike).
train_df.info()
train_df.describe(include='all')

In [None]:
# Largest class id found in the last column (the label column). Cast to a
# plain Python int because it is later used to size the output layer.
max_channel = int(train_df.iloc[:, -1].max())
# Number of whole sequences of `time_batch` samples in the training set.
time_batch_sz = len(train_df) // time_batch

# Convert the pandas columns to NumPy arrays first: torch.tensor() then takes
# its fast array path instead of walking the Series as a generic sequence.
# Second-to-last column -> input signal, last column -> class label.
train_data = torch.tensor(
    train_df.iloc[:, -2].to_numpy(), dtype=torch.float32, device=DEVICE)
train_label = torch.tensor(
    train_df.iloc[:, -1].to_numpy(), dtype=torch.int64, device=DEVICE)

In [None]:
class train_Dataset(tud.Dataset):
    """Time-indexed view over parallel training sequences.

    The flat ``data`` / ``label`` tensors are reshaped to
    ``(time_batch_sz, time_batch, 1)``, i.e. ``time_batch_sz`` sequences of
    ``time_batch`` samples each. Indexing by a time step returns that step
    from every sequence; the last sequence is held out as validation data.
    """

    def __init__(self, data, label, time_batch, time_batch_sz):
        super().__init__()
        self.time_batch = time_batch
        self.time_batch_sz = time_batch_sz
        layout = (self.time_batch_sz, self.time_batch, 1)
        self.data = data.view(*layout)
        self.label = label.view(*layout)

    def __len__(self):
        # One item per time step within a sequence.
        return self.time_batch

    def __getitem__(self, idx):
        # Stacked by the DataLoader, the result is (n_step, B, 1).
        step_data = self.data[:, idx, :]
        step_label = self.label[:, idx, :]
        # Every sequence but the last is used for training ...
        train_pair = (step_data[:-1], step_label[:-1])
        # ... and the last sequence is kept for validation.
        val_pair = (step_data[-1:], step_label[-1:])
        return train_pair, val_pair

In [None]:
# Wrap the training tensors in the dataset. Each DataLoader batch stacks
# N_STEP consecutive time steps; shuffling is off so temporal order is kept.
train_ds = train_Dataset(
    data=train_data,
    label=train_label,
    time_batch=time_batch,
    time_batch_sz=time_batch_sz,
)
train_dl = tud.DataLoader(dataset=train_ds, batch_size=N_STEP, shuffle=False)

In [None]:
class Model(nn.Module):
    """1D convolution + stacked LSTM + fully connected classifier.

    Args:
        n_hidden: hidden size of the LSTM.
        n_layer: number of stacked LSTM layers.
        max_channel: largest class id; the head emits ``max_channel + 1`` logits.
        out_dim: number of output channels of the 1D convolution.
    """

    def __init__(self, n_hidden, n_layer, max_channel, out_dim):
        super().__init__()
        self.n_hidden, self.n_layer = n_hidden, n_layer
        self.max_channel, self.out_dim = max_channel, out_dim

        # Sub-modules are created in a fixed order so that parameter
        # registration (and random initialisation) stays stable.
        self.conv = nn.Conv1d(
            in_channels=1, out_channels=self.out_dim, kernel_size=1)
        self.bn1 = nn.BatchNorm1d(self.out_dim)
        self.lstm = nn.LSTM(
            input_size=self.out_dim,
            hidden_size=self.n_hidden,
            num_layers=self.n_layer,
            dropout=0.2,
        )
        self.ln1 = nn.Linear(self.n_hidden, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.ln2 = nn.Linear(128, 64)
        self.bn3 = nn.BatchNorm1d(64)
        self.ln3 = nn.Linear(64, self.max_channel + 1)
        self.drop = nn.Dropout(0.3)

    def forward(self, x, hidden):
        """Run one chunk of time steps through the network.

        Args:
            x: input of shape (n_step, B, 1).
            hidden: LSTM state tuple ``(h, c)``.

        Returns:
            ``(output, hidden)`` where ``output`` has shape
            (n_step * B, max_channel + 1) and ``hidden`` is the new LSTM state.
        """
        # Conv1d wants channels second: (n_step, B, 1) -> (B, 1, n_step).
        feat = x.permute(1, 2, 0).contiguous()
        feat = self.conv(feat)
        feat = self.bn1(feat)
        feat = F.relu(feat)  # (B, out_dim, n_step)

        # The LSTM is sequence-first: (B, out_dim, n_step) -> (n_step, B, out_dim).
        seq = feat.permute(2, 0, 1).contiguous()
        seq, hidden = self.lstm(seq, hidden)  # (n_step, B, n_hidden)

        # Merge time and batch so the dense head sees one row per sample.
        flat = seq.view(-1, self.n_hidden)  # (n_step * B, n_hidden)
        flat = self.drop(F.relu(self.bn2(self.ln1(flat))))
        flat = self.drop(F.relu(self.bn3(self.ln2(flat))))
        logits = self.ln3(flat)  # (n_step * B, max_channel + 1)

        return logits, hidden

    def initweight(self):
        """Initialise conv/linear weights uniformly in [-0.1, 0.1] and zero their biases."""
        bound = 0.1
        for layer in (self.conv, self.ln1, self.ln2, self.ln3):
            layer.weight.data.uniform_(-bound, bound)
            layer.bias.data.zero_()

    def inithidden(self, batch_size, requires_grad=True):
        """Return a zero ``(h, c)`` LSTM state for ``batch_size`` sequences."""
        # Borrow dtype and device from an existing parameter.
        ref = next(self.parameters())
        state_shape = (self.n_layer, batch_size, self.n_hidden)
        h0 = ref.new_zeros(state_shape, requires_grad=requires_grad)
        c0 = ref.new_zeros(state_shape, requires_grad=requires_grad)
        return (h0, c0)

In [None]:
# Build the network, move it to the GPU when one is available, then apply
# the custom weight initialisation.
model = Model(
    n_hidden=N_HIDDEN,
    n_layer=N_LAYER,
    max_channel=max_channel,
    out_dim=OUT_DIM,
)
if USE_CUDA:
    model.cuda()
model.initweight()

# Cross-entropy over the class logits, Adam optimiser, and an exponential
# learning-rate decay (x0.9 on every scheduler step).
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LR)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.9)

In [None]:
# Per-epoch history of the mean training / validation loss and accuracy.
epoch_loss_list = []
epoch_acc_list = []
epoch_val_loss_list = []
epoch_val_acc_list = []
# Best single-batch validation accuracy seen so far (None until first check).
max_val_acc = None
for epoch in range(EPOCHS):
    # Fresh LSTM state every epoch: all sequences but the last are trained
    # on, the last one is held out for validation (see train_Dataset).
    hidden = model.inithidden(time_batch_sz - 1)
    val_hidden = model.inithidden(1)
    epoch_loss = 0
    epoch_acc = 0
    epoch_val_loss = 0
    epoch_val_acc = 0
    n_batches = 0
    for i, batch in enumerate(train_dl):
        (data, label), (val_data, val_label) = batch
        if USE_CUDA:
            data = data.cuda()
            label = label.cuda()
            val_data = val_data.cuda()
            val_label = val_label.cuda()
        # Flatten (n_step, B, 1) labels to (n_step * B,) to match the logits.
        label = label.view(-1, label.shape[-1]).squeeze(-1)
        val_label = val_label.view(-1, val_label.shape[-1]).squeeze(-1)

        # ---- validation on the held-out sequence (no gradients) ----
        model.eval()
        with torch.no_grad():
            val_output, val_hidden = model(val_data, val_hidden)
            val_loss = loss_fn(val_output, val_label)
            val_acc = (torch.argmax(val_output, dim=1)
                       == val_label).sum().item() / len(val_label)
            epoch_val_loss += val_loss.item()
            epoch_val_acc += val_acc

        # ---- training step ----
        model.train()
        # Detach the carried state so backprop stops at the chunk boundary.
        hidden = tuple(c.detach() for c in hidden)
        output, hidden = model(data, hidden)

        loss = loss_fn(output, label)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        epoch_loss += loss.item()
        acc = (torch.argmax(output, dim=1) == label).sum().item() / len(label)
        epoch_acc += acc
        n_batches += 1

        if i % 10 == 0:
            print('loss:', loss.item(), 'acc:', acc, '\n',
                  'val_loss:', val_loss.item(), 'val_acc:', val_acc)
            # NOTE(review): val_acc was measured before this batch's
            # optimizer step, so the checkpoint holds slightly newer weights
            # than the ones that produced val_acc.
            if max_val_acc is None or val_acc > max_val_acc:
                torch.save(model.state_dict(), 'best.th')
                max_val_acc = val_acc
                print('model state saved!')

    # Average over the number of batches actually processed. The previous
    # code divided by the last index `i`, which is one less than the count.
    n_batches = max(n_batches, 1)
    epoch_loss = epoch_loss / n_batches
    epoch_acc = epoch_acc / n_batches
    epoch_val_loss = epoch_val_loss / n_batches
    epoch_val_acc = epoch_val_acc / n_batches
    # Decay the learning rate only when training accuracy got worse than in
    # the previous epoch.
    if epoch > 0:
        if epoch_acc < epoch_acc_list[-1]:
            scheduler.step()
            print('learning rate changed!')
    epoch_loss_list.append(epoch_loss)
    epoch_acc_list.append(epoch_acc)
    epoch_val_loss_list.append(epoch_val_loss)
    epoch_val_acc_list.append(epoch_val_acc)

In [None]:
# Side-by-side training curves: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(24, 6))

loss_ax.plot(epoch_loss_list, label='loss')
loss_ax.plot(epoch_val_loss_list, label='val_loss')
loss_ax.legend()

acc_ax.plot(epoch_acc_list, label='acc')
acc_ax.plot(epoch_val_acc_list, label='val_acc')
acc_ax.legend()

In [None]:
# Load the test set from CSV. The bare name on the last line makes the
# notebook render the DataFrame.
test_df = pd.read_csv('./data/test.csv')
test_df

In [None]:
# Print the column dtypes / non-null counts, then show summary statistics
# for every column of the test set.
test_df.info()
test_df.describe(include='all')

In [None]:
# Number of whole sequences of `time_batch` samples in the test set.
test_time_bsz = len(test_df) // time_batch
# Convert the last column to a NumPy array first: torch.tensor() then takes
# its fast array path instead of walking the Series as a generic sequence.
test_data = torch.tensor(
    test_df.iloc[:, -1].to_numpy(), dtype=torch.float32, device=DEVICE)

In [None]:
class test_Dataset(tud.Dataset):
    """Time-indexed view over parallel test sequences.

    The flat ``data`` tensor is reshaped to ``(time_batch_sz, time_batch, 1)``;
    indexing by a time step returns that step from every sequence.
    """

    def __init__(self, data, time_batch, time_batch_sz):
        super().__init__()
        self.time_batch = time_batch
        self.time_batch_sz = time_batch_sz
        layout = (self.time_batch_sz, self.time_batch, 1)
        self.data = data.view(*layout)

    def __len__(self):
        # One item per time step within a sequence.
        return self.time_batch

    def __getitem__(self, idx):
        # Stacked by the DataLoader, the result is (n_step, B, 1).
        step_slice = self.data[:, idx, :]
        return step_slice

In [None]:
# Wrap the test tensor in the dataset. Each DataLoader batch stacks N_STEP
# consecutive time steps; shuffling is off so temporal order is kept.
test_ds = test_Dataset(
    data=test_data,
    time_batch=time_batch,
    time_batch_sz=test_time_bsz,
)
test_dl = tud.DataLoader(dataset=test_ds, batch_size=N_STEP, shuffle=False)

In [None]:
# Restore the best checkpoint written during training and switch to
# inference mode (disables dropout, uses BatchNorm running statistics).
model.load_state_dict(torch.load('best.th', map_location=torch.device('cpu')))
model.eval()
# Per-chunk predictions are collected here and concatenated once at the end,
# rather than re-copying an ever-growing tensor on every iteration.
pred_chunks = []
with torch.no_grad():
    # The LSTM state is carried across chunks so each sequence is processed
    # as one continuous stream.
    hidden = model.inithidden(test_time_bsz, requires_grad=False)
    for data in test_dl:
        if USE_CUDA:
            data = data.cuda()
        output, hidden = model(data, hidden)  # (n_step * B, max_channel + 1)
        # Use the chunk's real step count so a shorter final batch still
        # reshapes correctly: (n_step, B, max_channel + 1).
        output = output.view(data.shape[0], test_time_bsz, -1)
        pred_chunks.append(
            torch.argmax(output, dim=2).permute(1, 0))  # (B, n_step)
    pred = torch.cat(pred_chunks, dim=1)  # (B, time_batch)
    # reshape() copes with a non-contiguous result (e.g. a single chunk).
    pred = pred.reshape(test_time_bsz * time_batch)
    pred = pred.cpu()

In [None]:
# Quick visual check of the predicted class for every test sample.
plt.plot(pred)

In [None]:
# Fill the last column of the sample submission with the predictions and
# write the result to disk.
submission = pd.read_csv('./data/sample_submission.csv')
# Hand pandas a NumPy array rather than a torch.Tensor so the column is
# stored as plain integers instead of relying on implicit conversion.
submission.iloc[:, -1] = pred.numpy()
# float_format only affects float columns; integer predictions are untouched.
submission.to_csv('submission.csv', index=False, float_format='%.4f')