## Datasets (SST)

In [14]:
from pathlib import Path
from sklearn.model_selection import train_test_split 
from tqdm import trange
import numpy as np
from gensim.models.keyedvectors import KeyedVectors
import torchtext
import torch
import torch.nn as nn
import torch.nn.functional as F
import time
import copy
import nltk

In [15]:
TEXT = torchtext.data.Field(lower=True, include_lengths=True, batch_first=True)
LABEL = torchtext.data.Field(sequential=False)
train, val, test = torchtext.datasets.SST.splits(TEXT, LABEL, root="data/", fine_grained=True)
TEXT.build_vocab(train, vectors="glove.840B.300d")
LABEL.build_vocab(train, val, test)
train_iter, val_iter, test_iter = torchtext.data.BucketIterator.splits(
    (train, val, test), batch_size=50 , device=0)

  0%|          | 0/2196017 [00:00<?, ?it/s]  0%|          | 627/2196017 [00:00<05:50, 6269.56it/s]  0%|          | 1285/2196017 [00:00<05:45, 6358.84it/s]  0%|          | 1805/2196017 [00:00<06:08, 5957.32it/s]  0%|          | 2424/2196017 [00:00<06:04, 6022.77it/s]  0%|          | 2906/2196017 [00:00<06:31, 5598.58it/s]  0%|          | 3370/2196017 [00:00<07:35, 4814.52it/s]  0%|          | 4043/2196017 [00:00<06:56, 5262.24it/s]  0%|          | 4587/2196017 [00:00<06:52, 5311.95it/s]  0%|          | 5133/2196017 [00:00<06:49, 5353.91it/s]  0%|          | 5659/2196017 [00:01<07:22, 4945.03it/s]  0%|          | 6169/2196017 [00:01<07:18, 4988.27it/s]  0%|          | 6702/2196017 [00:01<07:10, 5085.40it/s]  0%|          | 7306/2196017 [00:01<06:49, 5338.47it/s]  0%|          | 7880/2196017 [00:01<06:41, 5452.67it/s]  0%|          | 8572/2196017 [00:01<06:15, 5822.84it/s]  0%|          | 9205/2196017 [00:01<06:06, 5965.66it/s]  0%|          | 9965/2196017 [00:01<05:42, 

## Model

In [16]:
## 直接实现CNN_MULTICHANNEL
class CNN(nn.Module):
    def __init__(self, dim = 300):
        super(CNN, self).__init__()
        self.fliters = [3, 4, 5]
        self.fliters_num = [100, 100, 100]
        self.dropout_prob = 0.5
        self.classes = len(LABEL.vocab)-1
        self.embedding_dim = dim
        
        self.embedding = torch.nn.Embedding.from_pretrained(TEXT.vocab.vectors)
        self.embedding_pretrain = torch.nn.Embedding.from_pretrained(TEXT.vocab.vectors, False)
        
        self.conv_1 = torch.nn.Conv2d(2, self.fliters_num[0], (self.fliters[0], self.embedding_dim))
        self.conv_2 = torch.nn.Conv2d(2, self.fliters_num[1], (self.fliters[1], self.embedding_dim))
        self.conv_3 = torch.nn.Conv2d(2, self.fliters_num[2], (self.fliters[2], self.embedding_dim))
        self.dropout = nn.Dropout(self.dropout_prob)
        self.fc = nn.Linear(300, self.classes)
        
    def forward(self, input):
        i1 = self.embedding(input)
        i2 = self.embedding_pretrain(input)
        x = torch.stack([i1, i2], dim = 1)
        x1 = F.relu(self.conv_1(x)).squeeze(3)
        x1 = F.max_pool1d(x1, x1.shape[1]).squeeze(2)
        x2 = F.relu(self.conv_1(x)).squeeze(3)
        x2 = F.max_pool1d(x2, x2.shape[1]).squeeze(2)
        x3 = F.relu(self.conv_1(x)).squeeze(3)
        x3 = F.max_pool1d(x3, x3.shape[1]).squeeze(2)
        x = torch.cat((x1, x2, x3), 1)
        x = self.dropout(x)
        x = self.fc(x)
        return x
        

In [18]:
SETS = ['train', 'test', 'val']
dataloaders = {}
dataloaders['train'] = train_iter
dataloaders['test'] = test_iter
dataloaders['val'] = val_iter

In [78]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = CNN().to(device)
params = filter(lambda x : x.requires_grad, model.parameters())
optimzer = torch.optim.Adadelta(params, lr=1e-2)
criterion = nn.CrossEntropyLoss()

## Train

In [79]:
def train_val(model, criterion, optimizer, num_epochs=100):
    since = time.time()
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoc {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            running_loss = 0.0
            running_corrects = 0
            for index, data in enumerate(dataloaders[phase]):
                inputs, labels = data.text[0], data.label
                labels.sub_(1)
                inputs, labels = inputs.to(device), labels.to(device)
            
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    # Forward pass
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    # Compute loss
                    loss = criterion(outputs, labels)

                    # Compute gradients and update parameters if train
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size()[0]
                running_corrects += torch.sum(preds == labels).item()

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects / len(dataloaders[phase].dataset)
        
            print('{} Loss: {:.4f} Acc.: {:.2f} %'.format(
                phase.title(), epoch_loss, epoch_acc * 100))
        
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()
    
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best Accuracy: {:.2f} %'.format(best_acc * 100))
    model.load_state_dict(best_model_wts)
    
    return model

In [80]:
model = train_val(model, criterion, optimzer)

Epoc 0/99
----------
Train Loss: 1.6027 Acc.: 24.51 %
Val Loss: 1.5631 Acc.: 31.06 %

Epoc 1/99
----------
Train Loss: 1.5617 Acc.: 29.31 %
Val Loss: 1.5492 Acc.: 30.61 %

Epoc 2/99
----------
Train Loss: 1.5437 Acc.: 31.64 %
Val Loss: 1.5352 Acc.: 33.88 %

Epoc 3/99
----------
Train Loss: 1.5261 Acc.: 33.51 %
Val Loss: 1.5196 Acc.: 35.42 %

Epoc 4/99
----------
Train Loss: 1.5086 Acc.: 34.55 %
Val Loss: 1.5029 Acc.: 35.24 %

Epoc 5/99
----------
Train Loss: 1.4862 Acc.: 36.45 %
Val Loss: 1.4837 Acc.: 36.15 %

Epoc 6/99
----------
Train Loss: 1.4671 Acc.: 37.28 %
Val Loss: 1.4651 Acc.: 36.24 %

Epoc 7/99
----------
Train Loss: 1.4437 Acc.: 39.17 %
Val Loss: 1.4471 Acc.: 36.51 %

Epoc 8/99
----------
Train Loss: 1.4225 Acc.: 39.55 %
Val Loss: 1.4271 Acc.: 36.60 %

Epoc 9/99
----------
Train Loss: 1.4012 Acc.: 40.33 %
Val Loss: 1.4096 Acc.: 37.24 %

Epoc 10/99
----------
Train Loss: 1.3831 Acc.: 41.15 %
Val Loss: 1.3942 Acc.: 37.33 %

Epoc 11/99
----------
Train Loss: 1.3655 Acc.: 41.54 

In [81]:
def test(model):
    model.eval()
    running_corrects = 0
    with torch.no_grad():
        for index, data in enumerate(dataloaders['test']):
            inputs, labels = data.text[0], data.label
            labels.sub_(1)
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            
            running_corrects += torch.sum(preds == labels).item()
    test_acc = running_corrects / len(dataloaders['test'].dataset)
    print('Test Acc.: {:.2f} %'.format(test_acc * 100))

In [82]:
test(model)

Test Acc.: 45.29 %
