In [1]:
import pandas as pd
import numpy as np
import collections
import re
import torch
from torch.optim import Adam
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import log_loss, roc_auc_score
import warnings
import torch.nn as nn
from tqdm import tqdm
import random
import gensim
from torchcontrib.optim import SWA
import os
from torch.utils import data
from torch import nn
import torch.nn.functional as F
from torch.optim import *
torch.set_printoptions(edgeitems=768)
warnings.filterwarnings("ignore")
np.set_printoptions(threshold=np.inf)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# 设置基本参数
MAX_LEN = 100
BATCH_SIZE = 16
SEED = 9797
NAME = 'capsuleNet'
random.seed(SEED)
os.environ["PYTHONHASHSEED"] = str(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if DEVICE=='cuda':
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
DEVICE

'cuda'

In [2]:
train_data = pd.read_csv('/home/zyf/Summer game2021/Datafountain/datasets/datagrand_2021_train.csv')
test_data = pd.read_csv('/home/zyf/Summer game2021/Datafountain/datasets/datagrand_2021_test.csv') 
# train_data.columns=['report_ID','description','label']
# test_data.columns=['report_ID','description']

# temp=[i[:-1] for i in train_data['report_ID'].values]
# train_data['report_ID']=temp
# temp=[i[:-1] for i in test_data['report_ID'].values]
# test_data['report_ID']=temp

temp=[i.strip('|').strip() for i in train_data['text'].values]
train_data['text']=temp
temp=[i.strip('|').strip() for i in test_data['text'].values]
test_data['text']=temp

temp_label=[i.strip('|').strip() for i in train_data['label'].values]
label2idx = {}
idx2label = {}
for idx, label in enumerate(train_data['label'].unique()):
    label2idx[label] = idx
    idx2label[idx] = label
temp_label2idx = [label2idx[label] for label in train_data['label'].values]
train_data['label']=temp_label2idx
train_data

Unnamed: 0,id,text,label
0,0,7442 27878 9601 235 4004 10636 19121 28648 227...,0
1,1,2281 24058 20163 22737 25572 1845 10446 28438 ...,0
2,2,12163 8224 13343 26307 18947 10922 10446 8679 ...,1
3,3,28685 16037 3261 14485 26363 16037 29555 8766 ...,2
4,4,10974 16249 8266 16328 2103 4198 929 4685 2254...,2
...,...,...,...
14004,14004,17060 21114 4106 14131 19047 10647 9531 26694 ...,20
14005,14005,1662 5292 11160 648 2281 29541 3328 9601 28874...,7
14006,14006,20097 23904 18496 10893 1826 15465 16856 27380...,4
14007,14007,15991 4685 19121 5609 104 13839 23316 1586 ， 9...,24


In [3]:
all_sentences = pd.concat([train_data['text'],test_data['text']]).reset_index(drop=True)
all_sentences.drop_duplicates().reset_index(drop=True, inplace=True)
all_sentences = all_sentences.apply(lambda x:x.split(' ')).tolist()
if not os.path.exists('./embedding/w2v.model'): 
    w2v_model = gensim.models.word2vec.Word2Vec(all_sentences, sg=1, vector_size=300, window=7,min_count=1,negative=3,sample=0.001,hs=1,seed=452)
    w2v_model.save('./embedding/w2v.model')
else:
    w2v_model = gensim.models.word2vec.Word2Vec.load("./embedding/w2v.model")
    
if not os.path.exists('./embedding/fasttext.model'): 
    fasttext_model = gensim.models.FastText(all_sentences, seed=452, vector_size=100, min_count=1, epochs=20, window=2)
    fasttext_model.save('./embedding/fasttext.model')
else:
    fasttext_model = gensim.models.word2vec.Word2Vec.load("./embedding/fasttext.model")

In [4]:
train_dataset = []
for i in tqdm(range(len(train_data))):
    train_dict = {}
    train_dict['id'] = train_data.loc[i, 'id']
    train_dict['text'] = train_data.loc[i, 'text']
    train_dict['label'] = train_data.loc[i, 'label']
    train_dataset.append(train_dict)
test_dataset = []
for i in tqdm(range(len(test_data))):
    test_dict = {}
    test_dict['id'] = test_data.loc[i, 'id']
    test_dict['text'] = test_data.loc[i, 'text']
    test_dict['label'] = ''
    test_dataset.append(test_dict)

100%|██████████| 14009/14009 [00:00<00:00, 57478.98it/s]
100%|██████████| 6004/6004 [00:00<00:00, 71889.70it/s]


In [5]:
train_dataset

[{'id': 0,
  'text': '7442 27878 9601 235 4004 10636 19121 28648 22737 10935 10922 29296 3263 8194 17799 ， 9601 4004 28648 22737 19897 8194 929 235 23926 28648 22737 5933 19052 715 25945 ， 8194 2281 10893 26694 8 10922 20097 30327 1602 23897 4274 6762 2545 235 13778 5091 29584 17281 17983 9614',
  'label': 0},
 {'id': 1,
  'text': '2281 24058 20163 22737 25572 1845 10446 28438 10300 17281 11160 20428 8194 26526 25682 ， 12163 2750 11693 9410 5292 26366 15912 10723 10300 10658 19047 10636 6809 16328 15766 4198 20923 28041 20781 13178 ， 7661 19121 14547 20431 3848 15469 23349 26526 8299 12520 9805 16575 11160 20428 8641 15316',
  'label': 0},
 {'id': 2,
  'text': '12163 8224 13343 26307 18947 10922 10446 8679 12052 27145 25483 19121 7424 23308 7251 8100 ， 25682 12567 1654 2106 10039 25483 8100 10647 5540 12052 12567 17857 4397 4274 11962 20788 17281 19339 8258 ， 25658 4529 12021 11100 30181 749 7251 29099 4198 20060',
  'label': 1},
 {'id': 3,
  'text': '28685 16037 3261 14485 26363 16037

In [6]:
class DataSet(data.Dataset):
    def __init__(self, data, mode='train'):
        self.data = data
        self.mode = mode
        self.dataset = self.get_data(self.data,self.mode)
        
    def get_data(self, data, mode):
        dataset = []
        global s
        for data_li in tqdm(data):
            text = data_li['text'].split(' ')
            text = [w2v_model.wv.key_to_index[s]+1 if s in w2v_model.wv else 0 for s in text]
            if len(text) < MAX_LEN:
                text += [0] * (MAX_LEN - len(text))
            else:
                text = text[:MAX_LEN]
            label = self.get_dumm(data_li['label'])
            dataset_dict = {'text':text, 'label':label}
            dataset.append(dataset_dict)
        return dataset
    
    def get_dumm(self,s):
        re = [0] * 35
        if s == '':
            return re
        else:
            tmp = [int(i) for i in str(s).split(' ')]
            for i in tmp:
                re[i] = 1
        return re
    
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        data = self.dataset[idx]
        description = torch.tensor(data['text'])
        if self.mode == 'test':
            return description
        else:
            label = torch.tensor(data['label'])
            return description, label

def get_dataloader(dataset, mode):
    torchdata = DataSet(dataset, mode=mode)
    if mode == 'train':
        dataloader = torch.utils.data.DataLoader(torchdata, batch_size=BATCH_SIZE, shuffle=True, num_workers=0, drop_last=True)
    elif mode == 'test':
        dataloader = torch.utils.data.DataLoader(torchdata, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, drop_last=False)
    elif mode == 'valid':
        dataloader = torch.utils.data.DataLoader(torchdata, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, drop_last=False)
    return dataloader, torchdata

train_dataloader, train_torchdata = get_dataloader(train_dataset, mode='train')
test_dataloader, test_torchdata = get_dataloader(test_dataset, mode='test')

100%|██████████| 14009/14009 [00:00<00:00, 28683.64it/s]
100%|██████████| 6004/6004 [00:00<00:00, 33926.01it/s]


In [7]:
class CyclicLR(object):
    def __init__(self, optimizer, base_lr=1e-3, max_lr=6e-3,
                 step_size=2000, mode='triangular', gamma=1.,
                 scale_fn=None, scale_mode='cycle', last_batch_iteration=-1):

        if not isinstance(optimizer, Optimizer):
            raise TypeError('{} is not an Optimizer'.format(
                type(optimizer).__name__))
        self.optimizer = optimizer

        if isinstance(base_lr, list) or isinstance(base_lr, tuple):
            if len(base_lr) != len(optimizer.param_groups):
                raise ValueError("expected {} base_lr, got {}".format(
                    len(optimizer.param_groups), len(base_lr)))
            self.base_lrs = list(base_lr)
        else:
            self.base_lrs = [base_lr] * len(optimizer.param_groups)

        if isinstance(max_lr, list) or isinstance(max_lr, tuple):
            if len(max_lr) != len(optimizer.param_groups):
                raise ValueError("expected {} max_lr, got {}".format(
                    len(optimizer.param_groups), len(max_lr)))
            self.max_lrs = list(max_lr)
        else:
            self.max_lrs = [max_lr] * len(optimizer.param_groups)

        self.step_size = step_size

        if mode not in ['triangular', 'triangular2', 'exp_range'] \
                and scale_fn is None:
            raise ValueError('mode is invalid and scale_fn is None')

        self.mode = mode
        self.gamma = gamma

        if scale_fn is None:
            if self.mode == 'triangular':
                self.scale_fn = self._triangular_scale_fn
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = self._triangular2_scale_fn
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = self._exp_range_scale_fn
                self.scale_mode = 'iterations'
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode

        self.batch_step(last_batch_iteration + 1)
        self.last_batch_iteration = last_batch_iteration

    def batch_step(self, batch_iteration=None):
        if batch_iteration is None:
            batch_iteration = self.last_batch_iteration + 1
        self.last_batch_iteration = batch_iteration
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

    def _triangular_scale_fn(self, x):
        return 1.

    def _triangular2_scale_fn(self, x):
        return 1 / (2. ** (x - 1))

    def _exp_range_scale_fn(self, x):
        return self.gamma**(x)

    def get_lr(self):
        step_size = float(self.step_size)
        cycle = np.floor(1 + self.last_batch_iteration / (2 * step_size))
        x = np.abs(self.last_batch_iteration / step_size - 2 * cycle + 1)

        lrs = []
        param_lrs = zip(self.optimizer.param_groups, self.base_lrs, self.max_lrs)
        for param_group, base_lr, max_lr in param_lrs:
            base_height = (max_lr - base_lr) * np.maximum(0, (1 - x))
            if self.scale_mode == 'cycle':
                lr = base_lr + base_height * self.scale_fn(cycle)
            else:
                lr = base_lr + base_height * self.scale_fn(self.last_batch_iteration)
            lrs.append(lr)
        return lrs

In [8]:
Num_capsule = 5
Dim_capsule = 5
class Caps_Layer(nn.Module):
    def __init__(self, input_dim_capsule, num_capsule=Num_capsule, dim_capsule=Dim_capsule, \
                 routings=4, kernel_size=(9, 1), share_weights=True,
                 activation='default', **kwargs):
        super(Caps_Layer, self).__init__(**kwargs)
        self.T_epsilon = 1e-7
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = 4
        self.kernel_size = kernel_size  # 暂时没用到
        self.share_weights = share_weights
        if activation == 'default':
            self.activation = self.squash
        else:
            self.activation = nn.ReLU(inplace=True)

        if self.share_weights:
            self.W = nn.Parameter(
                nn.init.xavier_normal_(torch.empty(1, input_dim_capsule, self.num_capsule * self.dim_capsule)))
        else:
            self.W = nn.Parameter(
                torch.randn(BATCH_SIZE, input_dim_capsule, self.num_capsule * self.dim_capsule))  # 64即batch_size

    def forward(self, x):

        if self.share_weights:
            u_hat_vecs = torch.matmul(x, self.W)
        else:
            print('add later')

        batch_size = x.size(0)
        input_num_capsule = x.size(1)
        u_hat_vecs = u_hat_vecs.view((batch_size, input_num_capsule,
                                      self.num_capsule, self.dim_capsule))
        u_hat_vecs = u_hat_vecs.permute(0, 2, 1, 3)  # 转成(batch_size,num_capsule,input_num_capsule,dim_capsule)
        b = torch.zeros_like(u_hat_vecs[:, :, :, 0])  # (batch_size,num_capsule,input_num_capsule)

        for i in range(self.routings):
            b = b.permute(0, 2, 1)
            c = F.softmax(b, dim=2)
            c = c.permute(0, 2, 1)
            b = b.permute(0, 2, 1)
            outputs = self.activation(torch.einsum('bij,bijk->bik', (c, u_hat_vecs)))  # batch matrix multiplication
            # outputs shape (batch_size, num_capsule, dim_capsule)
            if i < self.routings - 1:
                b = torch.einsum('bik,bijk->bij', (outputs, u_hat_vecs))  # batch matrix multiplication
        return outputs  # (batch_size, num_capsule, dim_capsule)

    # text version of squash, slight different from original one
    def squash(self, x, axis=-1):
        s_squared_norm = (x ** 2).sum(axis, keepdim=True)
        scale = torch.sqrt(s_squared_norm + self.T_epsilon)
        return x / scale
    
class Attention(nn.Module):
    def __init__(self, feature_dim, step_dim, bias=True, **kwargs):
        super(Attention, self).__init__(**kwargs)
        
        self.supports_masking = True

        self.bias = bias
        self.feature_dim = feature_dim
        self.step_dim = step_dim
        self.features_dim = 0
        
        weight = torch.zeros(feature_dim, 1)
        nn.init.xavier_uniform_(weight)
        self.weight = nn.Parameter(weight)
        
        if bias:
            self.b = nn.Parameter(torch.zeros(step_dim))
        
    def forward(self, x, mask=None):
        feature_dim = self.feature_dim
        step_dim = self.step_dim

        eij = torch.mm(
            x.contiguous().view(-1, feature_dim), 
            self.weight
        ).view(-1, step_dim)
        
        if self.bias:
            eij = eij + self.b
            
        eij = torch.tanh(eij)
        a = torch.exp(eij)
        
        if mask is not None:
            a = a * mask

        a = a / torch.sum(a, 1, keepdim=True) + 1e-10

        weighted_input = x * torch.unsqueeze(a, -1)
        return torch.sum(weighted_input, 1)
    
class NeuralNet(nn.Module):
    def __init__(self,vocab_size,embedding_dim,embeddings=None):
        super(NeuralNet, self).__init__()
        self.num_classes = 35
        fc_layer = 256
        hidden_size = 128
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        if embeddings:
            w2v_model = gensim.models.word2vec.Word2Vec.load("./embedding/w2v.model").wv
            fasttext_model = gensim.models.word2vec.Word2Vec.load("./embedding/fasttext.model").wv
            w2v_embed_matrix = w2v_model.vectors
            fasttext_embed_matrix = fasttext_model.vectors
#             embed_matrix = w2v_embed_matrix         
            embed_matrix = np.concatenate([w2v_embed_matrix, fasttext_embed_matrix], axis=1)
            oov_embed = np.zeros((1, embed_matrix.shape[1]))
            embed_matrix = torch.from_numpy(np.vstack((oov_embed,embed_matrix)))
            self.embedding.weight.data.copy_(embed_matrix)
            self.embedding.weight.requires_grad = False
        
        self.embedding_dropout = nn.Dropout2d(0.1)
        self.lstm = nn.GRU(embedding_dim, hidden_size,2, bidirectional=True, batch_first=True)
        self.gru = nn.GRU(hidden_size * 2, hidden_size,2, bidirectional=True, batch_first=True)
        self.tdbn = nn.BatchNorm2d(1)
        self.lstm_attention = Attention(hidden_size * 2, MAX_LEN)
        self.gru_attention = Attention(hidden_size * 2, MAX_LEN)
        self.bn = nn.BatchNorm1d(fc_layer)
        self.linear = nn.Linear(hidden_size*8+1, fc_layer)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.output = nn.Linear(fc_layer, self.num_classes)
        self.lincaps = nn.Linear(Num_capsule * Dim_capsule, 1)
        self.caps_layer = Caps_Layer(hidden_size*2)
    def forward(self, x, label=None):
        
#         Capsule(num_capsule=10, dim_capsule=10, routings=4, share_weights=True)(x)
        h_embedding = self.embedding(x)
        h_embedding = torch.squeeze(
            self.embedding_dropout(torch.unsqueeze(h_embedding, 0)))
        h_embedding = self.tdbn(h_embedding.unsqueeze(1)).squeeze(1)
        h_lstm, _ = self.lstm(h_embedding)
        h_gru, _ = self.gru(h_lstm)

        ##Capsule Layer        
        content3 = self.caps_layer(h_gru)
        content3 = self.dropout(content3)
        batch_size = content3.size(0)
        content3 = content3.view(batch_size, -1)
        content3 = self.relu(self.lincaps(content3))

        ##Attention Layer
        h_lstm_atten = self.lstm_attention(h_lstm)
        h_gru_atten = self.gru_attention(h_gru)
        
        # global average pooling
        avg_pool = torch.mean(h_gru, 1)
        # global max pooling
        max_pool, _ = torch.max(h_gru, 1)
        
        conc = torch.cat((h_lstm_atten, h_gru_atten,content3, avg_pool, max_pool), 1)
        conc = self.relu(self.linear(conc))
        conc = self.bn(conc)
        out = self.dropout(self.output(conc))
        if label is not None:
            loss_fct = nn.BCEWithLogitsLoss()
            loss = loss_fct(out.view(-1,self.num_classes).float(), label.view(-1,self.num_classes).float())
            return loss
        else:
            return out

In [82]:
x_pre = torch.randn([3,5])
y_true = torch.tensor([[0,0,1,0,0],
                      [0,0,0,1,0],
                      [1,0,0,0,0]])
from sklearn.metrics import log_loss, roc_auc_score, f1_score

In [83]:
x_pre = x_pre.sigmoid().detach().cpu().numpy()
y_true = y_true.detach().cpu().numpy()

In [85]:
auc = roc_auc_score(y_true,x_pre, multi_class='ovo')
f1 = f1_score(y_true, x_pre, average='macro')

ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

In [86]:
def metric_mlogloss(label,pred):
    score = 0
    for i in range(len(pred)):
        for j in range(17):
            if pred[i][j] == 0:
                pred[i][j] +=1e-10
            elif pred[i][j] == 1:
                pred[i][j] -=1e-10
            score += label[i][j]*np.log(pred[i][j])+(1-label[i][j])*np.log(1-pred[i][j])
    score /= (len(pred)*17*(-1))
    return 1-score

def validation_funtion(model, valid_dataloader, valid_torchdata, mode='valid'):
    model.eval()
    pred_list = []
    labels_list = []
    if mode == 'valid':
        for i, (description, label) in enumerate(tqdm(valid_dataloader)):
            output = model(description.to(DEVICE))
            pred_list += output.sigmoid().detach().cpu().numpy().tolist()
            labels_list += label.detach().cpu().numpy().tolist()
        print(type(labels_list))
        print("##################################")
        print(pred_list)
        auc = roc_auc_score(labels_list,pred_list, multi_class='ovo')
        logloss = log_loss(labels_list, pred_list)
        mlogloss = metric_mlogloss(labels_list, pred_list)
        return mlogloss, auc, logloss
    else:
        for i, (description) in enumerate(tqdm(valid_dataloader)):
            output = model(description.to(DEVICE))
            pred_list += output.sigmoid().detach().cpu().numpy().tolist()
        return pred_list
    

In [87]:
                            
def train(model, train_dataloader, valid_dataloader, valid_torchdata, epochs, early_stop=None):
    global logger
#     ema = EMA(model, 0.999)
#     ema.register()
    param_optimizer = list(model.named_parameters())
    embed_pa = ['embedding.weight']
    optimizer_grouped_parameters = [{'params': [p for n, p in param_optimizer if not any(nd in n for nd in embed_pa)]},
                                    {'params': model.embedding.parameters(), 'lr': 5e-5}]
    optimizer = AdamW(optimizer_grouped_parameters, lr=1e-3, amsgrad=True, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=3, T_mult=2, eta_min=1e-5, last_epoch=-1)
#     scheduler = CyclicLR(optimizer, base_lr=1e-3, max_lr=3e-3,
#                step_size=30, mode='exp_range',
#                gamma=0.99994)
#     opt = SWA(optimizer, swa_start=100, swa_freq=5, swa_lr=1e-4)
    total_loss = []
    train_loss = []
    best_mlogloss = -np.inf
    best_auc = -np.inf
    best_loss = np.inf
    no_improve = 0
    for epoch in range(epochs):
        model.train()
#         fgm = FGM(model)
        bar = tqdm(train_dataloader)
        for i, (description, label) in enumerate(bar):
            optimizer.zero_grad()
            output = model(description.to(DEVICE), label.to(DEVICE))
            loss = output
            loss.backward()
            train_loss.append(loss.item())
            
#             fgm.attack()
#             loss_adv = model(describe.to(DEVICE), label.to(DEVICE))
#             loss_ad = loss_adv
#             loss_ad.backward()
#             fgm.restore()
            
            scheduler.step(epochs + i / len(train_dataloader))
#             scheduler.batch_step()
            optimizer.step()
#             ema.update()
            bar.set_postfix(tloss=np.array(train_loss).mean())
#         opt.swap_swa_sgd()
#         ema.apply_shadow()
        mlogloss, auc, logloss = validation_funtion(model, valid_dataloader, valid_torchdata, 'valid')
#         ema.restore()
        print('train_loss: {:.5f}, mlogloss: {:.5f}, auc: {:.5f}, log_loss: {:.5f}\n'.format(train_loss[-1],mlogloss,auc,logloss))
        logger.info('Epoch:[{}]\t mlogloss={:.5f}\t auc={:.5f}\t log_loss={:.5f}\t'.format(epoch,mlogloss,auc,logloss))
        global model_num
        if early_stop:
            if mlogloss > best_mlogloss:
                best_mlogloss = mlogloss
                best_auc = auc
                best_loss = train_loss[-1]
#                 ema.apply_shadow()
                torch.save(model.state_dict(), '{}_model_{}.bin'.format(NAME, model_num))
#                 ema.restore()
            else:
                no_improve += 1
            if no_improve == early_stop:
                model_num += 1
                break
            if epoch == epochs-1:
                model_num += 1
        else:
            if epoch >= epochs-1:
                torch.save(model.state_dict(), '{}_model_{}.bin'.format(NAME, model_num))
                model_num += 1
    return best_mlogloss, best_auc, best_loss

In [88]:
import logging
def get_logger(filename, verbosity=1, name=None):
    level_dict = {0: logging.DEBUG, 1: logging.INFO, 2: logging.WARNING}
    formatter = logging.Formatter(
        "[%(asctime)s][%(filename)s][line:%(lineno)d][%(levelname)s] %(message)s"
    )
    logger = logging.getLogger(name)
    logger.setLevel(level_dict[verbosity])
    fh = logging.FileHandler(filename, "w")
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    sh = logging.StreamHandler()
    sh.setFormatter(formatter)
    logger.addHandler(sh)
    logger.removeHandler(sh)
    return logger

In [89]:
FOLD = 3
kf = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=SEED)
model_num = 1
test_preds_total = collections.defaultdict(list)
logger = get_logger('{}.log'.format(NAME))
best_mlogloss = []
best_auc = []
best_loss = []
for i, (train_index, test_index) in enumerate(kf.split(np.arange(train_data.shape[0]), train_data.label.values)):
    print(str(i+1), '-'*50)
    tra = [train_dataset[index] for index in train_index]
    val = [train_dataset[index] for index in test_index]
    print(len(tra))
    print(len(val))
    train_dataloader, train_torchdata = get_dataloader(tra, mode='train')
    valid_dataloader, valid_torchdata = get_dataloader(val, mode='valid')
    model = NeuralNet(w2v_model.wv.vectors.shape[0]+1,w2v_model.wv.vectors.shape[1]+fasttext_model.wv.vectors.shape[1],embeddings=True)
    model.to(DEVICE)
    mlogloss,auc,loss = train(model,train_dataloader,
                    valid_dataloader,
                    valid_torchdata,
                    epochs=1,
                    early_stop=5)
    torch.cuda.empty_cache()
    best_mlogloss.append(mlogloss)
    best_auc.append(auc)
    best_loss.append(loss)
for i in range(FOLD):
    print('- 第{}折中，best mlogloss: {}   best auc: {}   best loss: {}'.format(i+1, best_mlogloss[i], best_auc[i], best_loss[i]))

1 --------------------------------------------------
9339
4670


100%|██████████| 9339/9339 [00:00<00:00, 31497.80it/s]
100%|██████████| 4670/4670 [00:00<00:00, 33870.30it/s]
100%|██████████| 583/583 [00:26<00:00, 21.84it/s, tloss=0.278]
100%|██████████| 292/292 [00:03<00:00, 94.85it/s]
IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



train_loss: 0.18489, mlogloss: 0.90180, auc: 0.90709, log_loss: 1.68805

2 --------------------------------------------------
9339
4670


100%|██████████| 9339/9339 [00:00<00:00, 23313.05it/s]
100%|██████████| 4670/4670 [00:00<00:00, 34472.78it/s]
100%|██████████| 583/583 [00:26<00:00, 22.23it/s, tloss=0.28] 
100%|██████████| 292/292 [00:03<00:00, 93.15it/s]
IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



train_loss: 0.21690, mlogloss: 0.88960, auc: 0.89741, log_loss: 1.74496

3 --------------------------------------------------
9340
4669


100%|██████████| 9340/9340 [00:00<00:00, 35572.35it/s]
100%|██████████| 4669/4669 [00:00<00:00, 35713.29it/s]
  8%|▊         | 44/583 [00:01<00:22, 23.86it/s, tloss=0.682]


KeyboardInterrupt: 

- 第1折中，best mlogloss: 0.9754145609566004   best auc: 0.9989513662197707   best loss: 0.370415061712265
- 第2折中，best mlogloss: 0.9775772409438741   best auc: 0.9973311857745906   best loss: 0.3379441797733307
- 第3折中，best mlogloss: 0.9772133082300454   best auc: 0.9960571601879297   best loss: 0.3513079881668091
- 第4折中，best mlogloss: 0.9771865291289459   best auc: 0.9979561512414352   best loss: 0.3584481477737427
- 第5折中，best mlogloss: 0.9822103989262112   best auc: 0.9976901364500118   best loss: 0.31363099813461304
- 第6折中，best mlogloss: 0.9818852323759074   best auc: 0.9982848531202542   best loss: 0.3530382215976715
- 第7折中，best mlogloss: 0.9791612508478834   best auc: 0.9988892924512657   best loss: 0.33308154344558716
- 第8折中，best mlogloss: 0.98057134271314   best auc: 0.9992422836399381   best loss: 0.33398792147636414
- 第9折中，best mlogloss: 0.9789818534174326   best auc: 0.9985592272795051   best loss: 0.3301367461681366
- 第10折中，best mlogloss: 0.980113297001539   best auc: 0.9970231641648527   best loss: 0.3556895852088928

In [72]:
# model_num = 11
model = NeuralNet(w2v_model.wv.vectors.shape[0]+1,w2v_model.wv.vectors.shape[1]+fasttext_model.wv.vectors.shape[1],embeddings=True)
model.to(DEVICE)
test_preds_total = []  #[6004,6004,6004]
test_dataloader, test_torchdata = get_dataloader(test_dataset, mode='test')
for i in range(1,model_num):
    model.load_state_dict(torch.load('{}_model_{}.bin'.format(NAME, i)))
    test_pred_results = validation_funtion(model, test_dataloader, test_torchdata, 'test')
    test_preds_total.append(test_pred_results)
test_preds_merge = np.sum(test_preds_total, axis=0) / (model_num-1)#[6004, 35]
test_pre_tensor = torch.tensor(test_preds_merge)
test_pre = torch.max(test_pre_tensor,1)[1]
pre_submit_label = [idx2label[i] for i in test_pre.tolist()]
test_submit = test_data.drop(['text'], axis=1)
test_submit['label'] = pre_submit_label

100%|██████████| 6004/6004 [00:00<00:00, 40548.49it/s]
100%|██████████| 376/376 [00:03<00:00, 97.41it/s]
100%|██████████| 376/376 [00:04<00:00, 93.95it/s]
100%|██████████| 376/376 [00:04<00:00, 93.64it/s]


In [74]:
test_submit

Unnamed: 0,id,label
0,0,2-6
1,1,1-4
2,2,4-7
3,3,5-30
4,4,5-30
...,...,...
5999,5999,2-2
6000,6000,1-9
6001,6001,2-3
6002,6002,1-9


In [76]:
submit_file = '/home/zyf/Summer game2021/Datafountain/submits/submit.csv'
test_submit.to_csv(submit_file)