In [0]:
!pip install transformers




In [0]:
import os
path = "/content/drive/My Drive/NLP/sentiment_compete"
os.chdir(path)

In [0]:
import torch
import pandas as pd
import random
import numpy as np
from tqdm import tqdm
SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [0]:
from transformers import BertTokenizer,BertConfig
from transformers import BertModel


In [0]:
train_file_path = 'data/train_weibo_clean.csv'
train09_file_path = 'data/train_weibo_09.csv'
dev01_file_path = 'data/train_weibo_01.csv'
bert_model_path = 'bert_base_chinese/bert-base-chinese-pytorch_model.bin'
bert_config_path = 'bert_base_chinese/bert-base-chinese-config.json'
bert_vocab_path = 'bert_base_chinese/bert-base-chinese-vocab.txt'
max_seq_len = 140
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [0]:
bert_config = BertConfig.from_pretrained(bert_config_path, output_hidden_states=True)
tokenizer = BertTokenizer.from_pretrained(bert_vocab_path)

Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated


In [0]:
def split_data(file_path, rate=0.9):
  df = pd.read_csv(file_path)
  df = df.iloc[np.random.permutation(len(df))]
  train_df = df.iloc[:int(len(df)*rate)]
  dev_df = df.iloc[int(len(df)*rate):]
  train_df.to_csv('data/train_weibo_09.csv', index=False)
  dev_df.to_csv('data/train_weibo_01.csv', index=False)


In [0]:
def load_data(file_path, tokenizer, max_seq_len, device):
  df = pd.read_csv(file_path)
  df = df[df['情感倾向'].isin(['0', '1','-1'])]
  df = df[['微博中文内容', '情感倾向']]
  inputs = tokenize_data(df, tokenizer, '微博中文内容', max_seq_len, device)
  outputs = torch.tensor(data=df['情感倾向'].astype(int) + 1, dtype=torch.long, device=device)
  return inputs, outputs

def tokenize_data(df, tokenizer, column, max_seq_len, device):
  input_ids = []
  attention_mask = []
  token_type_ids = []
  for content in tqdm(df[column]):
    inputs = tokenizer.encode_plus(text=str(content),
                    add_special_tokens=True,
                    max_length=max_seq_len,
                    truncation_strategy="longest_first",
                    return_attention_mask=True,
                    pad_to_max_length=True)
    ids, masks, token_type_id = inputs['input_ids'], inputs['attention_mask'], inputs['token_type_ids']
    input_ids.append(ids)
    attention_mask.append(masks)
    token_type_ids.append(token_type_id) # 3 n len
  return torch.tensor(data=[input_ids, attention_mask, token_type_ids], device=device).permute(1, 0, 2)


In [0]:
train_data = load_data(train09_file_path, tokenizer, max_seq_len, device)

100%|██████████| 89604/89604 [00:43<00:00, 2037.23it/s]


In [0]:
dev_data = load_data(dev01_file_path, tokenizer, max_seq_len, device)

100%|██████████| 9956/9956 [00:04<00:00, 2038.08it/s]


In [0]:
import torch.nn as nn
import torch.nn.functional as F
from transformers import BertModel


class BertCLSLAST3MEAN(nn.Module):
    def __init__(self, bert_model_path, config, seq_len):
        super(BertCLSLAST3MEAN, self).__init__()
        self.bert_model = BertModel.from_pretrained(bert_model_path, config=config)
        self.predict_fc = nn.Sequential(nn.Dropout(p=0.2),
                        nn.Linear(config.hidden_size * 2, config.hidden_size),
                        nn.Tanh(),
                        nn.Dropout(p=0.2),
                        nn.Linear(config.hidden_size, 3)
                        )
        self.predict_fc.apply(self.init_network)
        #for param in self.bert_model.parameters():
        #    param.requires_grad = False
        self.dropout = nn.Dropout(0.15)

    def forward(self, inputs):
        #(batch_size, 3, seq_max_len)i
        input_ids = inputs[:, 0] # (batch_size, seq_max_len)
        attention_mask = inputs[:, 1] # (batch_size, seq_max_len)
        token_type_ids = inputs[:, 2] # (batch_size, seq_max_len)
        #(batch_size, sequence_length, hidden_size)
        sequence_output, pooler_output, hidden_states = self.bert_model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        attention_mask_pad = attention_mask.unsqueeze(2).permute(0, 2, 1) # (batch_size, 1, seq_len)
        
        seq_lens = torch.sum(attention_mask, dim=1).unsqueeze(1)# (batch_size, 1)
       
        #h13 = self.get_mean(attention_mask_pad, hidden_states[-1], seq_lens)# (batch_size, hidden_size)
        """
        h12 = self.get_mean(attention_mask_pad, hidden_states[-2], seq_lens)# (batch_size, hidden_size)
        h11 = self.get_mean(attention_mask_pad, hidden_states[-3], seq_lens)# (batch_size, hidden_size)
        h10 = self.get_mean(attention_mask_pad, hidden_states[-4], seq_lens)# (batch_size, hidden_size)
        concat_hidden = torch.cat([h13, h12, h11, h10], dim=1)#(batch_size, 4, hidden_size)
        """
        #h13 = h13.squeeze()
        #mean_hidden = concat_hidden.mean(dim=1)#(batch_size, hidden_size)
        mean_cls = (hidden_states[-1][:,0] + hidden_states[-2][:,0] + hidden_states[-3][:,0]) / 3.0
        concat_input = torch.cat([pooler_output, mean_cls], dim=1)
        outputs = self.predict_fc(concat_input)
        return outputs

    def get_mean(self, attention_mask_pad, hidden_state, seq_lens):
        """
          hidden_state: (batch_size, seq_len, hidden_size)
          seq_lens (batch_size, 1)
        """
        hidden_state_real = hidden_state.permute(0, 2, 1) * attention_mask_pad # (batch_size, hidden_size, seq_len)
        hidden_state_real = hidden_state_real.permute(0, 2, 1)# (batch_size, seq_len, hidden_size)
        hidden_state_sum = torch.sum(hidden_state_real, dim=1)# (batch_size, hidden_size)
        return self.div_with_small_value(hidden_state_sum, seq_lens).unsqueeze(1)

    def div_with_small_value(self, a, b, eps=1.0):
        b = b * (b > eps).float() + (b <= eps).float() * 1.0
        return a / b 

    def init_network(self, module):
        if isinstance(module, nn.Linear):
          print(module.__class__.__name__)
          nn.init.xavier_uniform_(module.weight.data)
          nn.init.constant_(module.bias.data, 0.0)


In [0]:
import torch.nn as nn
import torch.nn.functional as F
from transformers import BertModel


class BertCLSLAST3(nn.Module):
    def __init__(self, bert_model_path, config, seq_len):
        super(BertCLSLAST3, self).__init__()
        self.bert_model = BertModel.from_pretrained(bert_model_path, config=config)
        self.predict_fc = nn.Sequential(nn.Dropout(p=0.2),
                        nn.Linear(config.hidden_size * 2, config.hidden_size),
                        nn.Tanh(),
                        nn.Dropout(p=0.2),
                        nn.Linear(config.hidden_size, 3)
                        )
        self.predict_fc.apply(self.init_network)
        #for param in self.bert_model.parameters():
        #    param.requires_grad = False
        self.dropout = nn.Dropout(0.15)

    def forward(self, inputs):
        #(batch_size, 3, seq_max_len)i
        input_ids = inputs[:, 0] # (batch_size, seq_max_len)
        attention_mask = inputs[:, 1] # (batch_size, seq_max_len)
        token_type_ids = inputs[:, 2] # (batch_size, seq_max_len)
        #(batch_size, sequence_length, hidden_size)
        sequence_output, pooler_output, hidden_states = self.bert_model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        attention_mask_pad = attention_mask.unsqueeze(2).permute(0, 2, 1) # (batch_size, 1, seq_len)
        seq_lens = torch.sum(attention_mask, dim=1).unsqueeze(1)# (batch_size, 1)
        h13 = self.get_mean(attention_mask_pad, hidden_states[-1], seq_lens)# (batch_size, hidden_size)
        h12 = self.get_mean(attention_mask_pad, hidden_states[-2], seq_lens)# (batch_size, hidden_size)
        h11 = self.get_mean(attention_mask_pad, hidden_states[-3], seq_lens)# (batch_size, hidden_size)
        h10 = self.get_mean(attention_mask_pad, hidden_states[-4], seq_lens)# (batch_size, hidden_size)
        concat_hidden = torch.cat([h13, h12, h11, h10], dim=1)#(batch_size, 4, hidden_size)
        mean_hidden = concat_hidden.mean(dim=1)#(batch_size, hidden_size)
        mean_cls = (hidden_states[-1][:,0] + hidden_states[-2][:,0] + hidden_states[-3][:,0]) / 3.0
        concat_input = torch.cat([mean_hidden, mean_cls], dim=1)
        outputs = self.predict_fc(concat_input)
        return outputs

    def get_mean(self, attention_mask_pad, hidden_state, seq_lens):
        """
          hidden_state: (batch_size, seq_len, hidden_size)
          seq_lens (batch_size, 1)
        """
        hidden_state_real = hidden_state.permute(0, 2, 1) * attention_mask_pad # (batch_size, hidden_size, seq_len)
        hidden_state_real = hidden_state_real.permute(0, 2, 1)# (batch_size, seq_len, hidden_size)
        hidden_state_sum = torch.sum(hidden_state_real, dim=1)# (batch_size, hidden_size)
        return self.div_with_small_value(hidden_state_sum, seq_lens).unsqueeze(1)

    def div_with_small_value(self, a, b, eps=1.0):
        b = b * (b > eps).float() + (b <= eps).float() * 1.0
        return a / b 

    def init_network(self, module):
        if isinstance(module, nn.Linear):
          print(module.__class__.__name__)
          nn.init.xavier_uniform_(module.weight.data)
          nn.init.constant_(module.bias.data, 0.0)


In [0]:
import torch.nn as nn
import torch.nn.functional as F
from transformers import BertModel


class ModelH(nn.Module):
    def __init__(self, bert_model_path, config, seq_len):
        super(ModelH, self).__init__()
        self.bert_model = BertModel.from_pretrained(bert_model_path, config=config)
        self.predict_fc = nn.Linear(in_features=config.hidden_size, out_features=3, bias=True)
        self.predict_fc.apply(self.init_network)
        #for param in self.bert_model.parameters():
        #    param.requires_grad = False
        self.dropout = nn.Dropout(0.15)

    def forward(self, inputs):
        #(batch_size, 3, seq_max_len)i
        input_ids = inputs[:, 0] # (batch_size, seq_max_len)
        attention_mask = inputs[:, 1] # (batch_size, seq_max_len)
        token_type_ids = inputs[:, 2] # (batch_size, seq_max_len)
        #(batch_size, sequence_length, hidden_size)
        sequence_output, pooler_output, hidden_states = self.bert_model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        attention_mask_pad = attention_mask.unsqueeze(2).permute(0, 2, 1) # (batch_size, 1, seq_len)
        #print("attention_mask_pad size", attention_mask_pad.size())
        seq_lens = torch.sum(attention_mask, dim=1).unsqueeze(1)# (batch_size, 1)
        # hidden_states([64, 140, 768])
        h13 = self.get_mean(attention_mask_pad, hidden_states[-1], seq_lens)# (batch_size, hidden_size)
        h12 = self.get_mean(attention_mask_pad, hidden_states[-2], seq_lens)# (batch_size, hidden_size)
        h11 = self.get_mean(attention_mask_pad, hidden_states[-3], seq_lens)# (batch_size, hidden_size)
        h10 = self.get_mean(attention_mask_pad, hidden_states[-4], seq_lens)# (batch_size, hidden_size)
        #print('hidden size', h10.size())
        concat_hidden = torch.cat([h13, h12, h11, h10], dim=1)#(batch_size, 4, hidden_size)
        #print('concat_hidden size', concat_hidden.size())
        mean_hidden = concat_hidden.mean(dim=1)#(batch_size, hidden_size)
        #print('mean hidden size', mean_hidden.size())
        inputs_fc = self.dropout(mean_hidden)
        outputs = self.predict_fc(inputs_fc)
        return outputs

    def get_mean(self, attention_mask_pad, hidden_state, seq_lens):
        """
          hidden_state: (batch_size, seq_len, hidden_size)
          seq_lens (batch_size, 1)
        """
        hidden_state_real = hidden_state.permute(0, 2, 1) * attention_mask_pad # (batch_size, hidden_size, seq_len)
        hidden_state_real = hidden_state_real.permute(0, 2, 1)# (batch_size, seq_len, hidden_size)
        hidden_state_sum = torch.sum(hidden_state_real, dim=1)# (batch_size, hidden_size)
        return self.div_with_small_value(hidden_state_sum, seq_lens).unsqueeze(1)

    def div_with_small_value(self, a, b, eps=1.0):
        b = b * (b > eps).float() + (b <= eps).float() * 1.0
        return a / b 

    def init_network(self, module):
        if isinstance(module, nn.Linear):
          print(module.__class__.__name__)
          nn.init.xavier_uniform_(module.weight.data)
          nn.init.constant_(module.bias.data, 0.0)


In [0]:
import torch.nn as nn
import torch.nn.functional as F
from transformers import BertModel
import torch


class Model(nn.Module):
    def __init__(self, bert_model_path, config, seq_len):
        super(Model, self).__init__()

        self.bert_model = BertModel.from_pretrained(bert_model_path, config=config)
        self.linear_1 = nn.Linear(in_features=seq_len, out_features=1, bias=True)
        self.linear_2 = nn.Linear(in_features=config.hidden_size * 2, out_features=3, bias=True)
        self.dropout = nn.Dropout(0.15)
        self.init_network()
        # for param in self.bert_model.parameters():
        #     param.requires_grad = False
 

    def forward(self, inputs):
        sequence_output, pooler_output, hidden_states  = self.bert_model(input_ids=inputs[:, 0], attention_mask=inputs[:, 1], token_type_ids=inputs[:, 2])
        outputs = self.linear_1(sequence_output.permute(0, 2, 1)).squeeze(dim=2)
        # (batch_size, sequence_length, hidden_size)-> (batch_size, hidden_size)
        outputs = F.relu(outputs)
        outputs = self.dropout(outputs)
        concat = torch.cat([h13, h12, h11, h10], dim=1)
        outputs = self.linear_2(outputs)  # (batch_size, hidden_size)->(batch_size,3)
        outputs = F.softmax(outputs, dim=1)
        return outputs

    def init_network(self):
        nn.init.xavier_normal_(self.linear_1.weight)
        nn.init.xavier_normal_(self.linear_2.weight)


In [0]:
import torch.nn as nn
import torch.nn.functional as F
from transformers import BertModel
import torch


class Model_POOL_H_ATTEN(nn.Module):
    def __init__(self, bert_model_path, config, seq_len):
        super(Model_POOL_H_ATTEN, self).__init__()

        self.bert_model = BertModel.from_pretrained(bert_model_path, config=config)
        self.linear_1 = nn.Linear(in_features=seq_len, out_features=1, bias=True)
        self.linear_2 = nn.Linear(in_features=config.hidden_size * 2, out_features=3, bias=True)
        self.dropout = nn.Dropout(0.15)
        self.init_network()
        # for param in self.bert_model.parameters():
        #     param.requires_grad = False
 

    def forward(self, inputs):
        sequence_output, pooler_output, hidden_states  = self.bert_model(input_ids=inputs[:, 0], attention_mask=inputs[:, 1], token_type_ids=inputs[:, 2])
        outputs = self.linear_1(sequence_output.permute(0, 2, 1)).squeeze(dim=2)
        # (batch_size, sequence_length, hidden_size)-> (batch_size, hidden_size)
        outputs = F.relu(outputs)
        outputs = self.dropout(outputs)
        concat = torch.cat([outputs, pooler_output], dim=1)
        outputs = self.linear_2(concat)  # (batch_size, hidden_size)->(batch_size,3)
        outputs = F.softmax(outputs, dim=1)
        return outputs

    def init_network(self):
        nn.init.xavier_normal_(self.linear_1.weight)
        nn.init.xavier_normal_(self.linear_2.weight)


In [0]:
class FocalLoss(nn.Module):
    def __init__(self, weight=None, reduction='mean', gamma=2, eps=1e-7):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.eps = eps
        self.ce = torch.nn.CrossEntropyLoss(weight=weight, reduction=reduction)

    def forward(self, inputs, target):
        logp = self.ce(inputs, target)
        p = torch.exp(-logp)
        loss = (1-p) ** self.gamma * logp
        return loss.mean()

In [0]:
from torch.utils.data import DataLoader, TensorDataset
def train(train_data, dev_data, model, batch_size, num_epochs, model_save_path, lr=0.0001):
  train_inputs, train_outputs = train_data
  train_dataset = TensorDataset(train_inputs, train_outputs)
  start_time = time.time()
  optimizer = optim.Adam(model.parameters(), lr=lr)
  total_batch = 0
  criterion = nn.CrossEntropyLoss()
  dev_per_batch = 500
  dev_best_loss = float('inf')
  last_improve = 0
  require_improvement = 1000
  model.train()
  for epoch in range(num_epochs):
    print('epoch [{}/{}]'.format(epoch + 1, num_epochs))
    for (inputs, labels) in DataLoader(train_dataset, batch_size=batch_size, shuffle=True):
      total_batch += 1
      model.zero_grad() 
      outputs = model(inputs)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()
      if total_batch % dev_per_batch == 0:
        true_labels = labels.data.cpu()
        predicts = torch.max(outputs.data, dim=1)[1].cpu().numpy()
        train_acc = metrics.accuracy_score(true_labels, predicts)
        time_dif = get_time_dif(start_time)
        dev_acc, dev_loss, report, confusion = evaluate(dev_data, model, batch_size)
        model.train()
        if dev_best_loss > dev_loss:
          dev_best_loss = dev_loss
          improve = '*'
          torch.save(model.state_dict(), model_save_path)
        else:
          improve = ' '
        msg = 'Epoch:{0:>2} Iter: {1:>6},  Train Loss: {2:>5.2},  Train Acc: {3:>6.2%},' \
                      '  Dev Loss: {4:>5.2},  Dev Acc: {5:>6.2%},  Time: {6} {7}'
        print(msg.format(epoch, total_batch, loss.item(), train_acc, dev_loss, dev_acc, time_dif, improve))
      
  evaluate(dev_data, model, batch_size)

In [0]:
def evaluate(dev_data, model, batch_size, coef=None):
  model.eval()
  labels_all = np.array([], dtype=int)
  predicts_all = np.array([], dtype=int)
  dev_inputs, dev_outputs = dev_data
  criterion = nn.CrossEntropyLoss()
  dev_dataset = TensorDataset(dev_inputs, dev_outputs)
  loss_total = 0
  with torch.no_grad():
    for (inputs, labels) in DataLoader(dataset=dev_dataset, batch_size=batch_size, shuffle=False):
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss_total += loss.item()
        predicts = torch.max(outputs.data, dim=1)[1].cpu().numpy()
        labels = labels.data.cpu().numpy()
        predicts_all = np.append(predicts_all, predicts)
        labels_all = np.append(labels_all, labels)
  acc = metrics.accuracy_score(labels_all, predicts_all)
  report = metrics.classification_report(labels_all, predicts_all, digits=4)
  confusion = metrics.confusion_matrix(labels_all, predicts_all)
  return acc, loss_total / len(dev_inputs) * batch_size, report, confusion

In [0]:
def predict(data_x, ids_all, model, coef, batch_size, output_path):
    model.load_state_dict(torch.load('save_model/Model_POOL_H_ATTEN'))
    model.eval()
    start_time = time.time()
    torch_coef = torch.tensor(coef, device=device).view(-1, 3)
    predicts_all = []
    for inputs,_ in tqdm(DataLoader(dataset=data_x, batch_size=batch_size, shuffle=False)):
        outputs = model(inputs)
        outputs = F.softmax(outputs, dim=1)
        outputs = outputs * torch_coef
        predicts = list(torch.max(outputs.data, dim=1)[1].cpu().numpy() - 1)
        predicts_all = predicts_all + predicts

    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)
    result_pd = pd.DataFrame(
        {
            'id': ids_all,
            'y': predicts_all
        }
    )
    result_pd.to_csv(output_path, index=False)
    print("finish !")

In [0]:
import time
from datetime import timedelta
def get_time_dif(start_time):
    """获取已使用时间"""
    end_time = time.time()
    time_dif = end_time - start_time
    return timedelta(seconds=int(round(time_dif)))

In [0]:
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn import metrics
import time
import numpy as np
import pandas as pd
from tqdm import tqdm

In [0]:
#focal_loss = FocalLoss()
model = Model_POOL_H_ATTEN(bert_model_path, bert_config, 140).to(device)

In [0]:
train(train_data, dev_data, model, 32, 10, 'save_model/Model_POOL_H_ATTEN', lr=0.000005)

epoch [1/10]
Epoch: 0 Iter:    500,  Train Loss:  0.69,  Train Acc: 90.62%,  Dev Loss:  0.82,  Dev Acc: 72.96%,  Time: 0:06:19 *
Epoch: 0 Iter:   1000,  Train Loss:  0.86,  Train Acc: 68.75%,  Dev Loss:  0.81,  Dev Acc: 73.20%,  Time: 0:14:06 *
Epoch: 0 Iter:   1500,  Train Loss:   0.7,  Train Acc: 84.38%,  Dev Loss:  0.81,  Dev Acc: 73.30%,  Time: 0:21:52 *
Epoch: 0 Iter:   2000,  Train Loss:  0.73,  Train Acc: 81.25%,  Dev Loss:   0.8,  Dev Acc: 74.20%,  Time: 0:29:38 *
Epoch: 0 Iter:   2500,  Train Loss:  0.78,  Train Acc: 78.12%,  Dev Loss:   0.8,  Dev Acc: 74.15%,  Time: 0:37:25 *
epoch [2/10]
Epoch: 1 Iter:   3000,  Train Loss:  0.81,  Train Acc: 75.00%,  Dev Loss:   0.8,  Dev Acc: 74.33%,  Time: 0:45:11 *
Epoch: 1 Iter:   3500,  Train Loss:  0.81,  Train Acc: 75.00%,  Dev Loss:   0.8,  Dev Acc: 74.64%,  Time: 0:52:58 *
Epoch: 1 Iter:   4000,  Train Loss:  0.84,  Train Acc: 68.75%,  Dev Loss:   0.8,  Dev Acc: 74.31%,  Time: 1:00:44  
Epoch: 1 Iter:   4500,  Train Loss:  0.59,  Tr

KeyboardInterrupt: ignored

In [0]:
dev_acc, dev_loss, report, confusion = evaluate(dev_data, model, 32)
print("report",report)

report               precision    recall  f1-score   support

           0     0.6712    0.5863    0.6259      1668
           1     0.7730    0.8379    0.8041      5767
           2     0.7496    0.6684    0.7066      2521

    accuracy                         0.7528      9956
   macro avg     0.7313    0.6975    0.7122      9956
weighted avg     0.7500    0.7528    0.7496      9956



In [0]:
def load_test_data(file_path, tokenizer, max_seq_len, device):
  df = pd.read_csv(file_path)
  df = df[['微博中文内容']]
  inputs = tokenize_data(df, tokenizer, '微博中文内容', max_seq_len, device)
  return inputs

In [0]:
test_input = load_test_data('data/test_weibo_clean.csv', tokenizer, max_seq_len, device)
test_tensor = TensorDataset(test_input, torch.rand(test_input.size()[0]))

100%|██████████| 10000/10000 [00:04<00:00, 2041.67it/s]


In [0]:
predict(test_tensor, test_df['微博id'], model, [2.88884269, 1.09375589, 2.86697647], 16, 'bert_ans.csv')

100%|██████████| 625/625 [01:26<00:00,  7.25it/s]

Time usage: 0:01:26
finish !





In [0]:
torch.save(model.state_dict(), 'save_model/bert_model')

In [0]:
!nvidia-smi

Wed Apr  1 13:45:27 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.64.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   73C    P0    71W / 149W |  10830MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
+-------

In [0]:
def get_threshold(dev_data, model, batch_size):
  model.eval()
  labels_all = np.array([], dtype=int)
  predicts_all = np.array([[1,1,1]], dtype=float)
  dev_inputs, dev_outputs = dev_data
  criterion = nn.CrossEntropyLoss()
  dev_dataset = TensorDataset(dev_inputs, dev_outputs)
  loss_total = 0
  with torch.no_grad():
    for (inputs, labels) in DataLoader(dataset=dev_dataset, batch_size=batch_size, shuffle=False):
        outputs = model(inputs)
        outputs = F.softmax(outputs, dim=1) 
        predict_np = outputs.data.cpu().numpy()
        labels = labels.data.cpu().numpy()
        predicts_all = np.append(predicts_all, predict_np, axis=0)
        labels_all = np.append(labels_all, labels)
  predicts_all = predicts_all[1:]
  optimizedRounder = OptimizedRounder()
  optimizedRounder.fit(predicts_all, labels_all)
  coef = optimizedRounder.get_coef()
  return coef

In [0]:
predicts_all, labels_all = get_threshold(train_data, model, 32)

(array([[0.21196282, 0.57606238, 0.21197481],
        [0.21868035, 0.54282802, 0.23849164],
        [0.21194977, 0.21194738, 0.57610285],
        ...,
        [0.21194674, 0.57610726, 0.21194595],
        [0.21195276, 0.57609522, 0.21195202],
        [0.21208781, 0.21242923, 0.57548296]]), array([1, 1, 2, ..., 1, 1, 2]))

In [0]:
print("predicts_all shape", predicts_all.shape)
print("labels_all shape", labels_all.shape)

predicts_all shape (9956, 3)
labels_all shape (9956,)


In [0]:
from functools import partial
import numpy as np
import scipy as sp

class OptimizedRounder(object):
    def __init__(self):
        self.coef_ = 0

    def _f1_loss(self, coef, X, y):
        predict = X * coef
        predict_y = np.argmax(predict, axis =1)
        f1_score = metrics.f1_score(y, predict_y, average='macro')  
        return -f1_score * 1000

    def fit(self, X, y):
        loss_partial = partial(self._f1_loss, X=X, y=y)
        initial_coef = [2, 1.0, 2]
        res= sp.optimize.basinhopping(loss_partial, initial_coef, niter=1000)
        print(res.x)
        print(res.fun)
        self.coef_ = res.x
    def get_coef(self):
        return self.coef_

In [0]:
optimizedRounder = OptimizedRounder()
optimizedRounder.fit(predicts_all, labels_all)
coef = optimizedRounder.get_coef()

[2.88884269 1.09375589 2.86697647]
-723.4094685948046


In [0]:
coef

array([1.5 , 1.05, 1.  ])

In [0]:
torch_coef = torch.tensor([2.88884269,1.09375589,2.86697647]).view(-1, 3)

In [0]:
torch_coef

tensor([[2.8888, 1.0938, 2.8670]])

In [0]:
np.argmax(d, axis =1 )

array([2, 2])

In [0]:
predict_label = np.argmax(predicts_all * coef, axis =1)
f1_score = metrics.f1_score(labels_all, predict_label, average='macro')
print(f1_score)

0.7234094685948046


In [0]:
predict_label.shape

(9955,)

In [0]:
0.7122314128155406