# Preparation

In [None]:
# Mount Google Drive; every data, model and checkpoint path used below lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install opencc-python-reimplemented
!pip install pycantonese

Collecting opencc-python-reimplemented
  Downloading opencc-python-reimplemented-0.1.6.tar.gz (484 kB)
[?25l[K     |▊                               | 10 kB 24.8 MB/s eta 0:00:01[K     |█▍                              | 20 kB 9.0 MB/s eta 0:00:01[K     |██                              | 30 kB 7.4 MB/s eta 0:00:01[K     |██▊                             | 40 kB 3.8 MB/s eta 0:00:01[K     |███▍                            | 51 kB 3.7 MB/s eta 0:00:01[K     |████                            | 61 kB 4.4 MB/s eta 0:00:01[K     |████▊                           | 71 kB 4.6 MB/s eta 0:00:01[K     |█████▍                          | 81 kB 3.5 MB/s eta 0:00:01[K     |██████                          | 92 kB 3.9 MB/s eta 0:00:01[K     |██████▊                         | 102 kB 4.3 MB/s eta 0:00:01[K     |███████▍                        | 112 kB 4.3 MB/s eta 0:00:01[K     |████████▏                       | 122 kB 4.3 MB/s eta 0:00:01[K     |████████▉                       | 133

In [None]:
import pandas as pd
import numpy as np
import re,string
import math
import opencc
from sklearn import preprocessing
import torch
import torch.nn as nn
import pkg_resources
import warnings
pkg_resources.get_distribution("xlrd").version
warnings.filterwarnings('ignore')

In [None]:
#Data pre-processing
def simplify_punctuation_and_whitespace(sentence):
    """
    Run the full text clean-up pipeline on one raw text.

    The three stages are applied in a fixed order: URL replacement,
    punctuation simplification, whitespace normalisation.
    :param sentence: raw text
    :return: cleaned text
    """
    cleaned = sentence
    for stage in (_replace_urls, _simplify_punctuation, _normalize_whitespace):
        cleaned = stage(cleaned)
    return cleaned

def _replace_urls(text):
    url_regex = r'(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})'
    text = text.replace('�','.')
    text = re.sub(r'^[\x00-\x7F]+|[\x00-\x7F]+$', '', text)#special case
    text = re.sub(url_regex, "<URL>", text)
    return text

def _simplify_punctuation(text):
    """
    This function simplifies doubled or more complex punctuation. The exception is '...'.
    """
    corrected = str(text)
    corrected = re.sub(r'([!?,;？❓！.])\1+', r'\1', corrected)
    corrected = re.sub(r'\.{2,}', r'...', corrected)
    return corrected

def _normalize_whitespace(text):
    """
    This function normalizes whitespaces, removing duplicates.
    """
    corrected = str(text)
    corrected = re.sub(r"//t",r"\t", corrected)
    corrected = re.sub(r"( )\1+",r"\1", corrected)
    corrected = re.sub(r"(\n)\1+",r"\1", corrected)
    corrected = re.sub(r"(\r)\1+",r"\1", corrected)
    corrected = re.sub(r"(\t)\1+",r"\1", corrected)
    return corrected.strip(" ")

In [None]:
#get data
data=pd.read_excel(r'/content/drive/MyDrive/DATA/dataset_feature.xlsx')
df=pd.DataFrame(data)
cc = opencc.OpenCC('s2hk')
# Convert every text to the HK standard, then run the clean-up pipeline.
# The whole column is rebuilt in one go instead of mutating and dropping rows while
# iterating by position: dropping inside the loop shifts the positions of the remaining
# rows, so rows were skipped and the precomputed range could run past the end.
df['text'] = [simplify_punctuation_and_whitespace(cc.convert(raw)) for raw in df['text']]
# Texts that are empty after cleaning are removed (index labels of kept rows are unchanged).
df = df[df['text'].str.len() > 0]
print(len(df))

In [None]:
# Posts: rows flagged is_post == 1. An explicit copy is taken so that the column
# assignments below write to an independent frame, never to a slice of `df`.
mask=(df['is_post']==1)
df_info=df.loc[mask].copy() # dataframe for posts
df_info['branch_text']=df_info['text']  # a post is its own (single-node) branch
df_info['depth']=1
columns=['label', 'is_post', 'info_id', 'cmt_id', 'parent', 'text','branch_text','depth']
df_info=df_info[columns].copy()
df_info['label']=df_info['label']-1  # shift labels down by one

# Comments: rows flagged is_post == 0.
mask=(df['is_post']==0)
df2=df.loc[mask].copy() # dataframe for comments
df2['last_sep']=0
df2['depth']=1 # initialize the depth for comments as 1
df2['branch_text']=''
columns=['label', 'info_id', 'cmt_id', 'parent','last_sep', 'text','branch_text','depth']
df2=df2[columns].copy()
df2['label']=df2['label']-1

# Resolve the parent of every comment in a single column assignment:
#  - no parent recorded  -> the post itself is the parent (info_id is stored as is)
#  - parent recorded     -> the parent comment id as an integer string
# Building the column at once replaces the chained `df2['parent'].iloc[i] = ...`
# writes, which assigned strings into a float column one row at a time.
df2['parent'] = [
    info_id if pd.isna(parent) else str(int(parent))
    for parent, info_id in zip(df2['parent'], df2['info_id'])
]
df2.info()

In [None]:
Sep = '[SEP]'  # separator placed between consecutive texts of a sub-branch
lenths=[]#over-long lengths
emptys=[]#error nodes: parent ids that build_branch could not resolve to exactly one comment

def build_branch(dataframe, i, text_list):
  '''
  Build the sub-branch (input of Branch-BERT) that ends at row i of dataframe.

  The texts on the path are appended to text_list in place, from the current
  node up to the post. The returned string lists them in the opposite order
  (post first), joined with Sep.
  :param dataframe: dataframe holding the current node
  :param i: positional index of the current node
  :param text_list: texts collected so far (mutated in place)
  :return: the sub-branch as one string
  '''
  node_text = dataframe['text'].iloc[i]
  parent_ref = dataframe['parent'].iloc[i]
  post_id = dataframe['info_id'].iloc[i]
  if parent_ref == post_id:
    # the parent node is a post
    post_rows = df_info.loc[df_info['info_id']==post_id]
    if len(post_rows) != 1:
      print("INFO EMPTY: ", post_id)
    else:
      text_list.extend([node_text, post_rows['text'].iloc[0]])
  else:
    # the parent node is a comment
    text_list.append(node_text)
    parent_key = str(parent_ref)
    parent_rows = df2.loc[df2['cmt_id'] == parent_key]
    if len(parent_rows) == 1:
      # recurse upwards; the recursive call keeps appending to the same list
      build_branch(parent_rows, 0, text_list)
    else:
      emptys.append(parent_key)#error
  # reverse into [post,cmt1,cmt2...cmtn] and separate consecutive texts with [SEP]
  return Sep.join(text_list[::-1])

# Iterate over the comments dataframe to build sub-branches.
# build_branch appends every text on the path (comment ... post) to the list it is given,
# so the length of that list is the number of nodes in the sub-branch. Previously the list
# variable was overwritten with the joined string and `depth` became a character count.
branch_texts = []
branch_depths = []
for i in range(len(df2)):
  path_texts = []
  branch_texts.append(build_branch(df2, i, path_texts))
  branch_depths.append(len(path_texts)) # depth is the length of the sub-branch, in nodes
# Whole-column assignment instead of per-row chained `.iloc[i] =` writes.
df2['branch_text'] = branch_texts
df2['depth'] = branch_depths
# Drop comments whose parent is one of the unresolved parent ids.
df2=df2[~df2['parent'].isin(emptys)]


# Branch Model
(Branch-BERT & Branch-BERT w/o CFE)

In [None]:
# coding:utf-8
import pycantonese
import nltk
import numpy
import jieba
import codecs
import os
class SummaryTxt:#abstract generator
    """
    Keyword-cluster based extractive summarizer.

    Sentences are scored by how densely the most frequent words of the text
    occur in them; the best sentences are returned as the summary.
    """
    def __init__(self):
        #number of most frequent words used as key words
        self.N = 100
        #cluster threshold: key words closer than this many tokens share a cluster
        self.CLUSTER_THRESHOLD = 5
        #top n sentences
        self.TOP_SENTENCES = 5
        #load stopwords
        self.stopwords = {}.fromkeys(pycantonese.stop_words())

    def _split_sentences(self,texts):
        '''
        Split a text into sentences. The characters .!?。！？ end a sentence
        and are kept at the end of it.
        :param texts: text
        :return: list of sentences
        '''
        splitstr = '.!?。！？'
        start = 0
        sentences = []
        for index, char in enumerate(texts):
            if char in splitstr:  # check whether there is a splitter
                sentences.append(texts[start:index + 1])
                start = index + 1
        if start < len(texts):
            sentences.append(texts[start:])  # text after the last splitter
        return sentences

    def _top_words(self, sentences):
        '''
        Return the (at most N) most frequent words of the sentences, skipping
        stop words, single-character words and tabs.
        '''
        words = [w for sentence in sentences for w in pycantonese.segment(sentence)
                 if w not in self.stopwords if len(w) > 1 and w != '\t']
        wordfre = nltk.FreqDist(words)
        ranked = sorted(wordfre.items(), key=lambda d: d[1], reverse=True)
        return [w[0] for w in ranked][:self.N]

    def _score_sentences(self,sentences, topn_words):
        '''
        Score sentences with the top n key words.
        :param sentences: sentence list
        :param topn_words: key words list
        :return: list of (sentence index, score); sentences without any key word are omitted
        '''
        scores = []
        segmented = [pycantonese.segment(s) for s in sentences]
        for sentence_idx, s in enumerate(segmented):
            # position of the first occurrence of each key word present in the sentence
            word_idx = sorted(s.index(w) for w in topn_words if w in s)
            if len(word_idx) == 0:
                continue
            # group key words whose positions are closer than CLUSTER_THRESHOLD
            clusters = []
            cluster = [word_idx[0]]
            for previous, current in zip(word_idx, word_idx[1:]):
                if current - previous < self.CLUSTER_THRESHOLD:
                    cluster.append(current)
                else:
                    clusters.append(cluster)
                    cluster = [current]
            clusters.append(cluster)
            # the best cluster score is the score of the sentence
            max_cluster_score = 0
            for c in clusters:
                significant_words_in_cluster = len(c)
                total_words_in_cluster = c[-1] - c[0] + 1
                score = 1.0 * significant_words_in_cluster * significant_words_in_cluster / total_words_in_cluster
                if score > max_cluster_score:
                    max_cluster_score = score
            scores.append((sentence_idx, max_cluster_score))
        return scores

    def summaryScoredtxt(self,text):
        '''
        Return the sentences whose score exceeds mean + 0.5 * std of all scores.
        :param text: text to summarize
        :return: list of selected sentences, in their original order
        '''
        sentences = self._split_sentences(text)
        topn_words = self._top_words(sentences)
        scored_sentences = self._score_sentences(sentences, topn_words)
        if not scored_sentences:
            # nothing to average: avoids mean/std of an empty list (nan plus warnings)
            return []
        avg = numpy.mean([s[1] for s in scored_sentences])  # avg
        std = numpy.std([s[1] for s in scored_sentences])  # std
        summarySentences = []
        for (sent_idx, score) in scored_sentences:
            if score > (avg + 0.5 * std):
                summarySentences.append(sentences[sent_idx])
        return summarySentences

    def summaryTopNtxt(self,text):
        '''
        Return the TOP_SENTENCES best scored sentences, in their original order.
        :param text: text to summarize
        :return: list of selected sentences
        '''
        sentences = self._split_sentences(text)
        # uses self.stopwords (the attribute name was misspelled here before)
        topn_words = self._top_words(sentences)
        scored_sentences = self._score_sentences(sentences, topn_words)
        top_n_scored = sorted(scored_sentences, key=lambda s: s[1])[-self.TOP_SENTENCES:]
        top_n_scored = sorted(top_n_scored, key=lambda s: s[0])
        # return the selected sentences, not the full sentence list
        return [sentences[idx] for (idx, score) in top_n_scored]

In [None]:
import numpy as np
import torch
from tqdm import tqdm
import time
from datetime import timedelta
import pandas as pd
import numpy as np
from sklearn import preprocessing
from transformers import BertTokenizer
PAD, CLS = '[PAD]', '[CLS]'  # padding token 

def build_dataset(config, mode= 'branch'):
    """
    Tokenize config.train_df and config.test_df into model-ready samples.

    :param config: configuration object; pad_size, bert_path, train_df and test_df are read,
                   plus cmt_max_len in non-branch mode when it is defined
    :param mode: 'branch' reads the 'branch_text' column and yields
                 (token_ids, label, seq_len, mask, last_sep); any other value reads
                 'text' and yields (token_ids, label, seq_len, mask)
    :return: (train samples, test samples)
    """
    def pad_or_truncate(token_ids, seq_len, pad_size):
        """Pad with id 0 or cut to pad_size; returns (token_ids, mask, seq_len)."""
        if not pad_size:
            return token_ids, [], seq_len
        n_tokens = len(token_ids)
        if n_tokens < pad_size:
            filler = [0] * (pad_size - n_tokens)
            return token_ids + filler, [1] * n_tokens + filler, seq_len
        return token_ids[:pad_size], [1] * pad_size, pad_size

    def load_dataset(df, pad_size=config.pad_size):
        contents = []
        summarizer = SummaryTxt()
        tokenizer=BertTokenizer.from_pretrained(config.bert_path)
        tokenizer.add_special_tokens({"additional_special_tokens": ['[PAD]','[CLS]']})
        branch_mode = mode == 'branch'
        text_col = 'branch_text' if branch_mode else 'text'
        if not branch_mode:
            # Non-branch inputs are padded to cmt_max_len. Configs that do not define it
            # fall back to pad_size instead of raising AttributeError.
            pad_size = getattr(config, 'cmt_max_len', pad_size)
        #build dataset
        print('mode: ', mode)
        for i in range(len(df)):
            content=df[text_col].iloc[i]
            label=df['label'].iloc[i]
            token = tokenizer.tokenize(content)
            seq_len = len(token)
            if branch_mode:#branch mode
                if seq_len > pad_size:
                    #if the text is too long, replace the post by its abstract
                    first_index=content.find(Sep)
                    if first_index == -1:
                        # no [SEP]: the whole content is the post. Slicing with -1 used to
                        # cut off the last character and re-append it after the abstract.
                        post, tail = content, ''
                    else:
                        post, tail = content[:first_index], content[first_index:]
                    abstract = '。'.join(summarizer.summaryScoredtxt(post))
                    # keep the original text if abstract and tail are both empty
                    content = (abstract + tail) or content
                    #tokenize the shortened text
                    token = tokenizer.tokenize(content)
                    seq_len = len(token)
                    if seq_len > pad_size:
                        #still too long: keep only the last pad_size tokens
                        token = token[-pad_size:]
                        seq_len = len(token)
                #position of the last [SEP], used later to cut the vectors; 0 when there is none
                sep_positions = [idx for idx, tok in enumerate(token) if tok == Sep]
                last_sep = sep_positions[-1] if sep_positions else 0
                token_ids = tokenizer.convert_tokens_to_ids(token)
                token_ids, mask, seq_len = pad_or_truncate(token_ids, seq_len, pad_size)
                contents.append((token_ids, int(label), seq_len, mask ,last_sep))
            else:#non-branch mode
                # seq_len keeps the token count from before [CLS] is prepended
                # unless the sequence is truncated
                token = [CLS] + token
                token_ids = tokenizer.convert_tokens_to_ids(token)
                token_ids, mask, seq_len = pad_or_truncate(token_ids, seq_len, pad_size)
                contents.append((token_ids, int(label), seq_len, mask))
        return contents
    train = load_dataset(config.train_df, config.pad_size)
    test = load_dataset(config.test_df, config.pad_size)
    return train, test

class DatasetIterater(object):
    """
    Re-usable batch iterator over a list of samples.

    Each sample is a tuple (token_ids, label, seq_len, mask) with an extra
    last_sep element in 'branch' mode. Iteration yields (inputs, labels) where
    inputs is a tuple of LongTensors moved to `device`.
    """
    def __init__(self, batches, batch_size, device, mode):
        self.batch_size = batch_size
        self.batches = batches
        self.n_batches = len(batches) // batch_size  # number of full batches
        # True when a final, smaller batch is left over. The remainder must be taken
        # against batch_size: taking it against n_batches silently dropped trailing
        # samples and divided by zero when there were fewer samples than one batch.
        self.residue = len(batches) % batch_size != 0
        self.index = 0
        self.device = device
        self.mode = mode

    def _to_tensor(self, datas):
        """Stack the fields of the given samples into LongTensors on self.device."""
        x = torch.LongTensor([_[0] for _ in datas]).to(self.device)
        y = torch.LongTensor([_[1] for _ in datas]).to(self.device)
        seq_len = torch.LongTensor([_[2] for _ in datas]).to(self.device)
        mask = torch.LongTensor([_[3] for _ in datas]).to(self.device)
        if self.mode=="branch":
            last_sep = torch.LongTensor([_[4] for _ in datas]).to(self.device)
            return (x, seq_len, mask, last_sep), y
        else:
            return (x, seq_len, mask), y

    def __next__(self):
        if self.residue and self.index == self.n_batches:
            # final, smaller batch
            batches = self.batches[self.index * self.batch_size: len(self.batches)]
            self.index += 1
            batches = self._to_tensor(batches)
            return batches
        elif self.index >= self.n_batches:
            # exhausted: rewind so the iterator can be used for another epoch
            self.index = 0
            raise StopIteration
        else:
            batches = self.batches[self.index * self.batch_size: (self.index + 1) * self.batch_size]
            self.index += 1
            batches = self._to_tensor(batches)
            return batches

    def __iter__(self):
        return self

    def __len__(self):
        """Number of batches one pass yields, the smaller final batch included."""
        if self.residue:
            return self.n_batches + 1
        else:
            return self.n_batches

def build_iterator(dataset, config, mode='branch'):
    """Wrap dataset in a DatasetIterater using config.batch_size and config.device."""
    return DatasetIterater(dataset, config.batch_size, config.device, mode)


def get_time_dif(start_time):
    """Return the time elapsed since start_time as a timedelta rounded to whole seconds."""
    elapsed_seconds = time.time() - start_time
    return timedelta(seconds=int(round(elapsed_seconds)))

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn import metrics
import time

def train(config, model, train_iter, dev_iter, test_iter):
    """
    Fine-tune model with AdamW and early stopping.

    Every 50 batches the model is evaluated on dev_iter; whenever the validation
    loss improves, the weights are saved to config.save_path. Training stops
    after config.num_epochs epochs or once more than config.require_improvement
    batches pass without an improvement.

    :param config: configuration (num_epochs, learning_rate, save_path, require_improvement)
    :param model: network exposing its encoder as model.bert
    :param train_iter: training batches
    :param dev_iter: validation batches
    :param test_iter: accepted for call compatibility; not used here
    :return: None
    """
    import os  # function-scope import: this cell does not import os itself

    start_time = time.time()
    # torch.save does not create missing folders, so make sure the checkpoint folder exists.
    checkpoint_dir = os.path.dirname(config.save_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    model.train()
    # Two parameter groups: the BERT encoder uses config.learning_rate, everything else 1e-4.
    bert_param_ids = {id(p) for p in model.bert.parameters()}
    other_params = [p for p in model.parameters() if id(p) not in bert_param_ids]
    optimizer = torch.optim.AdamW([
             {'params': other_params, "lr": 1e-4},
             {'params': model.bert.parameters(), 'lr':config.learning_rate}])
    total_batch = 0  # number of batches processed so far
    dev_best_loss = float('inf')
    last_improve = 0  # batch number of the last improvement
    stop_early = False
    for epoch in range(config.num_epochs):
        print('Epoch [{}/{}]'.format(epoch + 1, config.num_epochs))
        for trains, labels in train_iter:
            outputs = model(trains)
            model.zero_grad()
            loss = F.cross_entropy(outputs,labels)
            loss.backward()
            optimizer.step()
            if total_batch % 50 == 0:
                # report performance on the current batch and on the validation set
                true = labels.data.cpu()
                predic = torch.max(outputs.data, 1)[1].cpu()
                train_acc = metrics.accuracy_score(true, predic)
                dev_acc, dev_loss = evaluate(config, model, dev_iter)
                if dev_loss < dev_best_loss:
                    dev_best_loss = dev_loss
                    torch.save(model.state_dict(), config.save_path)
                    improve = '*'
                    last_improve = total_batch
                else:
                    improve = ''
                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6},  Train Loss: {1:>5.2},  Train Acc: {2:>6.2%},  Val Loss: {3:>5.2},  Val Acc: {4:>6.2%},  Time: {5} {6}'
                print(msg.format(total_batch, loss.item(), train_acc, dev_loss, dev_acc, time_dif, improve))
                model.train()  # evaluate() switched the model to eval mode
            total_batch += 1
            if total_batch - last_improve > config.require_improvement:
                # early stopping
                print("No optimization for a long time, auto-stopping...")
                stop_early = True
                break
        if stop_early:
            break
    return None

def evaluate(config, model, data_iter, test=False):
    """
    Run model over data_iter without gradients.

    :param config: unused here, kept for a uniform signature
    :param model: network to evaluate (switched to eval mode)
    :param data_iter: iterable of (inputs, labels) batches supporting len()
    :param test: when True return (predictions, macro F1),
                 otherwise return (accuracy, mean batch loss)
    """
    model.eval()
    running_loss = 0
    predictions = np.array([], dtype=int)
    references = np.array([], dtype=int)
    with torch.no_grad():
        for batch_inputs, batch_labels in data_iter:
            logits = model(batch_inputs)
            running_loss += F.cross_entropy(logits, batch_labels)
            _, best_class = torch.max(logits.data, 1)
            references = np.append(references, batch_labels.data.cpu().numpy())
            predictions = np.append(predictions, best_class.cpu().numpy())
    if not test:
        accuracy = metrics.accuracy_score(references, predictions)
        return accuracy, running_loss / len(data_iter)
    macro_f1 = metrics.f1_score(references, predictions, average='macro')
    return predictions, macro_f1

def test(config, model, test_iter, df_test):
  """
  Load the checkpoint stored at config.save_path and score it on test_iter.

  The predictions are written to a new 'predicted' column of df_test.
  :return: (df_test with predictions, macro F1)
  """
  best_weights = torch.load(config.save_path)
  model.load_state_dict(best_weights)
  predictions, macro_f1 = evaluate(config, model, test_iter, test=True)
  df_test['predicted'] = list(predictions)
  return df_test, macro_f1

def get_depth_f1(df,depth):
  """
  Macro F1 between the 'label' and 'predicted' columns for one depth bucket.

  For depth below 5 only rows with exactly that depth are used; otherwise all
  rows whose depth is at least the given value are used.
  """
  in_bucket = (df['depth'] == depth) if depth < 5 else (df['depth'] >= depth)
  bucket = df.loc[in_bucket]
  return metrics.f1_score(bucket['label'].to_list(), bucket['predicted'].to_list(), average='macro')

In [None]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer

class BERT_CNN_Config(object):
    """
    Settings for Branch-BERT (BERT encoder followed by convolutional filters).

    Note: the validation frame (dev_df) and the test frame are the same object.
    A later cell of this notebook defines another class with this same name.
    """
    def __init__(self, dataset, df_train, df_test):
        # --- identity and output location ---
        self.model_name = 'Branch_BERT'
        self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt'

        # --- data ---
        self.train_df = df_train  # train set
        self.dev_df = df_test  # validation set
        self.test_df = df_test  # test set
        self.class_list = [0, 1, 2]  # class list
        self.num_classes = len(self.class_list)

        # --- runtime and optimisation ---
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')   # device
        self.num_epochs = 100
        self.batch_size = 16
        self.learning_rate = 1e-5
        self.require_improvement = 500      # batches without improvement before early stopping

        # --- input sizes ---
        self.pad_size = 500 # padding size for sub-branches
        self.cmt_max_len = 200   # max length of the vectors kept for each instance after SR

        # --- encoder ---
        self.bert_path = '/content/drive/MyDrive/Prediction/pretrained_BERT'
        self.tokenizer = BertTokenizer.from_pretrained(self.bert_path)
        self.hidden_size = 768

        # --- convolutional head ---
        self.filter_sizes = (2, 3, 4) # filter sizes
        self.num_filters = 32 # number of filters per size
        self.dropout = 0.5 # dropout rate

class Global_Pooling_Config(object):
    """
    Settings for Branch-BERT w/o CFE (BERT encoder followed by global average pooling).

    Note: the validation frame (dev_df) and the test frame are the same object.
    """
    def __init__(self, dataset, df_train, df_test):
        self.model_name = 'Branch_BERT w/o CFE'
        self.train_df = df_train  # train set
        self.dev_df = df_test  # validation set
        self.test_df = df_test  # test set
        self.class_list = [0, 1, 2]  # class list
        self.num_classes = len(self.class_list)
        # model_name contains '/', which would turn the checkpoint file name into a path
        # through a non-existent 'Branch_BERT w' folder; use a file-system safe name instead.
        checkpoint_name = self.model_name.replace('/', '_')
        self.save_path = dataset + '/saved_dict/' + checkpoint_name + '.ckpt'
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # device
        self.require_improvement = 500  # batches without improvement before early stopping
        self.num_epochs = 100
        self.batch_size = 16
        self.pad_size = 500  # padding size for sub-branches
        self.cmt_max_len = 200  # max length of the vectors kept for each instance
        self.learning_rate = 1e-5
        self.bert_path = '/content/drive/MyDrive/Prediction/pretrained_BERT'
        self.tokenizer = BertTokenizer.from_pretrained(self.bert_path)
        self.hidden_size = 768
        self.dropout = 0.5  # dropout rate

In [None]:
class BranchBert_CNN(nn.Module):
    '''
    Branch-BERT: BERT encoder, cut to the last instance of the sub-branch,
    convolutional feature extraction and a linear classifier.
    '''
    def __init__(self, config, mode='branch'):
        super(BranchBert_CNN, self).__init__()
        # Keep the configuration on the module: forward() previously read a global
        # variable named `config`, so the model only worked when such a global existed.
        self.config = config
        self.bert = BertModel.from_pretrained(config.bert_path)
        self.mode = mode
        for param in self.bert.parameters():
            param.requires_grad = True  # the encoder is fine-tuned
        self.convs = nn.ModuleList(
            [nn.Conv2d(in_channels=1, out_channels=config.num_filters, kernel_size=(k, config.hidden_size)) for k in config.filter_sizes]
        )
        self.droptout = nn.Dropout(config.dropout)
        self.fc = nn.Linear(config.num_filters * len(config.filter_sizes), config.num_classes)

    def conv_and_pool(self, x, conv):#convolutional feature extraction layer
        '''Apply conv + ReLU and max-pool over the sequence axis.'''
        x = conv(x)
        x = F.relu(x)
        x = x.squeeze(3)
        size = x.size(2)
        x = F.max_pool1d(x, size)
        x = x.squeeze(2)
        return x

    def forward(self, x):
        '''
        :param x: tuple (token ids, sequence lengths, attention mask, position of the last [SEP])
        :return: class scores, one row per sample
        '''
        context = x[0]  # input sentences
        seq_len = x[1]
        mask = x[2]
        cut_idx = x[3] # the position of the last [SEP]
        output = self.bert(context, attention_mask=mask)#SR module
        last_hidden_state = output.last_hidden_state #(batch_size, max_length, hidden)
        target_len = self.config.cmt_max_len
        ts_list=[]
        for i in range(cut_idx.size()[0]):
            start, stop = int(cut_idx[i]), int(seq_len[i])
            diff = target_len - (stop - start)
            if diff > 0:
                # shorter than target_len: append zero vectors (same device/dtype as the states)
                real = last_hidden_state[i, start:stop, :].unsqueeze(0)
                zero_pad = last_hidden_state.new_zeros((1, diff, last_hidden_state.size(-1)))
                a = torch.cat([real, zero_pad], dim=1)
            else:
                # long enough: keep target_len vectors starting at the last [SEP]
                a = last_hidden_state[i, start:start + target_len, :].unsqueeze(0)
            ts_list.append(a)
        last_hidden_state= torch.cat(ts_list,dim=0)
        out = last_hidden_state.unsqueeze(1) # (batch_size, 1, cmt_max_len, hidden_size)
        out = torch.cat([self.conv_and_pool(out, conv)for conv in self.convs], 1) #CFE module
        out = self.droptout(out)
        out = self.fc(out)#classification
        return out

class Branch_Averaging_Model(nn.Module):
    '''
    Branch-BERT w/o CFE: BERT encoder, cut to the last instance of the
    sub-branch, average pooling over the sequence axis and a linear classifier.
    '''
    def __init__(self, config, mode='branch'):
        super().__init__()
        self.config = config
        self.mode = mode
        self.bert = BertModel.from_pretrained(config.bert_path)
        for weight in self.bert.parameters():
            weight.requires_grad = True  # the encoder is fine-tuned
        self.droptout = nn.Dropout(config.dropout)
        self.fc = nn.Linear(config.hidden_size, config.num_classes)
        self.global_pooling=nn.AdaptiveAvgPool1d(1)

    def forward(self, x):
        '''
        :param x: tuple (token ids, sequence lengths, attention mask, position of the last [SEP])
        :return: class scores, one row per sample
        '''
        context, seq_len, mask, cut_idx = x[0], x[1], x[2], x[3]
        hidden = self.bert(context, attention_mask=mask).last_hidden_state #(batch_size, max_length, hidden)
        target_len = self.config.cmt_max_len
        pieces = []
        for row in range(cut_idx.size()[0]):
            start, stop = cut_idx[row], seq_len[row]
            missing = int(target_len - (stop - start))
            if missing > 0:
                # too short: zero padding up to target_len
                real = hidden[row, start:stop, :].unsqueeze(0)
                filler = torch.zeros(1, missing, self.config.hidden_size).to(self.config.device)
                piece = torch.cat([real, filler], dim=1)
            else:
                # long enough: cut to target_len
                piece = hidden[row, start:start + target_len, :].unsqueeze(0)
            pieces.append(piece)
        stacked = torch.cat(pieces, dim=0) # (batch_size, cmt_max_len, hidden_size)
        pooled = self.global_pooling(stacked.permute(0, 2, 1)) # (batch_size, hidden_size, 1)
        pooled = pooled.squeeze(2) # (batch_size, hidden_size)
        return self.fc(self.droptout(pooled))

In [None]:
# Branch-BERT: ten runs, each with a different shuffle of the data (random_state = run index).
f = []   # overall F1
f1 = []  # F1 depth=1
f2 = []  # F1 depth=2
f3 = []  # F1 depth=3
f4 = []  # F1 depth=4
f5 = []  # F1 depth>=5

dataset='/content/drive/MyDrive/Prediction'
for i in range(10):
    # 80/20 split of the shuffled comments + posts
    df = pd.concat([df2,df_info]).sample(frac=1, random_state=i)
    cut_idx1 = int(round(0.8 * df.shape[0]))
    df_train = df[:cut_idx1]
    df_test = df[cut_idx1:]
    config = BERT_CNN_Config(dataset,df_train,df_test)
    # fixed seeds so that the results can be reproduced
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True
    start_time = time.time()
    print("Loading data...")
    train_data, test_data = build_dataset(config)
    train_iter = build_iterator(train_data, config)
    dev_iter = build_iterator(test_data, config)   # validation uses the test samples
    test_iter = build_iterator(test_data, config)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)
    model = BranchBert_CNN(config).to(config.device)  #Branch-BERT
    train(config, model, train_iter, dev_iter, test_iter)
    df_test,f_score = test(config, model, test_iter, df_test)
    filename="/content/drive/MyDrive/DATA/test_"+config.model_name+str(i)+'.csv'
    df_test.to_csv(filename)
    f.append(f_score)
    for depth, depth_scores in enumerate((f1, f2, f3, f4, f5), start=1):
        depth_scores.append(get_depth_f1(df_test,depth))

# mean + sample standard deviation over the runs
for tag, run_scores in (("f", f), ("f1", f1), ("f2", f2), ("f3", f3), ("f4", f4), ("f5", f5)):
    print(tag+": "+str(np.mean(run_scores))+"+"+str(np.std(run_scores, ddof = 1)))

In [None]:
# Branch-BERT w/o CFE: ten runs, each with a different shuffle of the data (random_state = run index).
f=list()   # overall F1
f1=list()  # F1 depth=1
f2=list()  # F1 depth=2
f3=list()  # F1 depth=3
f4=list()  # F1 depth=4
f5=list()  # F1 depth>=5

for i in range(10):
    dataset='/content/drive/MyDrive/Prediction'
    df = pd.concat([df2,df_info])
    df = df.sample(frac=1, random_state=i)   #random shuffle
    cut_idx1 = int(round(0.8 * df.shape[0]))
    df_train,df_test = df[:cut_idx1], df[cut_idx1:]
    config = Global_Pooling_Config(dataset,df_train,df_test)
    # fixed seeds so that the results can be reproduced
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True
    start_time = time.time()
    print("Loading data...")
    train_data, test_data = build_dataset(config)
    train_iter = build_iterator(train_data, config)
    dev_iter = build_iterator(test_data, config)   # validation uses the test samples
    test_iter = build_iterator(test_data, config)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)
    model = Branch_Averaging_Model(config).to(config.device) #Branch-BERT w/o CFE
    train(config, model, train_iter, dev_iter, test_iter)
    df_test,f_score = test(config, model, test_iter, df_test)
    # The model name contains '/', which would make the CSV path point into a
    # non-existent 'test_Branch_BERT w' folder; replace it for the file name.
    safe_model_name = config.model_name.replace('/', '_')
    filename="/content/drive/MyDrive/DATA/test_"+safe_model_name+str(i)+'.csv'
    df_test.to_csv(filename)
    f.append(f_score)
    f1.append(get_depth_f1(df_test,1))
    f2.append(get_depth_f1(df_test,2))
    f3.append(get_depth_f1(df_test,3))
    f4.append(get_depth_f1(df_test,4))
    f5.append(get_depth_f1(df_test,5))

# mean + sample standard deviation over the runs
print("f: "+str(np.mean(f))+"+"+str(np.std(f, ddof = 1)))
print("f1: "+str(np.mean(f1))+"+"+str(np.std(f1, ddof = 1)))
print("f2: "+str(np.mean(f2))+"+"+str(np.std(f2, ddof = 1)))
print("f3: "+str(np.mean(f3))+"+"+str(np.std(f3, ddof = 1)))
print("f4: "+str(np.mean(f4))+"+"+str(np.std(f4, ddof = 1)))
print("f5: "+str(np.mean(f5))+"+"+str(np.std(f5, ddof = 1)))

# Non-Branch Model
(BERT & Branch-BERT w/o SR)

In [None]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer
from transformers import ElectraModel, ElectraTokenizer

class BERT_CNN_Config(object):
    """
    Settings for the non-branch BERT + CNN model (model_name 'BERT_CNN').

    Note: this class has the same name as the Branch-BERT configuration defined in an
    earlier cell and replaces it once this cell has run. The validation frame (dev_df)
    and the test frame are the same object.
    """
    def __init__(self, dataset, df_train, df_test):
        self.model_name = 'BERT_CNN'
        self.train_df = df_train  # train set
        self.dev_df = df_test  # validation set
        self.test_df = df_test  # test set
        self.class_list = [0, 1, 2]  # class list
        self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt'
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # device
        self.require_improvement = 500  # batches without improvement before early stopping
        self.num_classes = len(self.class_list)
        self.num_epochs = 100
        self.batch_size = 16
        self.pad_size = 200  # padding size of a single text
        # build_dataset reads config.cmt_max_len in non-branch mode; without this
        # attribute dataset building fails with AttributeError.
        self.cmt_max_len = self.pad_size
        self.learning_rate = 1e-5
        self.bert_path = '/content/drive/MyDrive/Prediction/pretrained_BERT/'
        self.tokenizer = BertTokenizer.from_pretrained(self.bert_path)
        self.hidden_size = 768
        self.filter_sizes = (2, 3, 4)  # filter sizes
        self.num_filters = 32  # number of filters per size
        self.dropout = 0.5  # dropout rate

class BERT_Config(object):
    """
    Settings for the plain BERT classifier (model_name 'BERT').

    Note: the validation frame (dev_df) and the test frame are the same object.
    """
    def __init__(self, dataset, df_train, df_test):
        self.model_name = 'BERT'
        self.train_df = df_train  # train set
        self.dev_df = df_test  # validation set
        self.test_df = df_test  # test set
        self.class_list = [0, 1, 2]  # class list
        self.save_path = dataset + '/saved_dict/' + self.model_name + '.ckpt'
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # device
        self.require_improvement = 300  # batches without improvement before early stopping
        self.num_classes = len(self.class_list)
        self.num_epochs = 100
        self.batch_size = 16
        self.pad_size = 200  # padding size of a single text
        # build_dataset reads config.cmt_max_len in non-branch mode; without this
        # attribute dataset building fails with AttributeError.
        self.cmt_max_len = self.pad_size
        self.learning_rate = 1e-5
        self.bert_path = '/content/drive/MyDrive/Prediction/pretrained_BERT/'
        self.tokenizer = BertTokenizer.from_pretrained(self.bert_path)
        self.hidden_size = 768
        self.dropout = 0.5  # dropout rate

In [None]:
class BERT_Model(nn.Module):
    """BERT baseline: pooled encoder output -> dropout -> linear classifier."""

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.bert = BertModel.from_pretrained(config.bert_path)
        # Keep every encoder weight trainable (full fine-tuning).
        for weight in self.bert.parameters():
            weight.requires_grad = True
        self.dropout = nn.Dropout(config.dropout)
        self.fc = nn.Linear(config.hidden_size, config.num_classes)

    def forward(self, x):
        """Return class logits for one batch.

        ``x`` is indexable: ``x[0]`` holds the input sentence ids and ``x[2]``
        the attention mask (same size as the sentence, 0 on padding, e.g.
        ``[1, 1, 1, 1, 0, 0]``). ``x[1]`` (sequence length) is not used.
        """
        token_ids, attention_mask = x[0], x[2]
        encoded = self.bert(token_ids, attention_mask=attention_mask)
        pooled = self.dropout(encoded.pooler_output)
        return self.fc(pooled)

class BERT_CNN(nn.Module):
    """Branch-BERT w/o SR: BERT token states followed by a multi-width CNN head."""

    def __init__(self, config):
        super().__init__()
        self.bert = BertModel.from_pretrained(config.bert_path)
        # Keep every encoder weight trainable (full fine-tuning).
        for weight in self.bert.parameters():
            weight.requires_grad = True
        conv_layers = []
        for width in config.filter_sizes:
            # Each kernel spans `width` tokens and the full hidden dimension.
            conv_layers.append(nn.Conv2d(1, config.num_filters, (width, config.hidden_size)))
        self.convs = nn.ModuleList(conv_layers)
        self.dropout = nn.Dropout(config.dropout)
        feature_count = config.num_filters * len(config.filter_sizes)
        self.fc_cnn = nn.Linear(feature_count, config.num_classes)

    def conv_and_pool(self, x, conv):
        """Apply ``conv`` + ReLU, then max-pool over the whole remaining length."""
        activated = F.relu(conv(x))
        feature_map = activated.squeeze(3)
        pooled = F.max_pool1d(feature_map, feature_map.size(2))
        return pooled.squeeze(2)

    def forward(self, x):
        """Return class logits for one batch.

        ``x[0]`` holds the input sentence ids and ``x[2]`` the attention mask
        (same size as the sentence, 0 on padding, e.g. ``[1, 1, 1, 1, 0, 0]``).
        """
        token_ids, attention_mask = x[0], x[2]
        encoded = self.bert(token_ids, attention_mask=attention_mask)
        # (batch_size, max_length, hidden) -> add a channel axis for Conv2d.
        conv_input = encoded.last_hidden_state.unsqueeze(1)
        pooled_features = [self.conv_and_pool(conv_input, conv) for conv in self.convs]
        merged = self.dropout(torch.cat(pooled_features, 1))
        return self.fc_cnn(merged)

In [None]:
# Experiment: Branch-BERT w/o SR (BERT_CNN), repeated on 10 shuffled 80/20 splits.
# Fixes over the previous revision: the stray ")" after the build_dataset /
# build_iterator calls and the string literal trailing the model construction
# were syntax errors that prevented the cell from running.
f=list()   # score returned by test() for each run
f1=list()  # get_depth_f1(df_test, 1) for each run
f2=list()  # get_depth_f1(df_test, 2) for each run
f3=list()  # get_depth_f1(df_test, 3) for each run
f4=list()  # get_depth_f1(df_test, 4) for each run
f5=list()  # get_depth_f1(df_test, 5) for each run


for i in range(10):
    dataset='/content/drive/MyDrive/Prediction'
    df = pd.concat([df2,df_info])
    # The run index doubles as the shuffle seed, so every run sees a different split.
    df = df.sample(frac=1, random_state=i)
    cut_idx1 = int(round(0.8 * df.shape[0]))
    # test() adds a 'predicted' column to df_test; copy so it is not written into a slice of df.
    df_train,df_test = df[:cut_idx1], df[cut_idx1:].copy()
    config = BERT_CNN_Config(dataset,df_train,df_test)
    # Model-side randomness is seeded identically in every run.
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True
    start_time = time.time()
    print("Loading data...")
    train_data, test_data = build_dataset(config,mode='non-branch')
    train_iter = build_iterator(train_data, config,mode='non-branch')
    # NOTE(review): the validation iterator is built from the test data, so early
    # stopping / checkpoint selection sees the test split.
    dev_iter = build_iterator(test_data, config,mode='non-branch')
    test_iter = build_iterator(test_data, config,mode='non-branch')
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)
    model = BERT_CNN(config).to(config.device)  # Branch-BERT w/o SR
    train(config, model, train_iter, dev_iter, test_iter)
    df_test,f_score = test(config, model, test_iter, df_test)
    filename="/content/drive/MyDrive/DATA/test_"+config.model_name+str(i)+'.csv'
    df_test.to_csv(filename)
    f.append(f_score)
    f1.append(get_depth_f1(df_test,1))
    f2.append(get_depth_f1(df_test,2))
    f3.append(get_depth_f1(df_test,3))
    f4.append(get_depth_f1(df_test,4))
    f5.append(get_depth_f1(df_test,5))

# Mean and sample standard deviation (ddof=1) over the runs.
print("f: "+str(np.mean(f))+"+"+str(np.std(f, ddof = 1)))
print("f1: "+str(np.mean(f1))+"+"+str(np.std(f1, ddof = 1)))
print("f2: "+str(np.mean(f2))+"+"+str(np.std(f2, ddof = 1)))
print("f3: "+str(np.mean(f3))+"+"+str(np.std(f3, ddof = 1)))
print("f4: "+str(np.mean(f4))+"+"+str(np.std(f4, ddof = 1)))
print("f5: "+str(np.mean(f5))+"+"+str(np.std(f5, ddof = 1)))

In [None]:
# Experiment: plain BERT classifier (BERT_Model), repeated on 10 shuffled 80/20 splits.
# Fixes over the previous revision: `f5=li` was a truncated `f5=list()` (NameError)
# and the build_dataset call carried a stray ")" (SyntaxError).
f=list()   # score returned by test() for each run
f1=list()  # get_depth_f1(df_test, 1) for each run
f2=list()  # get_depth_f1(df_test, 2) for each run
f3=list()  # get_depth_f1(df_test, 3) for each run
f4=list()  # get_depth_f1(df_test, 4) for each run
f5=list()  # get_depth_f1(df_test, 5) for each run

for i in range(10):
    dataset='/content/drive/MyDrive/Prediction'
    df = pd.concat([df2,df_info])
    # The run index doubles as the shuffle seed, so every run sees a different split.
    df = df.sample(frac=1, random_state=i)
    cut_idx1 = int(round(0.8 * df.shape[0]))
    # test() adds a 'predicted' column to df_test; copy so it is not written into a slice of df.
    df_train,df_test = df[:cut_idx1], df[cut_idx1:].copy()
    config = BERT_Config(dataset,df_train,df_test)
    # Model-side randomness is seeded identically in every run.
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True
    start_time = time.time()
    print("Loading data...")
    train_data, test_data = build_dataset(config,mode='non-branch')
    train_iter = build_iterator(train_data, config,mode='non-branch')
    # NOTE(review): the validation iterator is built from the test data, so early
    # stopping / checkpoint selection sees the test split.
    dev_iter = build_iterator(test_data, config,mode='non-branch')
    test_iter = build_iterator(test_data, config,mode='non-branch')
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)
    model = BERT_Model(config).to(config.device)
    train(config, model, train_iter, dev_iter, test_iter)
    df_test,f_score = test(config, model, test_iter, df_test)
    filename="/content/drive/MyDrive/DATA/test_"+config.model_name+str(i)+'.csv'
    df_test.to_csv(filename)
    f.append(f_score)
    f1.append(get_depth_f1(df_test,1))
    f2.append(get_depth_f1(df_test,2))
    f3.append(get_depth_f1(df_test,3))
    f4.append(get_depth_f1(df_test,4))
    f5.append(get_depth_f1(df_test,5))

# Mean and sample standard deviation (ddof=1) over the runs.
print("f: "+str(np.mean(f))+"+"+str(np.std(f, ddof = 1)))
print("f1: "+str(np.mean(f1))+"+"+str(np.std(f1, ddof = 1)))
print("f2: "+str(np.mean(f2))+"+"+str(np.std(f2, ddof = 1)))
print("f3: "+str(np.mean(f3))+"+"+str(np.std(f3, ddof = 1)))
print("f4: "+str(np.mean(f4))+"+"+str(np.std(f4, ddof = 1)))
print("f5: "+str(np.mean(f5))+"+"+str(np.std(f5, ddof = 1)))

Loading data...
Time usage: 0:00:02


Some weights of the model checkpoint at /content/drive/MyDrive/Prediction/pretrained_BERT/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch [1/100]
Iter:      0,  Train Loss:  0.96,  Train Acc: 50.00%,  Val Loss:   1.0,  Val Acc: 38.64%,  Time: 0:00:08 *
Iter:     50,  Train Loss:   1.0,  Train Acc: 50.00%,  Val Loss:  0.94,  Val Acc: 52.34%,  Time: 0:00:18 *
Iter:    100,  Train Loss:  0.88,  Train Acc: 62.50%,  Val Loss:  0.91,  Val Acc: 53.28%,  Time: 0:00:27 *
Iter:    150,  Train Loss:   1.0,  Train Acc: 18.75%,  Val Loss:  0.88,  Val Acc: 55.32%,  Time: 0:00:37 *
Iter:    200,  Train Loss:  0.82,  Train Acc: 43.75%,  Val Loss:  0.85,  Val Acc: 56.60%,  Time: 0:00:46 *
Iter:    250,  Train Loss:  0.98,  Train Acc: 50.00%,  Val Loss:  0.89,  Val Acc: 55.74%,  Time: 0:00:55 
Epoch [2/100]
Iter:    300,  Train Loss:  0.96,  Train Acc: 50.00%,  Val Loss:  0.83,  Val Acc: 59.83%,  Time: 0:01:04 *
Iter:    350,  Train Loss:  0.87,  Train Acc: 62.50%,  Val Loss:  0.86,  Val Acc: 57.28%,  Time: 0:01:13 
Iter:    400,  Train Loss:  0.76,  Train Acc: 56.25%,  Val Loss:  0.83,  Val Acc: 58.13%,  Time: 0:01:21 
Iter:    450

Some weights of the model checkpoint at /content/drive/MyDrive/Prediction/pretrained_BERT/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch [1/100]
Iter:      0,  Train Loss:   1.1,  Train Acc: 31.25%,  Val Loss:   1.0,  Val Acc: 37.70%,  Time: 0:00:04 *
Iter:     50,  Train Loss:  0.98,  Train Acc: 43.75%,  Val Loss:  0.94,  Val Acc: 50.98%,  Time: 0:00:13 *
Iter:    100,  Train Loss:  0.91,  Train Acc: 43.75%,  Val Loss:  0.92,  Val Acc: 51.66%,  Time: 0:00:23 *
Iter:    150,  Train Loss:   1.0,  Train Acc: 25.00%,  Val Loss:  0.89,  Val Acc: 55.74%,  Time: 0:00:32 *
Iter:    200,  Train Loss:   1.0,  Train Acc: 43.75%,  Val Loss:  0.89,  Val Acc: 54.21%,  Time: 0:00:42 *
Iter:    250,  Train Loss:  0.77,  Train Acc: 43.75%,  Val Loss:  0.86,  Val Acc: 60.00%,  Time: 0:00:51 *
Epoch [2/100]
Iter:    300,  Train Loss:  0.87,  Train Acc: 68.75%,  Val Loss:  0.84,  Val Acc: 58.98%,  Time: 0:01:01 *
Iter:    350,  Train Loss:  0.86,  Train Acc: 50.00%,  Val Loss:  0.83,  Val Acc: 60.77%,  Time: 0:01:10 *
Iter:    400,  Train Loss:   1.1,  Train Acc: 50.00%,  Val Loss:  0.81,  Val Acc: 60.51%,  Time: 0:01:20 *
Iter:    

Some weights of the model checkpoint at /content/drive/MyDrive/Prediction/pretrained_BERT/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch [1/100]
Iter:      0,  Train Loss:   1.1,  Train Acc: 37.50%,  Val Loss:   1.0,  Val Acc: 45.53%,  Time: 0:00:04 *
Iter:     50,  Train Loss:  0.96,  Train Acc: 50.00%,  Val Loss:  0.95,  Val Acc: 51.15%,  Time: 0:00:13 *
Iter:    100,  Train Loss:  0.92,  Train Acc: 43.75%,  Val Loss:  0.92,  Val Acc: 54.04%,  Time: 0:00:23 *
Iter:    150,  Train Loss:   1.1,  Train Acc: 43.75%,  Val Loss:  0.97,  Val Acc: 51.15%,  Time: 0:00:31 
Iter:    200,  Train Loss:  0.76,  Train Acc: 81.25%,  Val Loss:   0.9,  Val Acc: 55.06%,  Time: 0:00:41 *
Iter:    250,  Train Loss:   1.2,  Train Acc: 43.75%,  Val Loss:  0.87,  Val Acc: 58.47%,  Time: 0:00:50 *
Epoch [2/100]
Iter:    300,  Train Loss:  0.75,  Train Acc: 68.75%,  Val Loss:  0.84,  Val Acc: 59.66%,  Time: 0:01:00 *
Iter:    350,  Train Loss:  0.69,  Train Acc: 81.25%,  Val Loss:  0.85,  Val Acc: 60.51%,  Time: 0:01:08 
Iter:    400,  Train Loss:   0.7,  Train Acc: 68.75%,  Val Loss:  0.83,  Val Acc: 60.51%,  Time: 0:01:18 *
Iter:    45

Some weights of the model checkpoint at /content/drive/MyDrive/Prediction/pretrained_BERT/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch [1/100]
Iter:      0,  Train Loss:   1.1,  Train Acc: 31.25%,  Val Loss:  0.99,  Val Acc: 42.55%,  Time: 0:00:04 *
Iter:     50,  Train Loss:  0.82,  Train Acc: 62.50%,  Val Loss:  0.95,  Val Acc: 48.34%,  Time: 0:00:13 *
Iter:    100,  Train Loss:  0.92,  Train Acc: 56.25%,  Val Loss:  0.91,  Val Acc: 51.91%,  Time: 0:00:23 *
Iter:    150,  Train Loss:   1.0,  Train Acc: 50.00%,  Val Loss:  0.85,  Val Acc: 57.11%,  Time: 0:00:32 *
Iter:    200,  Train Loss:   1.0,  Train Acc: 56.25%,  Val Loss:  0.86,  Val Acc: 52.68%,  Time: 0:00:41 
Iter:    250,  Train Loss:  0.67,  Train Acc: 81.25%,  Val Loss:  0.81,  Val Acc: 61.11%,  Time: 0:00:50 *
Epoch [2/100]
Iter:    300,  Train Loss:  0.87,  Train Acc: 43.75%,  Val Loss:   0.8,  Val Acc: 60.85%,  Time: 0:01:00 *
Iter:    350,  Train Loss:  0.92,  Train Acc: 56.25%,  Val Loss:  0.81,  Val Acc: 59.57%,  Time: 0:01:08 
Iter:    400,  Train Loss:  0.76,  Train Acc: 56.25%,  Val Loss:  0.79,  Val Acc: 61.87%,  Time: 0:01:18 *
Iter:    45

Some weights of the model checkpoint at /content/drive/MyDrive/Prediction/pretrained_BERT/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch [1/100]
Iter:      0,  Train Loss:   1.3,  Train Acc: 25.00%,  Val Loss:   1.0,  Val Acc: 43.57%,  Time: 0:00:05 *
Iter:     50,  Train Loss:  0.69,  Train Acc: 56.25%,  Val Loss:   1.0,  Val Acc: 49.62%,  Time: 0:00:13 
Iter:    100,  Train Loss:  0.96,  Train Acc: 37.50%,  Val Loss:  0.95,  Val Acc: 53.28%,  Time: 0:00:23 *
Iter:    150,  Train Loss:  0.79,  Train Acc: 56.25%,  Val Loss:  0.91,  Val Acc: 53.28%,  Time: 0:00:33 *
Iter:    200,  Train Loss:  0.79,  Train Acc: 62.50%,  Val Loss:  0.89,  Val Acc: 55.40%,  Time: 0:00:43 *
Iter:    250,  Train Loss:  0.84,  Train Acc: 43.75%,  Val Loss:  0.86,  Val Acc: 60.00%,  Time: 0:00:53 *
Epoch [2/100]
Iter:    300,  Train Loss:   1.0,  Train Acc: 43.75%,  Val Loss:  0.89,  Val Acc: 56.60%,  Time: 0:01:01 
Iter:    350,  Train Loss:  0.78,  Train Acc: 75.00%,  Val Loss:  0.84,  Val Acc: 58.64%,  Time: 0:01:11 *
Iter:    400,  Train Loss:  0.72,  Train Acc: 56.25%,  Val Loss:  0.83,  Val Acc: 58.04%,  Time: 0:01:20 *
Iter:    45

# Deep Learning Based Model
(TextCNN & TAN)

In [None]:

# Load the pre-trained word vectors and save an embedding matrix aligned with the vocabulary.
# NOTE(review): this cell uses os, pkl, gensim, fenci, build_vocab and MAX_VOCAB_SIZE,
# which are imported/defined in the next cell; on a fresh kernel that cell must run first.
emb_dim = 300
vocab_dir = "/content/drive/MyDrive/Prediction/data/vocab.pkl"
filename_trimmed_dir = "/content/drive/MyDrive/Prediction/data/embedding_CantoStance"

if os.path.exists(vocab_dir):
    # Context managers close the pickle file handles (previously left open).
    with open(vocab_dir, 'rb') as vocab_file:
        word_to_id = pkl.load(vocab_file)
else:
    tokenizer = fenci
    #tokenizer = lambda x: [y for y in x]  # alternative: build the vocabulary per character
    word_to_id = build_vocab(tokenizer=tokenizer, max_size=MAX_VOCAB_SIZE, min_freq=1,df=df)
    with open(vocab_dir, 'wb') as vocab_file:
        pkl.dump(word_to_id, vocab_file)

count=0  # number of vocabulary entries without a pre-trained vector
# Every row is overwritten below for each vocabulary entry.
embeddings = np.random.rand(len(word_to_id), emb_dim)
print(len(word_to_id))
emb_model = gensim.models.KeyedVectors.load_word2vec_format(r'/content/drive/MyDrive/DATA/sgns.wiki.word.bz2')
for i, idx in word_to_id.items():
  # `word in KeyedVectors` works on gensim 3.x and 4.x, whereas the `.vocab`
  # attribute used before was removed in gensim 4.
  if i not in emb_model:
    count+=1
    emb = [0]* emb_dim  # out-of-vocabulary words get an all-zero vector
  else:
    emb = emb_model[i]
  embeddings[idx] = np.asarray(emb, dtype='float32')
print(count)
np.savez_compressed(filename_trimmed_dir, embeddings=embeddings)

In [None]:
# coding: UTF-8
import os
import torch
import numpy as np
import gensim
import numpy as np
import pickle as pkl
from tqdm import tqdm
import time
from datetime import timedelta
import pycantonese
import jieba
MAX_VOCAB_SIZE = 20000  # upper limit on the vocabulary size
UNK, PAD = '<UNK>', '<PAD>'  # unknown-token and padding symbols

fenci=jieba.lcut# tokenizer: jieba.lcut is used wherever `fenci` is called
def build_vocab(tokenizer, max_size, min_freq, df):
    """Build a token -> index mapping from the ``text`` column of ``df``.

    Tokens are counted over all rows, those seen fewer than ``min_freq`` times
    are discarded, and the ``max_size - 2`` most frequent ones are numbered by
    descending frequency (ties keep first-seen order). ``UNK`` and ``PAD`` take
    the two indices that follow.

    Args:
        tokenizer: Callable turning one text into an iterable of tokens.
        max_size: Maximum vocabulary size, including ``UNK`` and ``PAD``.
        min_freq: Minimum number of occurrences for a token to be kept.
        df: Frame with a ``text`` column.

    Returns:
        dict mapping each token to its integer index.
    """
    counts = {}
    for row in range(len(df)):
        for token in tokenizer(df['text'].iloc[row]):
            if token in counts:
                counts[token] += 1
            else:
                counts[token] = 1

    frequent = [pair for pair in counts.items() if pair[1] >= min_freq]
    frequent.sort(key=lambda pair: pair[1], reverse=True)  # stable, like sorted()

    vocab = {}
    for position, pair in enumerate(frequent[:max_size - 2]):
        vocab[pair[0]] = position

    # Both special indices are derived from the size before either is inserted.
    next_index = len(vocab)
    vocab[UNK] = next_index
    vocab[PAD] = next_index + 1
    return vocab

def build_dataset(config):
    """Tokenise, pad and index the train and test frames of ``config``.

    The vocabulary is read from ``config.vocab_path`` when that file exists;
    otherwise it is built from ``config.train_df`` and written to that path.

    Fix: the fallback previously called
    ``build_vocab(config.train_path, tokenizer=...)``, which passes ``tokenizer``
    twice, reads a ``train_path`` attribute the config classes do not define and
    omits the required ``df`` argument, so it could never succeed.

    Args:
        config: Object providing ``vocab_path``, ``train_df``, ``test_df`` and
            ``pad_size``; both frames need ``text`` and ``label`` columns.

    Returns:
        ``(vocab, train, test)`` where ``train`` / ``test`` are lists of
        ``(token_ids, label, seq_len)`` tuples.
    """
    tokenizer = fenci
    if os.path.exists(config.vocab_path):
        # NOTE(review): pickle must only be used on vocabulary files this notebook wrote.
        with open(config.vocab_path, 'rb') as vocab_file:
            vocab = pkl.load(vocab_file)
    else:
        # NOTE(review): a vocabulary rebuilt here may not match a previously saved
        # embedding matrix; regenerate the embeddings in that case.
        vocab = build_vocab(tokenizer=tokenizer, max_size=MAX_VOCAB_SIZE, min_freq=1, df=config.train_df)
        with open(config.vocab_path, 'wb') as vocab_file:
            pkl.dump(vocab, vocab_file)
    print(f"Vocab size: {len(vocab)}")

    def load_dataset(data, pad_size=200):
        """Convert every row of ``data`` into ``(token_ids, label, seq_len)``."""
        contents = []
        for i in range(len(data)):
            content = data['text'].iloc[i]
            label = data['label'].iloc[i]
            token = tokenizer(content)
            seq_len = len(token)  # length before padding, capped at pad_size below
            if pad_size:
                if len(token) < pad_size:
                    token.extend([PAD] * (pad_size - len(token)))
                else:
                    token = token[:pad_size]
                    seq_len = pad_size
            # Map tokens to ids; unknown tokens fall back to the UNK id.
            words_line = [vocab.get(word, vocab.get(UNK)) for word in token]
            contents.append((words_line, int(label), seq_len))
        return contents

    train = load_dataset(config.train_df, config.pad_size)
    test = load_dataset(config.test_df, config.pad_size)
    return vocab, train, test


class DatasetIterater(object):
    """Re-usable mini-batch iterator over ``(token_ids, label, seq_len)`` samples.

    Fixes over the previous revision:
      * the partial-batch flag was computed as ``len(batches) % n_batches``,
        which silently dropped trailing samples (e.g. 34 samples, batch size 16)
        and raised ``ZeroDivisionError`` when there were fewer samples than
        ``batch_size``; it now uses ``len(batches) % batch_size``;
      * the vocabulary path can be passed in instead of being read from a
        notebook-level ``config`` variable, and the vocabulary is only loaded
        when the target ids are needed (``is_Tan``).

    Args:
        batches: List of ``(token_ids, label, seq_len)`` samples.
        batch_size: Number of samples per batch.
        device: Device the tensors are moved to.
        pad_size: Length the target id list is padded to.
        is_Tan: If true, each batch also carries the target ids.
        vocab_path: Pickled vocabulary used to index the target phrase. When
            omitted, the notebook-level ``config.vocab_path`` is used, as before.
    """

    def __init__(self, batches, batch_size, device, pad_size, is_Tan, vocab_path=None):
        self.batch_size = batch_size
        self.batches = batches
        self.n_batches = len(batches) // batch_size
        # True when a final, smaller batch is needed to cover every sample.
        self.residue = len(batches) % batch_size != 0
        self.index = 0
        self.device = device
        self.is_Tan = is_Tan
        self.target = '接种新冠疫苗'
        self.target_ids = None  # only populated for the target-aware (TAN) input format
        if is_Tan:
            if vocab_path is None:
                vocab_path = config.vocab_path  # legacy fallback to the notebook global
            with open(vocab_path, 'rb') as vocab_file:
                vocab = pkl.load(vocab_file)
            tokenizer = fenci
            # Ids of the target phrase; unknown tokens fall back to the UNK id.
            self.target_ids = [vocab.get(word, vocab.get(UNK)) for word in tokenizer(self.target)]
            if len(self.target_ids) < pad_size:
                self.target_ids.extend([vocab.get(PAD)] * (pad_size - len(self.target_ids)))

    def _to_tensor(self, datas):
        """Stack a list of samples into ``(inputs, labels)`` tensors on ``self.device``."""
        x = torch.LongTensor([_[0] for _ in datas]).to(self.device)
        y = torch.LongTensor([_[1] for _ in datas]).to(self.device)
        # Length before padding (capped at pad_size by the dataset builder).
        seq_len = torch.LongTensor([_[2] for _ in datas]).to(self.device)
        if self.is_Tan:
            t = torch.LongTensor([self.target_ids for _ in range(len(datas))]).to(self.device)
            return (x, t, seq_len), y
        else:
            return (x, seq_len), y

    def __next__(self):
        if self.residue and self.index == self.n_batches:
            # Final partial batch.
            batches = self.batches[self.index * self.batch_size: len(self.batches)]
            self.index += 1
            batches = self._to_tensor(batches)
            return batches

        elif self.index >= self.n_batches:
            # Exhausted: rewind so the iterator can be consumed again next epoch.
            self.index = 0
            raise StopIteration
        else:
            batches = self.batches[self.index * self.batch_size: (self.index + 1) * self.batch_size]
            self.index += 1
            batches = self._to_tensor(batches)
            return batches

    def __iter__(self):
        return self

    def __len__(self):
        """Number of batches per pass, including the partial one if present."""
        if self.residue:
            return self.n_batches + 1
        else:
            return self.n_batches


def build_iterator(dataset, config ,is_Tan=True):
    """Wrap ``dataset`` in a ``DatasetIterater`` configured from ``config``."""
    return DatasetIterater(dataset, config.batch_size, config.device, config.pad_size, is_Tan,
                           vocab_path=getattr(config, 'vocab_path', None))


def get_time_dif(start_time):
    """Return the time elapsed since ``start_time`` as a whole-second ``timedelta``."""
    elapsed_seconds = time.time() - start_time
    whole_seconds = int(round(elapsed_seconds))
    return timedelta(seconds=whole_seconds)

In [None]:
# coding: UTF-8
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import metrics
import time

def init_network(model, method='xavier', exclude='embedding', seed=123):
    """Re-initialise the parameters of ``model`` in place.

    Parameters whose name contains ``exclude`` are left untouched. Of the rest,
    names containing ``'weight'`` are drawn with Xavier-normal (``'xavier'``),
    Kaiming-normal (``'kaiming'``) or plain normal (any other ``method``)
    initialisation, and names containing ``'bias'`` are set to zero. Parameters
    matching neither pattern are skipped.

    Args:
        model: Module whose ``named_parameters()`` are visited.
        method: Weight initialisation scheme.
        exclude: Substring marking parameters to skip.
        seed: Accepted but not used by this function.
    """
    weight_initialisers = {
        'xavier': nn.init.xavier_normal_,
        'kaiming': nn.init.kaiming_normal_,
    }
    for name, param in model.named_parameters():
        if exclude in name:
            continue
        if 'weight' in name:
            initialise = weight_initialisers.get(method, nn.init.normal_)
            initialise(param)
        elif 'bias' in name:
            nn.init.constant_(param, 0)


def train(config, model, train_iter, dev_iter, test_iter):
    """Train ``model`` with Adam, checkpointing on validation loss and stopping early.

    Every 50 batches the model is evaluated on ``dev_iter``; whenever the
    validation loss improves, ``model.state_dict()`` is saved to
    ``config.save_path``. Training stops once more than
    ``config.require_improvement`` batches pass without improvement, or after
    ``config.num_epochs`` epochs.

    Args:
        config: Provides ``learning_rate``, ``num_epochs``, ``save_path`` and
            ``require_improvement``.
        model: Module called as ``model(inputs)`` that returns class logits.
        train_iter: Iterable yielding ``(inputs, labels)`` training batches.
        dev_iter: Iterable passed to ``evaluate`` for validation.
        test_iter: Accepted but not used inside this function.

    Returns:
        None.
    """
    start_time = time.time()
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate,weight_decay=1e-6)
    total_batch = 0  # number of batches processed so far
    dev_best_loss = float('inf')
    last_improve = 0  # batch index of the last validation-loss improvement
    flag = False  # set when early stopping triggers, to leave the epoch loop too
    for epoch in range(config.num_epochs):
        print('Epoch [{}/{}]'.format(epoch + 1, config.num_epochs))
        for i, (trains, labels) in enumerate(train_iter):
            outputs = model(trains)
            # Gradients are cleared after the forward pass but before backward().
            model.zero_grad()
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()
            if total_batch % 50 == 0:
                # Report accuracy on the current training batch and on the validation set.
                true = labels.data.cpu()
                predic = torch.max(outputs.data, 1)[1].cpu()
                train_acc = metrics.accuracy_score(true, predic)
                dev_acc, dev_loss = evaluate(config, model, dev_iter)
                if dev_loss < dev_best_loss:
                    dev_best_loss = dev_loss
                    torch.save(model.state_dict(), config.save_path)
                    improve = '*'
                    last_improve = total_batch
                else:
                    improve = ''
                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6},  Train Loss: {1:>5.2},  Train Acc: {2:>6.2%},  Val Loss: {3:>5.2},  Val Acc: {4:>6.2%},  Time: {5} {6}'
                print(msg.format(total_batch, loss.item(), train_acc, dev_loss, dev_acc, time_dif, improve))
                # evaluate() switches the model to eval mode; switch back for training.
                model.train()
            total_batch += 1
            if total_batch - last_improve > config.require_improvement:
                print("No optimization for a long time, auto-stopping...")
                flag = True
                break
        if flag:
            break
    return None
    
def evaluate(config, model, data_iter, test=False):
    """Run ``model`` over ``data_iter`` in eval mode without gradients.

    Args:
        config: Not used inside this function.
        model: Module called as ``model(inputs)`` that returns class logits.
        data_iter: Sized iterable of ``(inputs, labels)`` batches.
        test: Selects which pair is returned.

    Returns:
        ``(accuracy, mean_loss)`` when ``test`` is false, where ``mean_loss`` is
        the summed batch loss divided by ``len(data_iter)``; otherwise
        ``(predictions, macro_f1)``, with ``predictions`` a NumPy array holding
        the predicted class of every sample.
    """
    model.eval()
    running_loss = 0
    all_predictions = np.array([], dtype=int)
    all_labels = np.array([], dtype=int)
    with torch.no_grad():
        for batch_inputs, batch_labels in data_iter:
            logits = model(batch_inputs)
            running_loss += F.cross_entropy(logits, batch_labels)
            predicted = torch.max(logits.data, 1)[1]
            all_labels = np.append(all_labels, batch_labels.data.cpu().numpy())
            all_predictions = np.append(all_predictions, predicted.cpu().numpy())
    accuracy = metrics.accuracy_score(all_labels, all_predictions)
    if not test:
        return accuracy, running_loss / len(data_iter)
    macro_f1 = metrics.f1_score(all_labels, all_predictions, average='macro')
    return all_predictions, macro_f1

def get_depth_f1(df,depth):
    """Macro-F1 of ``predicted`` against ``label`` for rows at a given ``depth``.

    For ``depth`` below 5 only rows whose ``depth`` column equals it are used;
    otherwise every row with ``depth`` greater than or equal to it is used.
    """
    depths = df['depth']
    selector = (depths == depth) if depth < 5 else (depths >= depth)
    subset = df.loc[selector]
    gold = subset['label'].to_list()
    guessed = subset['predicted'].to_list()
    return metrics.f1_score(gold, guessed, average='macro')

def test(config, model, test_iter, df):
    """Load the checkpoint at ``config.save_path`` and score it on ``test_iter``.

    The checkpoint is loaded with ``map_location=config.device`` so that one
    saved on a GPU can also be restored on a CPU-only runtime (previously
    ``torch.load`` would fail there). The unused timer was removed.

    Args:
        config: Provides ``save_path`` and ``device``.
        model: Module whose weights are replaced by the checkpoint.
        test_iter: Iterable passed to ``evaluate``.
        df: Frame with one row per test sample, in iterator order; a
            ``predicted`` column is added to it in place.

    Returns:
        ``(df, f1)``: the same frame and the F1 score reported by ``evaluate``.
    """
    state_dict = torch.load(config.save_path, map_location=config.device)
    model.load_state_dict(state_dict)
    model.eval()
    predict_all, f1 = evaluate(config, model, test_iter, test=True)
    df['predicted'] = list(predict_all)
    return df, f1

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
class CNN_Config:
    """Settings bundle for the TextCNN baseline.

    Args:
        dataset: Root folder; checkpoint, vocabulary and embeddings live below it.
        embedding: File name of the ``.npz`` archive inside ``<dataset>/data/``
            holding an ``embeddings`` array.
        df_train: Frame used for training.
        df_test: Frame used both as the validation split and as the test split.
    """

    def __init__(self, dataset, embedding, df_train, df_test):
        # --- identity and data splits ---
        self.model_name = 'Text_CNN'
        self.train_df = df_train
        self.dev_df = self.test_df = df_test  # validation and test share one frame
        self.class_list = [0, 1, 2]
        self.num_classes = len(self.class_list)

        # --- files ---
        checkpoint_file = self.model_name + '.ckpt'
        self.save_path = dataset + '/saved_dict/' + checkpoint_file
        self.vocab_path = dataset + '/data/vocab.pkl'
        archive = np.load(dataset + '/data/' + embedding)
        # Pre-trained word embeddings, stored as float32.
        self.embedding_pretrained = torch.tensor(archive["embeddings"].astype('float32'))

        # --- runtime and optimisation ---
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')
        self.require_improvement = 500
        self.n_vocab = 0  # vocabulary size, filled in at run time
        self.num_epochs = 100
        self.batch_size = 16
        self.pad_size = 200
        self.learning_rate = 2e-4

        # --- architecture ---
        # Word-embedding dimension: taken from the loaded matrix when there is one.
        if self.embedding_pretrained is not None:
            self.embed = self.embedding_pretrained.size(1)
        else:
            self.embed = 300
        self.filter_sizes = (2, 3, 4)
        self.num_filters = 32
        self.dropout = 0.5

class Bi_LSTM_Att_Config:
    """Settings bundle for the TAN (Bi-LSTM with attention) baseline.

    Args:
        dataset: Root folder; checkpoint, vocabulary and embeddings live below it.
        embedding: File name of the ``.npz`` archive inside ``<dataset>/data/``
            holding an ``embeddings`` array.
        df_train: Frame used for training.
        df_test: Frame used both as the validation split and as the test split.
    """

    def __init__(self, dataset, embedding, df_train, df_test):
        # --- identity and data splits ---
        self.model_name = 'TAN'
        self.train_df = df_train
        self.dev_df = self.test_df = df_test  # validation and test share one frame
        self.class_list = [0, 1, 2]

        # --- files ---
        checkpoint_file = self.model_name + '.ckpt'
        self.save_path = dataset + '/saved_dict/' + checkpoint_file
        self.vocab_path = dataset + '/data/vocab.pkl'
        archive = np.load(dataset + '/data/' + embedding)
        # Pre-trained word embeddings, stored as float32.
        self.embedding_pretrained = torch.tensor(archive["embeddings"].astype('float32'))

        # --- runtime and optimisation ---
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')
        self.require_improvement = 500
        self.num_classes = len(self.class_list)
        self.n_vocab = 0  # vocabulary size, filled in at run time
        self.num_epochs = 20
        self.batch_size = 16
        self.pad_size = 64
        self.learning_rate = 2e-4

        # --- architecture ---
        # Word-embedding dimension: taken from the loaded matrix when there is one.
        if self.embedding_pretrained is not None:
            self.embed = self.embedding_pretrained.size(1)
        else:
            self.embed = 300
        self.hidden_size = 256  # LSTM hidden size
        self.num_layers = 2  # LSTM layer count
        self.hidden_size2 = 64
        self.dropout = 0.5

In [None]:
class TextCNN(nn.Module):
    """Convolutional text classifier over word embeddings.

    The embedding layer is initialised from ``config.embedding_pretrained`` (and
    kept trainable) when that is given; otherwise a fresh embedding of size
    ``config.n_vocab x config.embed`` is created with the last index as padding.
    """

    def __init__(self, config):
        super().__init__()
        pretrained = config.embedding_pretrained
        if pretrained is None:
            self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1)
        else:
            self.embedding = nn.Embedding.from_pretrained(pretrained, freeze=False)
        conv_layers = []
        for width in config.filter_sizes:
            # Each kernel spans `width` tokens and the full embedding dimension.
            conv_layers.append(nn.Conv2d(1, config.num_filters, (width, config.embed)))
        self.convs = nn.ModuleList(conv_layers)
        self.dropout = nn.Dropout(config.dropout)
        feature_count = config.num_filters * len(config.filter_sizes)
        self.fc = nn.Linear(feature_count, config.num_classes)

    def conv_and_pool(self, x, conv):
        """Apply ``conv`` + ReLU, then max-pool over the whole remaining length."""
        activated = F.relu(conv(x))
        feature_map = activated.squeeze(3)
        pooled = F.max_pool1d(feature_map, feature_map.size(2))
        return pooled.squeeze(2)

    def forward(self, x):
        """Return class logits; only ``x[0]`` (the token ids) is read."""
        embedded = self.embedding(x[0]).unsqueeze(1)  # add a channel axis for Conv2d
        pooled_features = [self.conv_and_pool(embedded, conv) for conv in self.convs]
        merged = self.dropout(torch.cat(pooled_features, 1))
        return self.fc(merged)

class TAN(nn.Module):
    """Target-specific attention network: Bi-LSTM states weighted by target-aware attention.

    Fix: the LSTM is now built with ``batch_first=True``. ``forward`` feeds it
    ``(batch_size, seq_len, embedding)`` tensors, but the default layout is
    ``(seq_len, batch, feature)``, so the recurrence previously ran across the
    samples of a batch and each prediction depended on the other samples in it.
    """

    def __init__(self,config):
        super().__init__()
        self.embedding_dim = config.embed
        if config.embedding_pretrained is not None:
            self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False)
        else:
            self.embedding = nn.Embedding(config.n_vocab, config.embed, padding_idx=config.n_vocab - 1)
        # Not used in forward(); kept so parameter names and existing checkpoints stay compatible.
        self.attention = nn.Linear(2*config.embed,1)
        # NOTE(review): config.num_layers is not passed here, so the LSTM has a single layer.
        self.lstm = nn.LSTM(config.embed, config.hidden_size, bidirectional=True, batch_first=True)
        # NOTE(review): dropout is created but never applied in forward().
        self.dropout = nn.Dropout(config.dropout)
        self.fc1 = nn.Linear(config.hidden_size * 2, config.hidden_size2)
        self.fc = nn.Linear(config.hidden_size2, config.num_classes)
        # Attention scoring vector over [token embedding ; averaged target embedding].
        self.w = nn.Parameter(torch.zeros(config.embed * 2))

    def forward(self,x):
        """Return class logits for a batch ``x = (sentence_ids, target_ids, seq_len)``.

        ``seq_len`` is ignored. Both id tensors are expected as
        ``(batch_size, seq_len)``.
        """
        sentence,target, _ = x
        x_emb = self.embedding(sentence)  # (batch_size, seq_len, embedding)
        t_emb = self.embedding(target)  # (batch_size, target_len, embedding)
        # Summed target embedding divided by the sentence length.
        z = torch.sum(t_emb, dim=1)
        z = torch.div(z, x_emb.size()[1])  # (batch_size, embedding)
        z = z.unsqueeze(1)  # (batch_size, 1, embedding)
        z = torch.tile(z, [1, x_emb.size()[1], 1])  # (batch_size, seq_len, embedding)
        xt_emb = torch.cat((x_emb,z),dim=2)  # (batch_size, seq_len, 2*embedding)
        # Attention weights over the sequence positions.
        a = F.softmax(torch.matmul(xt_emb, self.w), dim=1)  # (batch_size, seq_len)
        a = a.unsqueeze(-1)  # (batch_size, seq_len, 1)
        h, _ = self.lstm(x_emb)  # (batch_size, seq_len, 2*hidden)
        out = h * a  # (batch_size, seq_len, 2*hidden)
        out = torch.sum(out, 1)  # (batch_size, 2*hidden)
        out = F.relu(out)
        out = self.fc1(out)  # (batch_size, hidden_size2)
        out = self.fc(out)  # (batch_size, num_classes)
        return out

In [None]:
# coding: UTF-8
import time
import torch
import numpy as np

# TAN baseline: train and test on ten shuffled 80/20 splits, then summarise the scores.
embedding = 'embedding_CantoStance.npz'

# f holds the score returned by test(); f1..f5 hold get_depth_f1 for depths 1..5.
f, f1, f2, f3, f4, f5 = [], [], [], [], [], []

for i in range(10):
    dataset = '/content/drive/MyDrive/Prediction'
    # The run index doubles as the shuffle seed.
    df = pd.concat([df2, df_info]).sample(frac=1, random_state=i)
    cut_idx1 = int(round(0.8 * df.shape[0]))
    df_train = df[:cut_idx1]
    df_test = df[cut_idx1:]
    config = Bi_LSTM_Att_Config(dataset, embedding, df_train, df_test)

    # Model-side randomness is seeded identically in every run.
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True

    start_time = time.time()
    print("Loading data...")
    vocab, train_data, test_data = build_dataset(config)
    train_iter = build_iterator(train_data, config)
    dev_iter = build_iterator(test_data, config)  # validation reuses the test data
    test_iter = build_iterator(test_data, config)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    config.n_vocab = len(vocab)
    model = TAN(config).to(config.device)
    init_network(model)
    train(config, model, train_iter, dev_iter, test_iter)
    df_test, f_score = test(config, model, test_iter, df_test)
    filename = "/content/drive/MyDrive/DATA/test_" + config.model_name + str(i) + '.csv'
    df_test.to_csv(filename)

    f.append(f_score)
    for depth, depth_scores in zip(range(1, 6), (f1, f2, f3, f4, f5)):
        depth_scores.append(get_depth_f1(df_test, depth))

# Mean and sample standard deviation (ddof=1) over the runs.
for label, scores in zip(("f", "f1", "f2", "f3", "f4", "f5"), (f, f1, f2, f3, f4, f5)):
    print(label + ": " + str(np.mean(scores)) + "+" + str(np.std(scores, ddof=1)))

In [None]:
import time
import torch
import numpy as np

# Ten-run evaluation of the TextCNN model configured by CNN_Config.
# Every run reshuffles the combined frame with random_state=i, cuts it 80/20,
# builds a fresh model, trains it, saves the test predictions to Drive and
# records f_score plus the get_depth_f1 scores for depth arguments 1..5.
dataset = '/content/drive/MyDrive/Prediction'  # dataset location passed to the config
embedding = 'embedding_CantoStance.npz'

# Score histories, one entry per run: f holds f_score, f1..f5 hold the
# get_depth_f1 results for depth arguments 1..5.
f, f1, f2, f3, f4, f5 = [], [], [], [], [], []

for i in range(10):
    # Shuffle with the run index as seed, then cut at the 80% mark.
    df = pd.concat([df2, df_info]).sample(frac=1, random_state=i)
    cut_idx1 = int(round(0.8 * df.shape[0]))
    df_train = df[:cut_idx1]
    df_test = df[cut_idx1:]
    config = CNN_Config(dataset, embedding, df_train, df_test)

    # The library seeds are fixed to 1 in every run; the shuffle seed above is
    # the per-run variable.
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True  # make results the same on every run

    start_time = time.time()
    print("Loading data...")
    vocab, train_data, test_data = build_dataset(config)
    train_iter = build_iterator(train_data, config)
    # NOTE(review): dev_iter and test_iter are both built from test_data, so
    # whatever train() does with dev_iter sees the test split - confirm this is
    # intended.
    dev_iter = build_iterator(test_data, config)
    test_iter = build_iterator(test_data, config)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    config.n_vocab = len(vocab)
    model = TextCNN(config).to(config.device)
    init_network(model)
    train(config, model, train_iter, dev_iter, test_iter)

    # test() hands back the test frame together with the run's score.
    df_test, f_score = test(config, model, test_iter, df_test)
    filename = "/content/drive/MyDrive/DATA/test_" + config.model_name + str(i) + '.csv'
    df_test.to_csv(filename)

    f.append(f_score)
    for _depth, _bucket in enumerate((f1, f2, f3, f4, f5), start=1):
        _bucket.append(get_depth_f1(df_test, _depth))

# Mean and sample standard deviation (ddof=1) over the ten runs.
for _prefix, _scores in (("f: ", f), ("f1: ", f1), ("f2: ", f2),
                         ("f3: ", f3), ("f4: ", f4), ("f5: ", f5)):
    print(_prefix + str(np.mean(_scores)) + "+" + str(np.std(_scores, ddof=1)))

# SVM-ngram Model

In [None]:
import pycantonese

# Stop-word collection shipped with pycantonese; used below to filter tokens.
stop_words = pycantonese.stop_words()


def create_sentences(df):
    """Build the (text, label) pairs that serve as SVM input.

    For every row, the ``text`` value is segmented with
    ``pycantonese.segment``, tokens contained in the module-level
    ``stop_words`` are dropped, and the remaining tokens are joined with
    single spaces.

    Args:
        df: frame providing ``text`` and ``label`` columns.

    Returns:
        A list with one ``(space_joined_tokens, label)`` tuple per row, in
        row order.
    """
    def _clean(raw_text):
        # Segment first, then keep only the tokens that are not stop words.
        kept = [tok for tok in pycantonese.segment(raw_text) if tok not in stop_words]
        return " ".join(kept)

    return [
        (_clean(df['text'].iloc[pos]), df['label'].iloc[pos])
        for pos in range(len(df))
    ]


In [None]:
import random
from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC,LinearSVC
from sklearn import svm
from sklearn.metrics import *
import tensorflow as tf
import scipy.sparse as sp
from scipy.sparse import hstack

class TextClassifier():
    """Linear SVM over concatenated word-level and character-level n-gram counts.

    Two ``CountVectorizer`` instances turn each input string into sparse count
    features (word 1-3-grams and character 2-5-grams); the two matrices are
    stacked column-wise and passed to the classifier.

    Inputs are expected to be whitespace-joined token strings such as those
    produced by ``create_sentences``. The word-level vectorizer filters the
    module-level ``stop_words`` collection.
    """

    def __init__(self, classifier=None):
        """Create the vectorizers and the classifier.

        Args:
            classifier: optional estimator exposing ``fit``/``predict``. When
                omitted, the ``LinearSVC`` configuration this notebook has
                always used is created. (The argument used to be accepted but
                silently ignored, and its default was an ``SVC`` instance built
                once at definition time.)
        """
        if classifier is None:
            classifier = LinearSVC(C=0.2, class_weight=None, dual=True, fit_intercept=True,
                                   intercept_scaling=1, loss='squared_hinge', max_iter=300,
                                   multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
                                   verbose=0)
        self.classifier = classifier
        # Word-level 1-3-grams. scikit-learn documents `stop_words` as a list
        # (newer releases validate the type), so the collection is converted;
        # sorting only makes the parameter repr deterministic.
        self.vectorizer1 = CountVectorizer(analyzer='word', ngram_range=(1,3),
                                           stop_words=sorted(stop_words))
        # Character-level 2-5-grams. `stop_words` only applies to the 'word'
        # analyzer, so it is not passed here.
        self.vectorizer2 = CountVectorizer(analyzer='char', ngram_range=(2,5))

    def features(self, X):
        """Vectorize ``X`` with the already-fitted vectorizers.

        Args:
            X: iterable of strings.

        Returns:
            Sparse matrix with the word n-gram columns followed by the
            character n-gram columns.
        """
        word=self.vectorizer1.transform(X)
        character=self.vectorizer2.transform(X)
        feature = hstack([word,character])
        return feature

    def fit(self, X, y):
        """Fit both vectorizers and the classifier on the training data.

        Args:
            X: re-iterable collection of strings (it is traversed once per
                vectorizer).
            y: labels aligned with ``X``.

        Returns:
            ``self``, following the scikit-learn convention.
        """
        # fit_transform learns the vocabulary and produces the training matrix
        # in a single pass instead of fitting and then transforming again.
        word = self.vectorizer1.fit_transform(X)       # word-level n-grams
        character = self.vectorizer2.fit_transform(X)  # character n-grams
        self.classifier.fit(hstack([word, character]), y)
        return self

    def predict(self, x):
        """Return the classifier's predictions for the strings in ``x``."""
        return self.classifier.predict(self.features(x))


# Score histories for the SVM experiment, one entry per run: `f` is the
# overall macro-F1, f1..f5 are the get_depth_f1 results for depths 1..5.
# The generator yields six independent empty lists.
f, f1, f2, f3, f4, f5 = ([] for _ in range(6))


def get_depth_f1(df, depth, max_depth=5):
    """Macro-averaged F1 of ``predicted`` against ``label`` for one depth bucket.

    Args:
        df: frame with ``depth``, ``label`` and ``predicted`` columns.
        depth: bucket to score. Values below ``max_depth`` select rows whose
            ``depth`` equals it exactly; any other value selects every row
            with ``depth >= depth`` (the open-ended last bucket).
        max_depth: first depth treated as open-ended. Defaults to 5, the
            threshold that was previously hard-coded.

    Returns:
        The macro F1 score computed by ``f1_score`` on the selected rows.
    """
    if depth < max_depth:
        in_bucket = df['depth'] == depth
    else:
        in_bucket = df['depth'] >= depth
    df_depth = df.loc[in_bucket]
    # Returned directly: a local called `f1` would shadow the notebook-level
    # list of the same name.
    return f1_score(df_depth['label'].to_list(), df_depth['predicted'].to_list(), average='macro')
# Ten-run evaluation of the SVM n-gram classifier. Every run reshuffles the
# combined frame with random_state=i, cuts it 80/20, fits a new TextClassifier,
# saves the test predictions to Drive and records the overall macro-F1 plus
# the get_depth_f1 scores for depths 1..5.

# Not read by the SVM pipeline; still assigned (once, instead of per
# iteration) so the notebook-level name keeps the value it had before.
dataset = '/content/drive/MyDrive/Prediction'

for i in range(10):
    # Shuffle with the run index as seed, then cut at the 80% mark.
    df = pd.concat([df2, df_info]).sample(frac=1, random_state=i)
    cut_idx1 = int(round(0.8 * df.shape[0]))
    df_train = df[:cut_idx1]
    # Explicit copy: a column is added to df_test below, and assigning into a
    # slice of `df` is the chained-assignment pattern pandas warns about.
    df_test = df[cut_idx1:].copy()

    sentences_train = create_sentences(df_train)
    sentences_test = create_sentences(df_test)
    x_train, y_train = zip(*sentences_train)
    x_test, y_test = zip(*sentences_test)

    text_classifier = TextClassifier()
    text_classifier.fit(x_train, y_train)
    y_pred = text_classifier.predict(x_test)

    df_test['predicted'] = y_pred
    filename = "/content/drive/MyDrive/DATA/SVM" + str(i) + '.csv'
    df_test.to_csv(filename)

    f.append(f1_score(y_test, y_pred, average='macro'))
    for _depth, _bucket in enumerate((f1, f2, f3, f4, f5), start=1):
        _bucket.append(get_depth_f1(df_test, _depth))

# Mean and sample standard deviation (ddof=1) over the ten runs.
for _prefix, _scores in (("f: ", f), ("f1: ", f1), ("f2: ", f2),
                         ("f3: ", f3), ("f4: ", f4), ("f5: ", f5)):
    print(_prefix + str(np.mean(_scores)) + "+" + str(np.std(_scores, ddof=1)))

f: 0.5814944220161355+0.014710859049724941
f1: 0.6006518201108779+0.06745646440246371
f2: 0.5746718842473272+0.01750741325619133
f3: 0.49993026665811346+0.05111860555236531
f4: 0.5155522237209406+0.10012384602799053
f5: 0.472909064720317+0.1130151559665336
[0.5856178438014196, 0.5765555839364268, 0.578176449075225, 0.5775772212228832, 0.5757272820843918, 0.5909993888770427, 0.5475099163915668, 0.6013348791727348, 0.586397632823067, 0.5950480227765976]
[0.6214896214896215, 0.6170989517110411, 0.5040364216652586, 0.6761408692893931, 0.5576814136651865, 0.7334512953132797, 0.5341880341880342, 0.6091726889491117, 0.5699521531100479, 0.5833067517278043]
[0.5684665122351539, 0.5612756954603473, 0.5823193765335689, 0.5608017976884305, 0.5761988676366431, 0.5695096581000945, 0.5454402290221138, 0.5925250724434244, 0.5838283600417511, 0.6063532733117446]
[0.570084262880873, 0.4579737732656515, 0.5534064522503829, 0.48089777516416743, 0.4695340501792115, 0.4748032495557248, 0.47202202001576676, 

# Kill Process

In [None]:
# Install psmisc (the package that provides `fuser`), then list the processes
# that currently have the NVIDIA device files open.
!apt install psmisc
!sudo fuser /dev/nvidia*

Reading package lists... Done
Building dependency tree       
Reading state information... Done
psmisc is already the newest version (23.1-1ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 39 not upgraded.
/dev/nvidia0:        19745m
/dev/nvidiactl:      19745m
/dev/nvidia-uvm:     19745m


In [None]:
# Force-kill (SIGKILL) the process holding the GPU devices.
# NOTE(review): 19745 is the PID printed in the recorded `fuser` output and is
# specific to that session - re-run `fuser` and substitute the current PID
# before executing this cell.
!kill -9 19745

In [None]:
# Print the current GPU status report (driver/CUDA versions, memory usage).
!nvidia-smi

Thu Mar 31 10:14:08 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P0    34W / 250W |   1371MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import torch
# Name of CUDA device 0; as the cell's last expression it is displayed as the
# cell output.
torch.cuda.get_device_name(0)

'Tesla P100-PCIE-16GB'