<a href="https://colab.research.google.com/github/Rayars/ECE/blob/main/BERT_MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install sentence-transformers
!pip install sklearn

Collecting sentence-transformers
  Downloading sentence-transformers-2.2.0.tar.gz (79 kB)
[K     |████████████████████████████████| 79 kB 5.2 MB/s 
[?25hCollecting transformers<5.0.0,>=4.6.0
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 44.8 MB/s 
Collecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 39.2 MB/s 
[?25hCollecting huggingface-hub
  Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 6.9 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 25.9 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 5

In [8]:
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize
from sentence_transformers import SentenceTransformer
import os
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import torch.nn.functional as F   # 激励函数的库
from sklearn.model_selection import train_test_split
from tqdm import tqdm

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
# 定义全局变量
n_epochs = 10     # epoch 的数目
batch_size = 20  # 决定每次读取多少图片

class ECEDataset(Dataset):
    def __init__(self, data_file, transform=None, target_transform=None):
        self.data = pd.read_csv(data_file)
        self.clauses=self.data.iloc[:,[0,1,2,3,5]]
        self.labels=self.data.iloc[:,4]
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.clauses)

    def __getitem__(self, idx):
        clause = list(self.clauses.iloc[idx])
        label = self.labels.iloc[idx]
        if self.transform:
            clause = self.transform(clause)
        if self.target_transform:
            label = self.target_transform(label)
        return clause, label

#定义训练和数据集
train_data,test_data=train_test_split(ECEDataset("/content/sample_data/clause_keywords.csv"),test_size=0.2)
#print(train_data[0],test_data[0])


#创建加载器
train_loader=torch.utils.data.DataLoader(train_data, batch_size = batch_size, num_workers = 0,shuffle=True)
test_loader=torch.utils.data.DataLoader(test_data, batch_size = batch_size, num_workers = 0,shuffle=True)



In [23]:
# 建立一个四层感知机网络
class MLP(nn.Module):   # 继承 torch 的 Module
    def __init__(self):
        super(MLP,self).__init__()    # 
        # 初始化三层神经网络 两个全连接的隐藏层，一个输出层
        self.fc1 = torch.nn.Linear(768,256)  # 第一个隐含层  
        self.fc2 = torch.nn.Linear(256,64)  # 第二个隐含层
        self.fc3 = torch.nn.Linear(64,32)   # 第三个隐藏层
        self.fc4 = torch.nn.Linear(32,2)   #输出层

        
    def forward(self,din):
        # 前向传播， 输入值：din, 返回值 dout
        dout = F.relu(self.fc1(din))   # 使用 relu 激活函数
        dout = F.relu(self.fc2(dout))
        dout = F.relu(self.fc3(dout))
        dout = F.softmax(self.fc4(dout), dim=1)  # 输出层使用 softmax 激活函数
        # 2个数字实际上是2个类别，输出是概率分布，最后选取概率最大的作为预测值输出
        return dout


#建立Bbert+MLP网络
class Sent_bert_MLP(nn.Module):
    def __init__(self):
        super(Sent_bert_MLP,self).__init__()
        self.bert=SentenceTransformer('bert-base-nli-mean-tokens')
        self.MLP=MLP()

    def forward(self,input):
        sentence_embedding=self.bert.encode(input[4])
        #print(input[4])
        #print(sentence_embedding.shape)
        dout=self.MLP(torch.tensor(sentence_embedding))
        return dout


In [30]:
bert=SentenceTransformer('bert-base-nli-mean-tokens')

# 训练神经网络
def train():
    #定义损失函数和优化器
    lossfunc = nn.CrossEntropyLoss(weight=torch.from_numpy(np.array([1.0,99.0])).float())
    lossfunc.cuda()
    optimizer = torch.optim.SGD(params = model.parameters(), lr = 0.01)
    # 开始训练
    for epoch in range(n_epochs):
        print('Epoch:  {}  \t'.format(epoch+1))
        train_loss = 0.0
        for data,target in tqdm(train_loader):
            data=torch.tensor(bert.encode(data[4])) # embedding
            target=list(target)
            for i in range(len(target)):
              if target[i]=='yes':
                target[i]=1
              else:
                target[i]=0
            target=torch.tensor(target)
            data,target=data.to(device),target.to(device)
            optimizer.zero_grad()   # 清空上一步的残余更新参数值
            output = model(data)    # 得到预测值
            loss = lossfunc(output,target)  # 计算两者的误差
            loss.backward()         # 误差反向传播, 计算参数更新值
            optimizer.step()        # 将参数更新值施加到 net 的 parameters 上
            train_loss += loss.item()*data.size(0)
        train_loss = train_loss / len(train_loader.dataset)
        print('Training Loss: {:.6f}'.format( train_loss))
        # 每遍历一遍数据集，测试一下准确率
        test()

# 在数据集上测试神经网络
def test():
    correct = 0
    total = 0
    with torch.no_grad():  # 训练集中不需要反向传播
        for data,target in tqdm(test_loader):
            data=torch.tensor(bert.encode(data[4])) # embedding
            target=list(target)
            for i in range(len(target)):
              if target[i]=='yes':
                target[i]=1
              else:
                target[i]=0
            target=torch.tensor(target)
            data,target=data.to(device),target.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    print('Accuracy of the network on the test clause: %f %%' % (
        100.0 * correct / total))
    return 100.0 * correct / total

# 声明感知器网络
model = MLP()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#print(device)
model.to(device)
train()

Epoch:  1  	


100%|██████████| 1252/1252 [01:38<00:00, 12.65it/s]


Training Loss: 0.628371


100%|██████████| 313/313 [00:24<00:00, 12.76it/s]


Accuracy of the network on the test clause: 6.789137 %
Epoch:  2  	


100%|██████████| 1252/1252 [01:38<00:00, 12.66it/s]


Training Loss: 0.597985


100%|██████████| 313/313 [00:24<00:00, 12.73it/s]


Accuracy of the network on the test clause: 17.252396 %
Epoch:  3  	


100%|██████████| 1252/1252 [01:39<00:00, 12.56it/s]


Training Loss: 0.588143


100%|██████████| 313/313 [00:24<00:00, 12.83it/s]


Accuracy of the network on the test clause: 24.680511 %
Epoch:  4  	


100%|██████████| 1252/1252 [01:39<00:00, 12.54it/s]


Training Loss: 0.590327


100%|██████████| 313/313 [00:24<00:00, 12.75it/s]


Accuracy of the network on the test clause: 26.086262 %
Epoch:  5  	


100%|██████████| 1252/1252 [01:39<00:00, 12.59it/s]


Training Loss: 0.588485


100%|██████████| 313/313 [00:24<00:00, 12.79it/s]


Accuracy of the network on the test clause: 37.300319 %
Epoch:  6  	


100%|██████████| 1252/1252 [01:39<00:00, 12.64it/s]


Training Loss: 0.579054


100%|██████████| 313/313 [00:24<00:00, 12.78it/s]


Accuracy of the network on the test clause: 47.683706 %
Epoch:  7  	


100%|██████████| 1252/1252 [01:39<00:00, 12.61it/s]


Training Loss: 0.581925


100%|██████████| 313/313 [00:24<00:00, 12.78it/s]


Accuracy of the network on the test clause: 32.348243 %
Epoch:  8  	


100%|██████████| 1252/1252 [01:39<00:00, 12.60it/s]


Training Loss: 0.580222


100%|██████████| 313/313 [00:24<00:00, 12.74it/s]


Accuracy of the network on the test clause: 14.520767 %
Epoch:  9  	


100%|██████████| 1252/1252 [01:39<00:00, 12.62it/s]


Training Loss: 0.574996


100%|██████████| 313/313 [00:24<00:00, 12.74it/s]


Accuracy of the network on the test clause: 23.961661 %
Epoch:  10  	


100%|██████████| 1252/1252 [01:39<00:00, 12.57it/s]


Training Loss: 0.575216


100%|██████████| 313/313 [00:24<00:00, 12.76it/s]

Accuracy of the network on the test clause: 32.092652 %



