In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, recall_score, f1_score
from torch.optim import Adam
from tqdm import tqdm
from transformers import DistilBertTokenizer, DistilBertModel

In [None]:
class Attention(nn.Module):
    """Additive attention pooling: ``softmax(tanh(W x) . v)`` over the sequence axis.

    Args:
        hidden_dim: size of the last dimension of the input.
        attention_dim: size of the projected space and of the returned vector.

    Input may be either
      * ``(batch, hidden_dim)``          -- treated as a sequence of length 1, or
      * ``(batch, seq_len, hidden_dim)`` -- pooled over ``seq_len``.

    Returns a ``(batch, attention_dim)`` tensor.

    Note: with a 2-D input the softmax is taken over a single position, so the
    weight is always 1 and the result is simply ``tanh(W x)``; ``v`` then
    receives a zero gradient. Attention only does real work for 3-D input.
    """

    def __init__(self, hidden_dim, attention_dim):
        super().__init__()
        self.in_features = hidden_dim
        self.out_features = attention_dim
        self.W = nn.Linear(hidden_dim, attention_dim)
        # Scoring vector; zero-initialised, so attention starts out uniform.
        self.v = nn.Parameter(torch.zeros(attention_dim))

    def forward(self, x):
        if x.dim() == 2:
            # A single vector per sample is a length-1 sequence.
            x = x.unsqueeze(1)
        elif x.dim() != 3:
            raise ValueError(
                "Attention expects a 2-D or 3-D input, got shape {}".format(tuple(x.shape))
            )

        projected = torch.tanh(self.W(x))              # (batch, seq_len, attention_dim)
        # matmul broadcasts v over the batch; no per-sample copy is needed.
        scores = torch.matmul(projected, self.v)       # (batch, seq_len)
        attention_weights = F.softmax(scores, dim=1)   # (batch, seq_len)
        # Weighted sum of the projected vectors over the sequence axis.
        pooled = torch.bmm(attention_weights.unsqueeze(1), projected)  # (batch, 1, attention_dim)
        return pooled.squeeze(1)                       # (batch, attention_dim)
# torch.mean(BERT_output, dim=1)  # 0 is the column, 1 is the row
class SKFF(nn.Module):
    """Gated fusion of three equally shaped 768-wide feature vectors.

    The three inputs are summed, passed through an ``Attention(768, 768)``
    layer, and the result drives three independent ``Linear -> Softmax(dim=1)``
    gates (``fc1``/``fc2``/``fc3``). Each gate rescales its own input and the
    rescaled inputs are summed.

    ``elu``, ``fc0`` and ``fc4`` are created but not used by ``forward``; they
    are kept so the module's parameters (and their creation order) stay as they are.
    """

    def __init__(self):
        super().__init__()
        self.elu = nn.ELU(alpha=1.0)
        # fc0 .. fc4, created in this order.
        for layer_index in range(5):
            setattr(self, "fc{}".format(layer_index), nn.Linear(768, 768))
        self.Att_layer = Attention(768, 768)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, tfid, cls1, d1ap, d1mp):
        """Fuse ``cls1``, ``d1ap`` and ``d1mp``; ``tfid`` is accepted but not read."""
        summary = self.Att_layer(cls1 + d1ap + d1mp)

        gated_cls = self.softmax(self.fc1(summary)) * cls1
        gated_d1ap = self.softmax(self.fc2(summary)) * d1ap
        gated_d1mp = self.softmax(self.fc3(summary)) * d1mp

        return gated_cls + gated_d1ap + gated_d1mp


In [None]:
class AttentionMLP(nn.Module):
    """Classification head: SKFF fusion -> Linear + ReLU -> Attention -> Linear.

    Args:
        input_dim: width of the fused feature vector fed to ``fc1``.
        hidden_dim: width after ``fc1``.
        attention_dim: width produced by the attention layer.
        output_dim: number of output logits.
    """

    def __init__(self, input_dim, hidden_dim, attention_dim, output_dim):
        super().__init__()
        self.skkf = SKFF()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.attention = Attention(hidden_dim, attention_dim)
        self.fc2 = nn.Linear(attention_dim, output_dim)

    def forward(self, x):
        """Map an indexable ``x`` (its first four items are forwarded to SKFF) to logits."""
        fused = self.skkf(x[0], x[1], x[2], x[3])
        # Unpacking requires the fused tensor to be 2-D; the view keeps that shape.
        n_rows, n_cols = fused.shape
        fused = fused.view(n_rows, n_cols)
        hidden = F.relu(self.fc1(fused))
        attended = self.attention(hidden)
        return self.fc2(attended)

In [None]:


class MyModel(nn.Module):
    """DistilBERT encoder followed by the ``AttentionMLP`` classification head.

    The encoder is created from ``distilbert-base-uncased`` and its weights are
    then overwritten from ``./model not BERT/BERT.pt``. The head maps 768-wide
    features to 8 logits.
    """

    def __init__(self):
        super().__init__()

        self.bert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        # map_location="cpu" lets a checkpoint that was saved from a GPU run be
        # loaded on a CPU-only machine; moving the model to a device is left to
        # the caller.
        self.bert.load_state_dict(
            torch.load("./model not BERT/BERT.pt", map_location="cpu")
        )

        self.AttentionModel = AttentionMLP(input_dim=768,      # encoder hidden size
                                           hidden_dim=128,     # width after the first linear layer
                                           attention_dim=32,   # attention projection width
                                           output_dim=8)       # number of classes

    def forward(self, tfid, input_ids, mask_attation):
        """Return class logits for a batch.

        Args:
            tfid: passed through unchanged as the first element of the head's input.
            input_ids: token ids, ``(batch, seq_len)``.
            mask_attation: attention mask, ``(batch, seq_len)``; a
                ``(batch, 1, seq_len)`` mask is accepted and flattened.
        """
        # Give the encoder a mask with the same rank as input_ids.
        if (mask_attation is not None and mask_attation.dim() == 3
                and mask_attation.size(1) == 1):
            mask_attation = mask_attation.squeeze(1)

        BERT_output = self.bert(input_ids, attention_mask=mask_attation).last_hidden_state

        # Three views of the sequence: first token, mean and max over all positions.
        # NOTE(review): the mean/max run over every position, so padded
        # positions contribute as well -- confirm this is intended.
        cls_embedding = BERT_output[:, 0, :]
        mean_embedding = torch.mean(BERT_output, dim=1)
        max_embedding = torch.max(BERT_output, dim=1).values

        return self.AttentionModel([tfid, cls_embedding, mean_embedding, max_embedding])

In [None]:
# Tokenizer shared by every Dataset instance: pad on the right with '[PAD]'.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
tokenizer.padding_side = "right"
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Class name -> integer id, numbered in the order listed.
labels = {
    class_name: class_id
    for class_id, class_name in enumerate((
        'suicide', 'depression', 'anxiety', 'edanonymous',
        'socialanxiety', 'alcoholism', 'healthanxiety', 'addiction',
    ))
}
class Dataset(torch.utils.data.Dataset):
    """In-memory dataset built from a frame with ``'label'`` and ``'text'`` columns.

    Every text is tokenized up front with the module-level ``tokenizer``
    (padded/truncated to 512 tokens, PyTorch tensors); label names are mapped
    to integer ids through the module-level ``labels`` dict.

    Indexing yields ``(text, encoding, label)``.
    """

    def __init__(self, df):
        self.labels = [labels[name] for name in df['label']]
        self.texts = list(df['text'])
        self.fures = [
            tokenizer(sample,
                      padding='max_length',
                      max_length=512,
                      truncation=True,
                      return_tensors="pt")
            for sample in df['text']
        ]

    def classes(self):
        """Return the list of integer labels."""
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        """Return the label(s) at ``idx`` wrapped in a NumPy array."""
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        """Return the raw text(s) at ``idx``."""
        return self.texts[idx]

    def get_batch_fures(self, idx):
        """Return the tokenizer output(s) at ``idx``."""
        return self.fures[idx]

    def __getitem__(self, idx):
        return (
            self.get_batch_texts(idx),
            self.get_batch_fures(idx),
            self.get_batch_labels(idx),
        )

In [None]:

def _batch_to_device(batch_input, batch_label, device):
    """Move one collated batch to ``device`` and return ``(input_ids, mask, labels)``.

    ``input_ids`` and ``attention_mask`` are both squeezed on dim 1 so they reach
    the model with the same rank (each sample is tokenized on its own with
    ``return_tensors="pt"``, which presumably leaves a singleton dim after
    collation -- TODO confirm).
    """
    label = batch_label.long().to(device)
    mask = batch_input['attention_mask'].squeeze(1).to(device)
    input_id = batch_input['input_ids'].squeeze(1).to(device)
    return input_id, mask, label


def _tfidf_tensor(vectorizer, texts, device):
    """Turn a batch of raw texts into a dense float32 TF-IDF tensor on ``device``."""
    return torch.tensor(vectorizer.transform(texts).toarray(), dtype=torch.float32).to(device)


def _evaluate(model, dataloader, vectorizer, criterion, device):
    """Run ``model`` over ``dataloader`` in eval mode without gradients.

    Returns ``(summed per-sample loss, true labels, predicted labels)``.
    """
    model.eval()
    loss_sum = 0.0
    true_labels = []
    pred_labels = []
    with torch.no_grad():
        for text, batch_input, batch_label in tqdm(dataloader):
            input_id, mask, label = _batch_to_device(batch_input, batch_label, device)
            tfid = _tfidf_tensor(vectorizer, text, device)

            output = model(tfid, input_id, mask)
            # criterion returns the batch mean; weight it by the batch size so
            # a partial last batch is accounted for correctly.
            loss_sum += criterion(output, label).item() * label.size(0)

            true_labels += label.cpu().numpy().flatten().tolist()
            pred_labels += output.argmax(dim=1).cpu().numpy().flatten().tolist()
    return loss_sum, true_labels, pred_labels


def train(model, df_train, df_val, df_test, learning_rate, epochs,
          batch_size=16,
          output_dir="./Balance TF-skkf DistilBERT with BERT no TFIDF"):
    """Fine-tune ``model`` and, after every epoch, validate, test and save artefacts.

    Args:
        model: module called as ``model(tfid, input_ids, attention_mask)`` returning logits.
        df_train, df_val, df_test: frames accepted by ``Dataset`` ('text' and 'label' columns).
        learning_rate: Adam learning rate.
        epochs: number of passes over the training set.
        batch_size: samples per batch for all three loaders.
        output_dir: directory for per-epoch checkpoints and confusion-matrix
            images; created if missing.

    Per epoch this writes ``model_epoch_<n>.pt`` and ``matrix_EPOCHS<n>.png`` to
    ``output_dir`` and prints train/validation loss and accuracy plus test
    accuracy, macro recall and macro F1. Returns ``None``.
    """
    print("begin function")
    print("Downloading Dataset")

    train_set = Dataset(df_train)
    val_set = Dataset(df_val)
    test_set = Dataset(df_test)
    train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    # Evaluation does not depend on sample order, so keep it deterministic.
    val_dataloader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)
    print("completed Dataset")

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    # Make sure the model lives on the same device the batches are sent to.
    model.to(device)

    # Fit TF-IDF once on the training texts so the vocabulary and feature width
    # are identical for every train/val/test batch (re-fitting per batch gives
    # a different vocabulary each step).
    vectorizer = TfidfVectorizer(max_features=512, min_df=2)
    vectorizer.fit(train_set.texts)

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=learning_rate)
    print("cuda")

    criterion = criterion.to(device)
    os.makedirs(output_dir, exist_ok=True)

    n_train = max(len(train_set), 1)
    n_val = max(len(val_set), 1)

    print("begin loop")
    for epoch_num in range(epochs):
        total_acc_train = 0
        total_loss_train = 0
        print("epoch:", epoch_num)

        # from_pretrained returns the encoder in eval mode; switch the whole
        # model to training mode so dropout is active while fitting.
        model.train()
        for text, train_input, train_label in tqdm(train_dataloader):
            input_id, mask, train_label = _batch_to_device(train_input, train_label, device)
            tfid = _tfidf_tensor(vectorizer, text, device)

            optimizer.zero_grad()  # clear gradients from the previous step
            output = model(tfid, input_id, mask)
            batch_loss = criterion(output, train_label)
            total_loss_train += batch_loss.item() * train_label.size(0)
            total_acc_train += (output.argmax(dim=1) == train_label).sum().item()

            batch_loss.backward()
            optimizer.step()
        torch.save(model.state_dict(),
                   os.path.join(output_dir, "model_epoch_{}.pt".format(epoch_num)))

        # Validation
        total_loss_val, val_true, val_pred = _evaluate(
            model, val_dataloader, vectorizer, criterion, device)
        total_acc_val = sum(int(t == p) for t, p in zip(val_true, val_pred))

        print(f"Epochs: {epoch_num + 1}")
        print(f"| Train Loss: {total_loss_train / n_train: .3f} \
                | Train Accuracy: {total_acc_train / n_train: .3f}")
        print(f"| Val Loss: {total_loss_val / n_val: .3f} \
                | Val Accuracy: {total_acc_val / n_val: .3f}")

        # Test
        _, true_labels, pred_labels = _evaluate(
            model, test_dataloader, vectorizer, criterion, device)

        accuracy = accuracy_score(true_labels, pred_labels)
        # 'macro' is the unweighted mean over classes.
        recall = recall_score(true_labels, pred_labels, average='macro')
        f1 = f1_score(true_labels, pred_labels, average='macro')
        print(f"Accuracy: {accuracy:.3f},    Recall: {recall:.3f},    F1 Score: {f1:.3f}\n")

        # Row-normalised confusion matrix for this epoch.
        fig, ax = plt.subplots(figsize=(8, 8))
        cm = confusion_matrix(y_true=true_labels, y_pred=pred_labels,
                              labels=range(len(labels)), normalize='true')
        disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                                      display_labels=list(labels.keys()))
        disp.plot(ax=ax)
        # Rotate the x tick labels so the class names stay readable.
        ax.tick_params(axis="x", labelrotation=45)
        fig.savefig(os.path.join(output_dir, "matrix_EPOCHS{}.png".format(epoch_num)), dpi=1080)
        # The figure is persisted on disk; close it so figures do not pile up
        # in memory across epochs.
        plt.close(fig)

In [None]:
# Read the train / validation / test splits (pass nrows=100 to read_csv for a quick smoke run).
df_train, df_val, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        './strength data/train.csv',
        './strength data/val.csv',
        './strength data/test.csv',
    )
)
print(df_train.shape,df_val.shape,df_test.shape)

In [None]:
# Run configuration: device, optimiser settings and the model instance.
use_cuda = torch.cuda.is_available()
# Fall back to the CPU when no GPU is present instead of failing on model.to().
device = torch.device("cuda" if use_cuda else "cpu")
LR = 1e-5
EPOCHS=10
Batch_Size=16

model = MyModel()
model.to(device)

In [None]:
# Launch fine-tuning with the settings from the configuration cell.
train(model, df_train, df_val, df_test, learning_rate=LR, epochs=EPOCHS)