In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 设置随机种子
seed = 42
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)

# 加载数据
file_path = '../datasets_FIX2/FIX2_deduplicated_mangoNews_Nums3000p_CategoryMerge_new_undersampled_Example.csv'
# file_path = '../datasets_FIX2/FIX2_deduplicated_mangoNews_Nums3000p_CategoryMerge_new_undersampled.csv'

data = pd.read_csv(file_path,low_memory=False,lineterminator="\n")


# 加载BERT tokenizer和模型
model_name = '../bert-base-multilingual-cased'
tokenizer = BertTokenizer.from_pretrained(model_name)

# 将模型移动到GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 定义数据集类
class NewsDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, index):
        text = str(self.texts[index])
        label = self.labels[index]

        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
            truncation=True
        )

        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

# 将孟加拉语类别转换为数字标签
label_map = {label: i for i, label in enumerate(data['category1'].unique())}
labels = data['category1'].map(label_map).tolist()

# 定义训练和评估函数
def train_epoch(model, data_loader, optimizer, scheduler, device):
    model.train()
    total_loss = 0
    progress_bar = tqdm(data_loader, desc='Training', leave=False)
    for batch in progress_bar:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()
        progress_bar.set_postfix({'loss': loss.item()})
    return total_loss / len(data_loader)

def evaluate(model, data_loader, device, label_map):
    model.eval()
    predictions = []
    true_labels = []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            batch_predictions = torch.argmax(logits, dim=1)
            predictions.extend(batch_predictions.tolist())
            true_labels.extend(labels.tolist())

    label_map_inv = {v: k for k, v in label_map.items()}
    print(label_map_inv)
    print(predictions)

    predictions = [label_map_inv[i] for i in predictions]
    true_labels = [label_map_inv[i] for i in true_labels]

    report = classification_report(true_labels, predictions, digits=4)
    return report

# 设置超参数
num_epochs = 2
batch_size = 4
learning_rate = 2e-5
num_classes = len(label_map)
kfold = KFold(n_splits=5, shuffle=True, random_state=seed)



In [3]:
# 存储所有fold的性能指标
all_reports = []
    
# K-Fold交叉验证
for fold, (train_idx, val_idx) in enumerate(kfold.split(data)):
    print(f'Fold {fold + 1}')
    
    train_data = data.iloc[train_idx]
    val_data = data.iloc[val_idx]

    train_texts = train_data['body'].tolist()
    train_labels = train_data['category1'].map(label_map).tolist()
    val_texts = val_data['body'].tolist()
    val_labels = val_data['category1'].map(label_map).tolist()

    train_dataset = NewsDataset(train_texts, train_labels, tokenizer)
    val_dataset = NewsDataset(val_texts, val_labels, tokenizer)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    model = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_classes)
    model.to(device)

    optimizer = AdamW(model.parameters(), lr=learning_rate)
    total_steps = len(train_loader) * num_epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    best_val_loss = float('inf')
    for epoch in range(num_epochs):
        train_loss = train_epoch(model, train_loader, optimizer, scheduler, device)
        val_report = evaluate(model, val_loader, device, label_map)

        print(f'Epoch {epoch + 1}/{num_epochs}')
        print(f'Train Loss: {train_loss:.4f}')
        print('Validation Report:')
        print(val_report)

        val_loss = 1 - float(val_report.split('\n')[-2].split()[-2])  # 提取验证集损失
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), f'best_model_fold_{fold + 1}.pth')

    print()

    # 在每个fold结束后,评估最佳模型在验证集上的性能
    best_model = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_classes)
    best_model.load_state_dict(torch.load(f'best_model_fold_{fold + 1}.pth'))
    best_model.to(device)
    val_report = evaluate(best_model, val_loader, device, label_map)
    all_reports.append(val_report)

    print(f'Fold {fold + 1} Best Validation Report:')
    print(val_report)
    print()


Fold 1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
                                                                       

Epoch 1/2
Train Loss: 1.5010
Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.9167    0.6111    0.7333        18
    অর্থনীতি     0.7200    0.6207    0.6667        29
         আইন     0.8421    0.5926    0.6957        27
    খেলাধুলা     0.9583    1.0000    0.9787        23
     বিজ্ঞান     0.8000    0.6667    0.7273        24
      বিনোদন     0.8400    1.0000    0.9130        21
     রাজনীতি     0.7500    0.9600    0.8421        25
  লাইফস্টাইল     0.7500    0.7059    0.7273        17
      শিক্ষা     0.5185    0.8750    0.6512        16

    accuracy                         0.7750       200
   macro avg     0.7884    0.7813    0.7706       200
weighted avg     0.7940    0.7750    0.7714       200



                                                                       

Epoch 2/2
Train Loss: 0.6160
Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.9167    0.6111    0.7333        18
    অর্থনীতি     0.6552    0.6552    0.6552        29
         আইন     0.7241    0.7778    0.7500        27
    খেলাধুলা     0.9583    1.0000    0.9787        23
     বিজ্ঞান     0.9444    0.7083    0.8095        24
      বিনোদন     0.8696    0.9524    0.9091        21
     রাজনীতি     1.0000    0.8800    0.9362        25
  লাইফস্টাইল     0.7368    0.8235    0.7778        17
      শিক্ষা     0.5833    0.8750    0.7000        16

    accuracy                         0.8050       200
   macro avg     0.8209    0.8093    0.8055       200
weighted avg     0.8244    0.8050    0.8065       200




Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1 Best Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.9167    0.6111    0.7333        18
    অর্থনীতি     0.6552    0.6552    0.6552        29
         আইন     0.7241    0.7778    0.7500        27
    খেলাধুলা     0.9583    1.0000    0.9787        23
     বিজ্ঞান     0.9444    0.7083    0.8095        24
      বিনোদন     0.8696    0.9524    0.9091        21
     রাজনীতি     1.0000    0.8800    0.9362        25
  লাইফস্টাইল     0.7368    0.8235    0.7778        17
      শিক্ষা     0.5833    0.8750    0.7000        16

    accuracy                         0.8050       200
   macro avg     0.8209    0.8093    0.8055       200
weighted avg     0.8244    0.8050    0.8065       200


Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
                                                                       

Epoch 1/2
Train Loss: 1.5460
Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.8947    0.7391    0.8095        23
    অর্থনীতি     0.6667    0.7000    0.6829        20
         আইন     0.7059    0.5455    0.6154        22
    খেলাধুলা     1.0000    0.9600    0.9796        25
     বিজ্ঞান     0.8333    0.7143    0.7692        14
      বিনোদন     0.5946    1.0000    0.7458        22
     রাজনীতি     0.7826    0.7826    0.7826        23
  লাইফস্টাইল     0.8182    0.6429    0.7200        28
      শিক্ষা     0.8400    0.9130    0.8750        23

    accuracy                         0.7800       200
   macro avg     0.7929    0.7775    0.7756       200
weighted avg     0.7971    0.7800    0.7788       200



                                                                       

Epoch 2/2
Train Loss: 0.6223
Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.8333    0.8696    0.8511        23
    অর্থনীতি     0.6957    0.8000    0.7442        20
         আইন     0.8500    0.7727    0.8095        22
    খেলাধুলা     1.0000    0.9600    0.9796        25
     বিজ্ঞান     0.8571    0.8571    0.8571        14
      বিনোদন     0.9091    0.9091    0.9091        22
     রাজনীতি     0.8571    0.7826    0.8182        23
  লাইফস্টাইল     0.8214    0.8214    0.8214        28
      শিক্ষা     0.8333    0.8696    0.8511        23

    accuracy                         0.8500       200
   macro avg     0.8508    0.8491    0.8490       200
weighted avg     0.8533    0.8500    0.8508       200




Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 2 Best Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.8333    0.8696    0.8511        23
    অর্থনীতি     0.6957    0.8000    0.7442        20
         আইন     0.8500    0.7727    0.8095        22
    খেলাধুলা     1.0000    0.9600    0.9796        25
     বিজ্ঞান     0.8571    0.8571    0.8571        14
      বিনোদন     0.9091    0.9091    0.9091        22
     রাজনীতি     0.8571    0.7826    0.8182        23
  লাইফস্টাইল     0.8214    0.8214    0.8214        28
      শিক্ষা     0.8333    0.8696    0.8511        23

    accuracy                         0.8500       200
   macro avg     0.8508    0.8491    0.8490       200
weighted avg     0.8533    0.8500    0.8508       200


Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
                                                                       

Epoch 1/2
Train Loss: 1.3753
Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.8667    0.5417    0.6667        24
    অর্থনীতি     0.8333    0.5000    0.6250        20
         আইন     0.8462    1.0000    0.9167        22
    খেলাধুলা     1.0000    1.0000    1.0000        19
     বিজ্ঞান     0.6400    0.8000    0.7111        20
      বিনোদন     0.8571    0.9000    0.8780        20
     রাজনীতি     0.7857    1.0000    0.8800        22
  লাইফস্টাইল     0.7826    0.8571    0.8182        21
      শিক্ষা     0.9032    0.8750    0.8889        32

    accuracy                         0.8300       200
   macro avg     0.8350    0.8304    0.8205       200
weighted avg     0.8382    0.8300    0.8222       200



                                                                       

Epoch 2/2
Train Loss: 0.5621
Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.8947    0.7083    0.7907        24
    অর্থনীতি     0.6538    0.8500    0.7391        20
         আইন     0.9130    0.9545    0.9333        22
    খেলাধুলা     1.0000    1.0000    1.0000        19
     বিজ্ঞান     0.8235    0.7000    0.7568        20
      বিনোদন     1.0000    0.9000    0.9474        20
     রাজনীতি     0.7692    0.9091    0.8333        22
  লাইফস্টাইল     0.9474    0.8571    0.9000        21
      শিক্ষা     0.8788    0.9062    0.8923        32

    accuracy                         0.8650       200
   macro avg     0.8756    0.8650    0.8659       200
weighted avg     0.8752    0.8650    0.8658       200




Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 3 Best Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.8947    0.7083    0.7907        24
    অর্থনীতি     0.6538    0.8500    0.7391        20
         আইন     0.9130    0.9545    0.9333        22
    খেলাধুলা     1.0000    1.0000    1.0000        19
     বিজ্ঞান     0.8235    0.7000    0.7568        20
      বিনোদন     1.0000    0.9000    0.9474        20
     রাজনীতি     0.7692    0.9091    0.8333        22
  লাইফস্টাইল     0.9474    0.8571    0.9000        21
      শিক্ষা     0.8788    0.9062    0.8923        32

    accuracy                         0.8650       200
   macro avg     0.8756    0.8650    0.8659       200
weighted avg     0.8752    0.8650    0.8658       200


Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
                                                                       

Epoch 1/2
Train Loss: 1.4317
Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.8333    0.4348    0.5714        23
    অর্থনীতি     0.8889    0.6400    0.7442        25
         আইন     0.6757    0.9259    0.7813        27
    খেলাধুলা     0.9375    0.9375    0.9375        32
     বিজ্ঞান     0.9286    0.6190    0.7429        21
      বিনোদন     0.8750    1.0000    0.9333         7
     রাজনীতি     0.8333    0.8929    0.8621        28
  লাইফস্টাইল     0.6452    0.9091    0.7547        22
      শিক্ষা     0.7222    0.8667    0.7879        15

    accuracy                         0.7950       200
   macro avg     0.8155    0.8029    0.7906       200
weighted avg     0.8181    0.7950    0.7877       200



                                                                       

Epoch 2/2
Train Loss: 0.5922
Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.7647    0.5652    0.6500        23
    অর্থনীতি     0.7586    0.8800    0.8148        25
         আইন     0.7353    0.9259    0.8197        27
    খেলাধুলা     0.9394    0.9688    0.9538        32
     বিজ্ঞান     0.9167    0.5238    0.6667        21
      বিনোদন     0.8750    1.0000    0.9333         7
     রাজনীতি     0.9231    0.8571    0.8889        28
  লাইফস্টাইল     0.8000    0.9091    0.8511        22
      শিক্ষা     0.8125    0.8667    0.8387        15

    accuracy                         0.8300       200
   macro avg     0.8361    0.8330    0.8241       200
weighted avg     0.8374    0.8300    0.8235       200




Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 4 Best Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.7647    0.5652    0.6500        23
    অর্থনীতি     0.7586    0.8800    0.8148        25
         আইন     0.7353    0.9259    0.8197        27
    খেলাধুলা     0.9394    0.9688    0.9538        32
     বিজ্ঞান     0.9167    0.5238    0.6667        21
      বিনোদন     0.8750    1.0000    0.9333         7
     রাজনীতি     0.9231    0.8571    0.8889        28
  লাইফস্টাইল     0.8000    0.9091    0.8511        22
      শিক্ষা     0.8125    0.8667    0.8387        15

    accuracy                         0.8300       200
   macro avg     0.8361    0.8330    0.8241       200
weighted avg     0.8374    0.8300    0.8235       200


Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
                                                                       

Epoch 1/2
Train Loss: 1.4277
Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     1.0000    0.5882    0.7407        17
    অর্থনীতি     0.6957    0.6667    0.6809        24
         আইন     0.8571    0.8571    0.8571        21
    খেলাধুলা     0.8462    1.0000    0.9167        22
     বিজ্ঞান     0.9333    0.6087    0.7368        23
      বিনোদন     0.8571    0.9600    0.9057        25
     রাজনীতি     0.8095    0.8500    0.8293        20
  লাইফস্টাইল     0.8750    0.8077    0.8400        26
      শিক্ষা     0.6562    0.9545    0.7778        22

    accuracy                         0.8150       200
   macro avg     0.8367    0.8103    0.8094       200
weighted avg     0.8329    0.8150    0.8111       200



                                                                       

Epoch 2/2
Train Loss: 0.6597
Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.8333    0.5882    0.6897        17
    অর্থনীতি     0.8235    0.5833    0.6829        24
         আইন     0.7917    0.9048    0.8444        21
    খেলাধুলা     1.0000    1.0000    1.0000        22
     বিজ্ঞান     0.7143    0.8696    0.7843        23
      বিনোদন     0.8000    0.8000    0.8000        25
     রাজনীতি     0.7917    0.9500    0.8636        20
  লাইফস্টাইল     0.8519    0.8846    0.8679        26
      শিক্ষা     0.9048    0.8636    0.8837        22

    accuracy                         0.8300       200
   macro avg     0.8346    0.8271    0.8241       200
weighted avg     0.8344    0.8300    0.8258       200




Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 5 Best Validation Report:
              precision    recall  f1-score   support

    অন্যান্য     0.8333    0.5882    0.6897        17
    অর্থনীতি     0.8235    0.5833    0.6829        24
         আইন     0.7917    0.9048    0.8444        21
    খেলাধুলা     1.0000    1.0000    1.0000        22
     বিজ্ঞান     0.7143    0.8696    0.7843        23
      বিনোদন     0.8000    0.8000    0.8000        25
     রাজনীতি     0.7917    0.9500    0.8636        20
  লাইফস্টাইল     0.8519    0.8846    0.8679        26
      শিক্ষা     0.9048    0.8636    0.8837        22

    accuracy                         0.8300       200
   macro avg     0.8346    0.8271    0.8241       200
weighted avg     0.8344    0.8300    0.8258       200




In [4]:

# 计算并打印所有fold的平均性能
print('Average Performance Across All Folds:')
all_lines = [report.split('\n') for report in all_reports]
header = all_lines[0][0] + '\t' + '\t'.join([line.strip() for line in all_lines[0][-4:]])

values = []
for report in all_reports:
    lines = report.split('\n')
    cls_lines = lines[1:-5]
    cls_values = []
    for line in cls_lines:
        parts = line.split()
        if len(parts) >= 5:
            cls_values.append([float(val) if val != 'nan' else 0.0 for val in parts[1:-1]])
    values.append(cls_values)

avg_values = np.mean(values, axis=0)
cls_report = '\n'.join([f'{cls}\t{prec:.4f}\t{rec:.4f}\t{f1:.4f}\t{sup:.0f}' for cls, (prec, rec, f1, sup) in zip(label_map.keys(), avg_values)])

avg_acc = np.mean([float(line.split()[-2]) for report in all_reports for line in report.split('\n') if 'accuracy' in line])
avg_macro = np.mean([float(line.split()[-2]) for report in all_reports for line in report.split('\n') if 'macro avg' in line])
avg_weighted = np.mean([float(line.split()[-2]) for report in all_reports for line in report.split('\n') if 'weighted avg' in line])

avg_report = header + '\n' + cls_report + '\n' + f"accuracy\t{avg_acc:.4f}\nmacro avg\t{avg_macro:.4f}\nweighted avg\t{avg_weighted:.4f}"
print(avg_report)

Average Performance Across All Folds:


ValueError: not enough values to unpack (expected 4, got 3)