In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from keras.models import Sequential
from keras.layers import Dense, Dropout
import re
from sklearn.utils import resample

# Preprocessing function
def preprocess_text(text):
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = text.lower()  # Convert to lowercase
    return text

# Load dataset
data = pd.read_excel("news.xlsx", engine="openpyxl")
data['news'] = data['news'].apply(preprocess_text)
X = data['news']
y = data['label'].astype('category').cat.codes  # Encode labels

# Convert text to feature vectors using TF-IDF
vectorizer = TfidfVectorizer(max_features=5000)
X_tfidf = vectorizer.fit_transform(X).toarray()

# Cross-validation setup
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []

for train_idx, test_idx in kf.split(X_tfidf, y):
    X_train, X_test = X_tfidf[train_idx], X_tfidf[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Base models
    rf = RandomForestClassifier(n_estimators=10, random_state=42)
    lr = LogisticRegression(max_iter=1000, random_state=42)
    svm = SVC(probability=True, random_state=42)

    rf.fit(X_train, y_train)
    lr.fit(X_train, y_train)
    svm.fit(X_train, y_train)

    rf_preds = rf.predict_proba(X_train)
    lr_preds = lr.predict_proba(X_train)
    svm_preds = svm.predict_proba(X_train)
    stacked_features = np.hstack((rf_preds, lr_preds, svm_preds))

    # Meta-model
    meta_model = Sequential()
    meta_model.add(Dense(128, activation='relu', input_dim=stacked_features.shape[1]))
    meta_model.add(Dropout(0.5))
    meta_model.add(Dense(64, activation='relu'))
    meta_model.add(Dropout(0.5))
    meta_model.add(Dense(len(y.unique()), activation='softmax'))
    meta_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    meta_model.fit(stacked_features, y_train, epochs=10, batch_size=32, verbose=0)

    # Test predictions
    rf_test_preds = rf.predict_proba(X_test)
    lr_test_preds = lr.predict_proba(X_test)
    svm_test_preds = svm.predict_proba(X_test)
    stacked_test_features = np.hstack((rf_test_preds, lr_test_preds, svm_test_preds))
    meta_test_preds = np.argmax(meta_model.predict(stacked_test_features), axis=1)

    # Evaluate
    accuracy_scores.append(accuracy_score(y_test, meta_test_preds))
    precision_scores.append(precision_score(y_test, meta_test_preds, average='weighted'))
    recall_scores.append(recall_score(y_test, meta_test_preds, average='weighted'))
    f1_scores.append(f1_score(y_test, meta_test_preds, average='weighted'))

# Print average performance
print(f"Accuracy: {np.mean(accuracy_scores)} ± {np.std(accuracy_scores)}")
print(f"Precision: {np.mean(precision_scores)} ± {np.std(precision_scores)}")
print(f"Recall: {np.mean(recall_scores)} ± {np.std(recall_scores)}")
print(f"F1-Score: {np.mean(f1_scores)} ± {np.std(f1_scores)}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Accuracy: 0.9228729265314632 ± 0.00972392455109911
Precision: 0.9246476512023005 ± 0.008516217663304279
Recall: 0.9228729265314632 ± 0.00972392455109911
F1-Score: 0.9220699307299496 ± 0.010126001893883244


# Smote

In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from keras.models import Sequential
from keras.layers import Dense, Dropout
import re
from imblearn.over_sampling import SMOTE

# Preprocessing function
def preprocess_text(text):
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = text.lower()  # Convert to lowercase
    return text

# Load dataset
data = pd.read_excel("news.xlsx", engine="openpyxl")
data['news'] = data['news'].apply(preprocess_text)
X = data['news']
y = data['label'].astype('category').cat.codes  # Encode labels

# Convert text to feature vectors using TF-IDF
vectorizer = TfidfVectorizer(max_features=5000)
X_tfidf = vectorizer.fit_transform(X).toarray()

# Cross-validation setup
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []

for train_idx, test_idx in kf.split(X_tfidf, y):
    X_train, X_test = X_tfidf[train_idx], X_tfidf[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Apply SMOTE to balance classes
    smote = SMOTE(random_state=42)
    X_train, y_train = smote.fit_resample(X_train, y_train)

    # Base models
    rf = RandomForestClassifier(n_estimators=10, random_state=42)
    lr = LogisticRegression(max_iter=1000, random_state=42)
    svm = SVC(probability=True, random_state=42)

    rf.fit(X_train, y_train)
    lr.fit(X_train, y_train)
    svm.fit(X_train, y_train)

    rf_preds = rf.predict_proba(X_train)
    lr_preds = lr.predict_proba(X_train)
    svm_preds = svm.predict_proba(X_train)
    stacked_features = np.hstack((rf_preds, lr_preds, svm_preds))

    # Meta-model
    meta_model = Sequential()
    meta_model.add(Dense(128, activation='relu', input_dim=stacked_features.shape[1]))
    meta_model.add(Dropout(0.5))
    meta_model.add(Dense(64, activation='relu'))
    meta_model.add(Dropout(0.5))
    meta_model.add(Dense(len(np.unique(y)), activation='softmax'))  # Number of unique classes
    meta_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    meta_model.fit(stacked_features, y_train, epochs=10, batch_size=32, verbose=0)

    # Test predictions
    rf_test_preds = rf.predict_proba(X_test)
    lr_test_preds = lr.predict_proba(X_test)
    svm_test_preds = svm.predict_proba(X_test)
    stacked_test_features = np.hstack((rf_test_preds, lr_test_preds, svm_test_preds))
    meta_test_preds = np.argmax(meta_model.predict(stacked_test_features), axis=1)

    # Evaluate
    accuracy_scores.append(accuracy_score(y_test, meta_test_preds))
    precision_scores.append(precision_score(y_test, meta_test_preds, average='weighted'))
    recall_scores.append(recall_score(y_test, meta_test_preds, average='weighted'))
    f1_scores.append(f1_score(y_test, meta_test_preds, average='weighted'))

# Print average performance
print(f"Accuracy: {np.mean(accuracy_scores)} ± {np.std(accuracy_scores)}")
print(f"Precision: {np.mean(precision_scores)} ± {np.std(precision_scores)}")
print(f"Recall: {np.mean(recall_scores)} ± {np.std(recall_scores)}")
print(f"F1-Score: {np.mean(f1_scores)} ± {np.std(f1_scores)}")

[WinError 2] The system cannot find the file specified
  File "C:\Users\mobee\anaconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "C:\Users\mobee\anaconda3\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\mobee\anaconda3\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Users\mobee\anaconda3\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Accuracy: 0.9277524643378303 ± 0.003116180907960876
Precision: 0.9287531729673615 ± 0.0030931157554715764
Recall: 0.9277524643378303 ± 0.003116180907960876
F1-Score: 0.9272091280730022 ± 0.0032341651889648197


# T Test

In [4]:
from scipy.stats import ttest_rel

# Cross-validation setup
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
stacked_accuracy_scores, baseline_accuracy_scores = [], []

for train_idx, test_idx in kf.split(X_tfidf, y):
    X_train, X_test = X_tfidf[train_idx], X_tfidf[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Apply SMOTE to balance classes
    smote = SMOTE(random_state=42)
    X_train, y_train = smote.fit_resample(X_train, y_train)

    # Base models
    rf = RandomForestClassifier(n_estimators=10, random_state=42)
    lr = LogisticRegression(max_iter=1000, random_state=42)
    svm = SVC(probability=True, random_state=42)

    rf.fit(X_train, y_train)
    lr.fit(X_train, y_train)
    svm.fit(X_train, y_train)

    # Stacked model predictions
    rf_preds = rf.predict_proba(X_train)
    lr_preds = lr.predict_proba(X_train)
    svm_preds = svm.predict_proba(X_train)
    stacked_features = np.hstack((rf_preds, lr_preds, svm_preds))

    # Meta-model
    meta_model = Sequential()
    meta_model.add(Dense(128, activation='relu', input_dim=stacked_features.shape[1]))
    meta_model.add(Dropout(0.5))
    meta_model.add(Dense(64, activation='relu'))
    meta_model.add(Dropout(0.5))
    meta_model.add(Dense(len(np.unique(y)), activation='softmax'))
    meta_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    meta_model.fit(stacked_features, y_train, epochs=10, batch_size=32, verbose=0)

    # Test predictions for stacked model
    rf_test_preds = rf.predict_proba(X_test)
    lr_test_preds = lr.predict_proba(X_test)
    svm_test_preds = svm.predict_proba(X_test)
    stacked_test_features = np.hstack((rf_test_preds, lr_test_preds, svm_test_preds))
    meta_test_preds = np.argmax(meta_model.predict(stacked_test_features), axis=1)

    # Test predictions for baseline model (e.g., Random Forest)
    baseline_preds = rf.predict(X_test)

    # Evaluate stacked model
    stacked_accuracy_scores.append(accuracy_score(y_test, meta_test_preds))

    # Evaluate baseline model
    baseline_accuracy_scores.append(accuracy_score(y_test, baseline_preds))

# Perform T-Test
t_statistic, p_value = ttest_rel(stacked_accuracy_scores, baseline_accuracy_scores)

# Print results
print(f"Stacked Model Accuracy: {np.mean(stacked_accuracy_scores)} ± {np.std(stacked_accuracy_scores)}")
print(f"Baseline Model Accuracy: {np.mean(baseline_accuracy_scores)} ± {np.std(baseline_accuracy_scores)}")
print(f"T-Test Results: t-statistic = {t_statistic}, p-value = {p_value}")

# Interpret p-value
if p_value < 0.05:
    print("The difference in performance is statistically significant (p < 0.05).")
else:
    print("The difference in performance is not statistically significant (p >= 0.05).")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Stacked Model Accuracy: 0.9228702462848805 ± 0.0038164461698806432
Baseline Model Accuracy: 0.860385359897555 ± 0.007740764963702081
T-Test Results: t-statistic = 17.628660170222233, p-value = 6.081552661174083e-05
The difference in performance is statistically significant (p < 0.05).


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import Dataset, DataLoader
import torch
from torch import nn
from torch.optim import AdamW
from urduhack.preprocessing import normalize_whitespace, remove_punctuation
from urduhack.tokenization import sentence_tokenizer
import os
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from keras.models import Sequential
from keras.layers import Dense, Dropout
import re
from sklearn.utils import resample

# Preprocessing function
def preprocess_text(text):
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = text.lower()  # Convert to lowercase
    return text

# Load dataset
data = pd.read_excel("news.xlsx", engine="openpyxl")
data['news'] = data['news'].apply(preprocess_text)
X = data['news']
y = data['label'].astype('category').cat.codes  # Encode labels

# Convert text to feature vectors using TF-IDF
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(X).toarray()


In [1]:
import numpy as np
import pandas as pd
import torch
import os
import re

from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch import nn
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.feature_extraction.text import TfidfVectorizer

# Enable multi-processing for tokenization
os.environ["TOKENIZERS_PARALLELISM"] = "true"

# Load pre-trained BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Preprocessing function
def preprocess_text(text):
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    return text.lower()  # Convert to lowercase

# Load dataset
data = pd.read_excel("news.xlsx", engine="openpyxl")
data['news'] = data['news'].apply(preprocess_text)
X = data['news']
y = data['label'].astype('category').cat.codes  # Encode labels

# Convert text to feature vectors using TF-IDF
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(X).toarray()
X = pd.DataFrame(X)  # Convert to DataFrame to avoid shape issues

# Function to tokenize text
def tokenize_text(texts, max_length=128):
    return tokenizer(
        texts.tolist(),  # Convert pandas Series to list
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors='pt'
    )

# Custom Dataset class
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = torch.tensor(labels, dtype=torch.long)  # Convert labels to tensor

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx]
        return item

    def __len__(self):
        return len(self.labels)

# Training function
def train_model(model, train_loader, val_loader, optimizer, device, epochs=3):
    model.to(device)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader)}")

        # Validation
        model.eval()
        val_preds, val_labels = [], []
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(input_ids, attention_mask=attention_mask)
                logits = outputs.logits
                preds = torch.argmax(logits, dim=-1)

                val_preds.extend(preds.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())

        # Calculate metrics
        accuracy = accuracy_score(val_labels, val_preds)
        precision = precision_score(val_labels, val_preds, average='binary')  # Change to 'macro' for multiclass
        recall = recall_score(val_labels, val_preds, average='binary')
        f1 = f1_score(val_labels, val_preds, average='binary')

        print(f"Validation Metrics - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-score: {f1}")

    return val_preds, val_labels

# Initialize K-Fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Store evaluation metrics for each fold
results = []

for fold, (train_idx, val_idx) in enumerate(kfold.split(X)):
    print(f"Fold {fold + 1}")

    # Split data into training and validation sets
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # Convert feature vectors back to text (if needed)
    X_train_text = data.iloc[train_idx]['news']
    X_val_text = data.iloc[val_idx]['news']

    # Tokenize the training and validation data
    X_train_tokens = tokenize_text(X_train_text)
    X_val_tokens = tokenize_text(X_val_text)

    # Create custom datasets
    train_dataset = CustomDataset(X_train_tokens, y_train.tolist())
    val_dataset = CustomDataset(X_val_tokens, y_val.tolist())

    # Create DataLoader
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=16)

    # Load pre-trained BERT model for sequence classification
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(y.unique()))

    # Set up optimizer
    optimizer = AdamW(model.parameters(), lr=2e-5)

    # Train and evaluate the model
    val_preds, val_labels = train_model(model, train_loader, val_loader, optimizer, device)

    # Calculate metrics for the current fold
    accuracy = accuracy_score(val_labels, val_preds)
    precision = precision_score(val_labels, val_preds, average='binary')
    recall = recall_score(val_labels, val_preds, average='binary')
    f1 = f1_score(val_labels, val_preds, average='binary')

    # Store metrics for the current fold
    results.append({
        'Fold': fold + 1,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-score': f1
    })

    print(f"Fold {fold + 1} Metrics:")
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-score: {f1}")
    print("-" * 50)

# Convert results to a DataFrame
results_df = pd.DataFrame(results)

# Calculate average metrics across all folds
avg_metrics = pd.DataFrame({
    'Fold': ['Average'],
    'Accuracy': [results_df['Accuracy'].mean()],
    'Precision': [results_df['Precision'].mean()],
    'Recall': [results_df['Recall'].mean()],
    'F1-score': [results_df['F1-score'].mean()]
})

# Append the average metrics to the results table
results_df = pd.concat([results_df, avg_metrics], ignore_index=True)

# Print the results table
print("\nResults Table:")
print(results_df.to_string(index=False))


Fold 1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.6683362707859133


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Validation Metrics - Accuracy: 0.5670731707317073, Precision: 0.0, Recall: 0.0, F1-score: 0.0
Epoch 2, Loss: 0.6104997665416904
Validation Metrics - Accuracy: 0.7548780487804878, Precision: 0.808, Recall: 0.5690140845070423, F1-score: 0.6677685950413224
Epoch 3, Loss: 0.47376740102360887
Validation Metrics - Accuracy: 0.7804878048780488, Precision: 0.8205128205128205, Recall: 0.6309859154929578, F1-score: 0.7133757961783439
Fold 1 Metrics:
Accuracy: 0.7804878048780488
Precision: 0.8205128205128205
Recall: 0.6309859154929578
F1-score: 0.7133757961783439
--------------------------------------------------
Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.6784922625960373


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Validation Metrics - Accuracy: 0.6134146341463415, Precision: 0.0, Recall: 0.0, F1-score: 0.0
Epoch 2, Loss: 0.6698271005618863
Validation Metrics - Accuracy: 0.7170731707317073, Precision: 0.7607361963190185, Recall: 0.3911671924290221, F1-score: 0.5166666666666667
Epoch 3, Loss: 0.5577212550291201
Validation Metrics - Accuracy: 0.7597560975609756, Precision: 0.718978102189781, Recall: 0.6214511041009464, F1-score: 0.6666666666666666
Fold 2 Metrics:
Accuracy: 0.7597560975609756
Precision: 0.718978102189781
Recall: 0.6214511041009464
F1-score: 0.6666666666666666
--------------------------------------------------
Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.6674401544943088
Validation Metrics - Accuracy: 0.6605616605616605, Precision: 0.75, Recall: 0.22085889570552147, F1-score: 0.3412322274881517
Epoch 2, Loss: 0.5694636528084918
Validation Metrics - Accuracy: 0.724053724053724, Precision: 0.6893939393939394, Recall: 0.558282208588957, F1-score: 0.6169491525423729
Epoch 3, Loss: 0.4749952661554988
Validation Metrics - Accuracy: 0.7912087912087912, Precision: 0.739938080495356, Recall: 0.7331288343558282, F1-score: 0.736517719568567
Fold 3 Metrics:
Accuracy: 0.7912087912087912
Precision: 0.739938080495356
Recall: 0.7331288343558282
F1-score: 0.736517719568567
--------------------------------------------------
Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.6612022975595986
Validation Metrics - Accuracy: 0.6251526251526252, Precision: 0.522633744855967, Recall: 0.7720364741641338, F1-score: 0.6233128834355828
Epoch 2, Loss: 0.5524608352562276
Validation Metrics - Accuracy: 0.7655677655677655, Precision: 0.8309178743961353, Recall: 0.5227963525835866, F1-score: 0.6417910447761194
Epoch 3, Loss: 0.4472556909409965
Validation Metrics - Accuracy: 0.7716727716727717, Precision: 0.669047619047619, Recall: 0.8541033434650456, F1-score: 0.7503337783711616
Fold 4 Metrics:
Accuracy: 0.7716727716727717
Precision: 0.669047619047619
Recall: 0.8541033434650456
F1-score: 0.7503337783711616
--------------------------------------------------
Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.6623092508897549
Validation Metrics - Accuracy: 0.6910866910866911, Precision: 0.5833333333333334, Recall: 0.6888888888888889, F1-score: 0.6317321688500728
Epoch 2, Loss: 0.5330579762051745
Validation Metrics - Accuracy: 0.7838827838827839, Precision: 0.7899159663865546, Recall: 0.5968253968253968, F1-score: 0.6799276672694394
Epoch 3, Loss: 0.4338718640004716
Validation Metrics - Accuracy: 0.778998778998779, Precision: 0.7278911564625851, Recall: 0.6793650793650794, F1-score: 0.7027914614121511
Fold 5 Metrics:
Accuracy: 0.778998778998779
Precision: 0.7278911564625851
Recall: 0.6793650793650794
F1-score: 0.7027914614121511
--------------------------------------------------

Results Table:
   Fold  Accuracy  Precision   Recall  F1-score
      1  0.780488   0.820513 0.630986  0.713376
      2  0.759756   0.718978 0.621451  0.666667
      3  0.791209   0.739938 0.733129  0.736518
      4  0.771673   0.669048 0.854103  0.750334
      5  0.778999   0.727891 0.679365  0.7027

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from transformers import (
    XLMRobertaTokenizer, XLMRobertaForSequenceClassification,
    RobertaTokenizer, RobertaForSequenceClassification, AdamW
)
from torch.utils.data import Dataset, DataLoader
import torch
from torch import nn
from torch.optim import AdamW
import os
import re

# Preprocessing function
def preprocess_text(text):
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = text.lower()  # Convert to lowercase
    return text

# Load dataset
data = pd.read_excel("news.xlsx", engine="openpyxl")
data['news'] = data['news'].apply(preprocess_text)
X = data['news']
y = data['label'].astype('category').cat.codes  # Encode labels

# Enable multi-processing
os.environ["TOKENIZERS_PARALLELISM"] = "true"

# Initialize K-Fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Store evaluation metrics for each fold
results_xlm_roberta = []
results_roberta = []

# Custom Dataset class
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Training function
def train_model(model, train_loader, val_loader, optimizer, device, epochs=3):
    model.to(device)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader)}")

        # Validation
        model.eval()
        val_preds, val_labels = [], []
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(input_ids, attention_mask=attention_mask)
                logits = outputs.logits
                preds = torch.argmax(logits, dim=-1)

                val_preds.extend(preds.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())

        # Calculate metrics
        accuracy = accuracy_score(val_labels, val_preds)
        precision = precision_score(val_labels, val_preds, average='binary')
        recall = recall_score(val_labels, val_preds, average='binary')
        f1 = f1_score(val_labels, val_preds, average='binary')

        print(f"Validation Metrics - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-score: {f1}")

    return val_preds, val_labels

# Function to evaluate a model
def evaluate_model(model_name, tokenizer_class, model_class, results):
    print(f"\nEvaluating {model_name}...")
    tokenizer = tokenizer_class.from_pretrained(model_name)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for fold, (train_idx, val_idx) in enumerate(kfold.split(X)):
        print(f"\nFold {fold + 1} for {model_name}")

        # Split data into training and validation sets
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        # Tokenize the training and validation data
        X_train_tokens = tokenizer(
            X_train.tolist(), padding=True, truncation=True, max_length=128, return_tensors='pt'
        )
        X_val_tokens = tokenizer(
            X_val.tolist(), padding=True, truncation=True, max_length=128, return_tensors='pt'
        )

        # Create custom datasets
        train_dataset = CustomDataset(X_train_tokens, y_train.tolist())
        val_dataset = CustomDataset(X_val_tokens, y_val.tolist())

        # Create DataLoader
        train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=16)

        # Load pre-trained model for sequence classification
        model = model_class.from_pretrained(model_name, num_labels=2)

        # Set up optimizer
        optimizer = AdamW(model.parameters(), lr=2e-5)

        # Train and evaluate the model
        val_preds, val_labels = train_model(model, train_loader, val_loader, optimizer, device)

        # Calculate metrics for the current fold
        accuracy = accuracy_score(val_labels, val_preds)
        precision = precision_score(val_labels, val_preds, average='binary')
        recall = recall_score(val_labels, val_preds, average='binary')
        f1 = f1_score(val_labels, val_preds, average='binary')

        # Store metrics for the current fold
        results.append({
            'Fold': fold + 1,
            'Accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1-score': f1
        })

        print(f"Fold {fold + 1} Metrics for {model_name}:")
        print(f"Accuracy: {accuracy}")
        print(f"Precision: {precision}")
        print(f"Recall: {recall}")
        print(f"F1-score: {f1}")
        print("-" * 50)

    return results

# Evaluate XLMRoberta
results_xlm_roberta = evaluate_model(
    "xlm-roberta-base",
    XLMRobertaTokenizer,
    XLMRobertaForSequenceClassification,
    results_xlm_roberta
)

# Evaluate Roberta
results_roberta = evaluate_model(
    "roberta-base",
    RobertaTokenizer,
    RobertaForSequenceClassification,
    results_roberta
)

# Convert results to DataFrames
results_xlm_roberta_df = pd.DataFrame(results_xlm_roberta)
results_roberta_df = pd.DataFrame(results_roberta)

# Calculate average metrics across all folds for each model
avg_metrics_xlm_roberta = pd.DataFrame({
    'Fold': ['Average'],
    'Accuracy': [results_xlm_roberta_df['Accuracy'].mean()],
    'Precision': [results_xlm_roberta_df['Precision'].mean()],
    'Recall': [results_xlm_roberta_df['Recall'].mean()],
    'F1-score': [results_xlm_roberta_df['F1-score'].mean()]
})

avg_metrics_roberta = pd.DataFrame({
    'Fold': ['Average'],
    'Accuracy': [results_roberta_df['Accuracy'].mean()],
    'Precision': [results_roberta_df['Precision'].mean()],
    'Recall': [results_roberta_df['Recall'].mean()],
    'F1-score': [results_roberta_df['F1-score'].mean()]
})

# Append the average metrics to the results tables
results_xlm_roberta_df = pd.concat([results_xlm_roberta_df, avg_metrics_xlm_roberta], ignore_index=True)
results_roberta_df = pd.concat([results_roberta_df, avg_metrics_roberta], ignore_index=True)

# Print the results tables
print("\nResults for XLMRoberta:")
print(results_xlm_roberta_df.to_string(index=False))

print("\nResults for Roberta:")
print(results_roberta_df.to_string(index=False))


Evaluating xlm-roberta-base...

Fold 1 for xlm-roberta-base


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.5101673165472542
Validation Metrics - Accuracy: 0.8768292682926829, Precision: 0.8175, Recall: 0.9211267605633803, F1-score: 0.866225165562914
Epoch 2, Loss: 0.25060298875337694
Validation Metrics - Accuracy: 0.9109756097560976, Precision: 0.8615384615384616, Recall: 0.9464788732394366, F1-score: 0.9020134228187919
Epoch 3, Loss: 0.1453780756672708
Validation Metrics - Accuracy: 0.9512195121951219, Precision: 0.972972972972973, Recall: 0.9126760563380282, F1-score: 0.9418604651162791
Fold 1 Metrics for xlm-roberta-base:
Accuracy: 0.9512195121951219
Precision: 0.972972972972973
Recall: 0.9126760563380282
F1-score: 0.9418604651162791
--------------------------------------------------

Fold 2 for xlm-roberta-base


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.49591529223977066
Validation Metrics - Accuracy: 0.8975609756097561, Precision: 0.8498498498498499, Recall: 0.8927444794952681, F1-score: 0.8707692307692307
Epoch 2, Loss: 0.25000100677333226
Validation Metrics - Accuracy: 0.9390243902439024, Precision: 0.9211356466876972, Recall: 0.9211356466876972, F1-score: 0.9211356466876972
Epoch 3, Loss: 0.13322081134722727
Validation Metrics - Accuracy: 0.9451219512195121, Precision: 0.9, Recall: 0.9652996845425867, F1-score: 0.9315068493150684
Fold 2 Metrics for xlm-roberta-base:
Accuracy: 0.9451219512195121
Precision: 0.9
Recall: 0.9652996845425867
F1-score: 0.9315068493150684
--------------------------------------------------

Fold 3 for xlm-roberta-base


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.48300543932653056
Validation Metrics - Accuracy: 0.8363858363858364, Precision: 0.8934426229508197, Recall: 0.6687116564417178, F1-score: 0.7649122807017544
Epoch 2, Loss: 0.2610171414548304
Validation Metrics - Accuracy: 0.9401709401709402, Precision: 0.9085545722713865, Recall: 0.9447852760736196, F1-score: 0.9263157894736842
Epoch 3, Loss: 0.15289165747647243
Validation Metrics - Accuracy: 0.9365079365079365, Precision: 0.8763736263736264, Recall: 0.9785276073619632, F1-score: 0.9246376811594202
Fold 3 Metrics for xlm-roberta-base:
Accuracy: 0.9365079365079365
Precision: 0.8763736263736264
Recall: 0.9785276073619632
F1-score: 0.9246376811594202
--------------------------------------------------

Fold 4 for xlm-roberta-base


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.44178742893585343
Validation Metrics - Accuracy: 0.9096459096459096, Precision: 0.9381443298969072, Recall: 0.8297872340425532, F1-score: 0.8806451612903226
Epoch 2, Loss: 0.27564226517044915
Validation Metrics - Accuracy: 0.8986568986568987, Precision: 0.8075, Recall: 0.9817629179331308, F1-score: 0.8861454046639232
Epoch 3, Loss: 0.1318964233122221
Validation Metrics - Accuracy: 0.9181929181929182, Precision: 0.8341836734693877, Recall: 0.993920972644377, F1-score: 0.9070735090152566
Fold 4 Metrics for xlm-roberta-base:
Accuracy: 0.9181929181929182
Precision: 0.8341836734693877
Recall: 0.993920972644377
F1-score: 0.9070735090152566
--------------------------------------------------

Fold 5 for xlm-roberta-base


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.5293294324380595
Validation Metrics - Accuracy: 0.8266178266178266, Precision: 0.7235142118863049, Recall: 0.8888888888888888, F1-score: 0.7977207977207977
Epoch 2, Loss: 0.3015527929838111
Validation Metrics - Accuracy: 0.8937728937728938, Precision: 0.7984293193717278, Recall: 0.9682539682539683, F1-score: 0.8751793400286944
Epoch 3, Loss: 0.1673616782739395
Validation Metrics - Accuracy: 0.9316239316239316, Precision: 0.884272997032641, Recall: 0.946031746031746, F1-score: 0.9141104294478528
Fold 5 Metrics for xlm-roberta-base:
Accuracy: 0.9316239316239316
Precision: 0.884272997032641
Recall: 0.946031746031746
F1-score: 0.9141104294478528
--------------------------------------------------

Evaluating roberta-base...

Fold 1 for roberta-base


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.5277713326419272
Validation Metrics - Accuracy: 0.7792682926829269, Precision: 0.967741935483871, Recall: 0.5070422535211268, F1-score: 0.6654343807763401
Epoch 2, Loss: 0.40688933599285965
Validation Metrics - Accuracy: 0.8695121951219512, Precision: 0.8734939759036144, Recall: 0.8169014084507042, F1-score: 0.8442503639010189
Epoch 3, Loss: 0.3552560709598588
Validation Metrics - Accuracy: 0.8512195121951219, Precision: 0.8319088319088319, Recall: 0.8225352112676056, F1-score: 0.8271954674220963
Fold 1 Metrics for roberta-base:
Accuracy: 0.8512195121951219
Precision: 0.8319088319088319
Recall: 0.8225352112676056
F1-score: 0.8271954674220963
--------------------------------------------------

Fold 2 for roberta-base


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.5486115062382163
Validation Metrics - Accuracy: 0.7878048780487805, Precision: 0.9863945578231292, Recall: 0.45741324921135645, F1-score: 0.625
Epoch 2, Loss: 0.42562918139667044
Validation Metrics - Accuracy: 0.8121951219512196, Precision: 0.8438818565400844, Recall: 0.6309148264984227, F1-score: 0.7220216606498195
Epoch 3, Loss: 0.41246494534539013
Validation Metrics - Accuracy: 0.8341463414634146, Precision: 0.9371980676328503, Recall: 0.61198738170347, F1-score: 0.7404580152671756
Fold 2 Metrics for roberta-base:
Accuracy: 0.8341463414634146
Precision: 0.9371980676328503
Recall: 0.61198738170347
F1-score: 0.7404580152671756
--------------------------------------------------

Fold 3 for roberta-base


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.5456773738308651
Validation Metrics - Accuracy: 0.8315018315018315, Precision: 0.8615384615384616, Recall: 0.6871165644171779, F1-score: 0.764505119453925
Epoch 2, Loss: 0.40438864472799185
Validation Metrics - Accuracy: 0.8534798534798534, Precision: 0.9401709401709402, Recall: 0.6748466257668712, F1-score: 0.7857142857142857
Epoch 3, Loss: 0.362359304871501
Validation Metrics - Accuracy: 0.8559218559218559, Precision: 0.8795620437956204, Recall: 0.7392638036809815, F1-score: 0.8033333333333333
Fold 3 Metrics for roberta-base:
Accuracy: 0.8559218559218559
Precision: 0.8795620437956204
Recall: 0.7392638036809815
F1-score: 0.8033333333333333
--------------------------------------------------

Fold 4 for roberta-base


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.4754488765829947
Validation Metrics - Accuracy: 0.833943833943834, Precision: 0.8163934426229508, Recall: 0.756838905775076, F1-score: 0.7854889589905363
Epoch 2, Loss: 0.38425289476063196
Validation Metrics - Accuracy: 0.8315018315018315, Precision: 0.799373040752351, Recall: 0.7750759878419453, F1-score: 0.7870370370370371
Epoch 3, Loss: 0.3477637877369799
Validation Metrics - Accuracy: 0.851037851037851, Precision: 0.8327974276527331, Recall: 0.7872340425531915, F1-score: 0.809375
Fold 4 Metrics for roberta-base:
Accuracy: 0.851037851037851
Precision: 0.8327974276527331
Recall: 0.7872340425531915
F1-score: 0.809375
--------------------------------------------------

Fold 5 for roberta-base


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.5554673905779676
Validation Metrics - Accuracy: 0.8070818070818071, Precision: 0.7813620071684588, Recall: 0.692063492063492, F1-score: 0.734006734006734
Epoch 2, Loss: 0.48751439190492396
Validation Metrics - Accuracy: 0.800976800976801, Precision: 1.0, Recall: 0.48253968253968255, F1-score: 0.6509635974304069
Epoch 3, Loss: 0.47423879831302457
Validation Metrics - Accuracy: 0.7985347985347986, Precision: 0.987012987012987, Recall: 0.48253968253968255, F1-score: 0.6481876332622601
Fold 5 Metrics for roberta-base:
Accuracy: 0.7985347985347986
Precision: 0.987012987012987
Recall: 0.48253968253968255
F1-score: 0.6481876332622601
--------------------------------------------------

Results for XLMRoberta:
   Fold  Accuracy  Precision   Recall  F1-score
      1  0.951220   0.972973 0.912676  0.941860
      2  0.945122   0.900000 0.965300  0.931507
      3  0.936508   0.876374 0.978528  0.924638
      4  0.918193   0.834184 0.993921  0.907074
      5  0.931624   0.884273 0.9