In [1]:
import pandas as pd
import numpy as np
import re
import nltk
import string

from sklearn.model_selection import train_test_split
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer


In [2]:
# Load LIAR-2 CSVs
train_df = pd.read_csv('LIAR2/train.csv', encoding='utf-8')
valid_df = pd.read_csv('LIAR2/valid.csv', encoding='utf-8')
test_df = pd.read_csv('LIAR2/test.csv', encoding='utf-8')

# Column names for LIAR-2
columns = [
    "id", "label", "statement", "date", "subject", "speaker", "speaker_description", "state_info",
    "true_counts", "mostly_true_counts", "half_true_counts", "mostly_false_counts",
    "false_counts", "pants_on_fire_counts", "context", "justification"
]

# Assign to DataFrames
train_df.columns = columns
valid_df.columns = columns
test_df.columns = columns

# ✅ Check
print(train_df.head())
print(train_df['label'].value_counts())

      id  label                                          statement  \
0  13847      5  90 percent of Americans "support universal bac...   
1  13411      1  Last year was one of the deadliest years ever ...   
2  10882      0  Bernie Sanders's plan is "to raise your taxes ...   
3  20697      4  Voter ID is supported by an overwhelming major...   
4   6095      2  Says Barack Obama "robbed Medicare (of) $716 b...   

               date                                            subject  \
0   October 2, 2017  government regulation;polls and public opinion...   
1      May 19, 2017  after the fact;congress;criminal justice;histo...   
2  October 28, 2015                                              taxes   
3  December 8, 2021                                      voter id laws   
4   August 12, 2012         federal budget;history;medicare;retirement   

          speaker                                speaker_description  \
0     chris abele  Chris Abele is Milwaukee County Executive, 

In [3]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re
from sklearn.feature_extraction.text import TfidfVectorizer

# Download required NLTK data once (run this once in your environment)
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    text = str(text).lower().strip()
    # Remove non-alphabetic characters except spaces
    text = re.sub(r'[^a-z\s]', '', text)
    tokens = text.split()
    # Remove stopwords and lemmatize
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token not in stop_words]
    return ' '.join(tokens)

train_df['statement'] = train_df['statement'].apply(preprocess)
valid_df['statement'] = valid_df['statement'].apply(preprocess)
test_df['statement'] = test_df['statement'].apply(preprocess)

# Create TF-IDF vectorizer (no need for stop_words param since already removed)
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)

X_train_tfidf = vectorizer.fit_transform(train_df['statement'])
X_valid_tfidf = vectorizer.transform(valid_df['statement'])
X_test_tfidf = vectorizer.transform(test_df['statement'])

print(X_train_tfidf.shape, X_valid_tfidf.shape, X_test_tfidf.shape)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


(18369, 5000) (2297, 5000) (2296, 5000)


In [4]:
def map_label(label):
    # 3: half-true, 4: mostly-true, 5: true → 1 (true)
    # 0: pants-fire, 1: false, 2: barely-true → 0 (false)
    return 1 if label in [3, 4, 5] else 0

train_df['binary_label'] = train_df['label'].apply(map_label)
valid_df['binary_label'] = valid_df['label'].apply(map_label)
test_df['binary_label'] = test_df['label'].apply(map_label)

y_train = train_df['binary_label'].values
y_valid = valid_df['binary_label'].values
y_test = test_df['binary_label'].values

In [5]:
# Check if both classes are present in each split
print("Train label distribution:", np.bincount(y_train))
print("Validation label distribution:", np.bincount(y_valid))
print("Test label distribution:", np.bincount(y_test))


Train label distribution: [10591  7778]
Validation label distribution: [1325  972]
Test label distribution: [1323  973]


SVM

In [6]:
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, accuracy_score

# Train the model
clf = LinearSVC(class_weight='balanced', random_state=42)
clf.fit(X_train_tfidf, y_train)

# Validate
y_valid_pred = clf.predict(X_valid_tfidf)
print("Validation Accuracy:", accuracy_score(y_valid, y_valid_pred))
print(classification_report(y_valid, y_valid_pred, target_names=['false', 'true']))

# Test
y_test_pred = clf.predict(X_test_tfidf)
print("Test Accuracy:", accuracy_score(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, target_names=['false', 'true']))

Validation Accuracy: 0.6586852416195037
              precision    recall  f1-score   support

       false       0.73      0.64      0.68      1325
        true       0.58      0.68      0.63       972

    accuracy                           0.66      2297
   macro avg       0.66      0.66      0.66      2297
weighted avg       0.67      0.66      0.66      2297

Test Accuracy: 0.6589721254355401
              precision    recall  f1-score   support

       false       0.73      0.64      0.69      1323
        true       0.58      0.68      0.63       973

    accuracy                           0.66      2296
   macro avg       0.66      0.66      0.66      2296
weighted avg       0.67      0.66      0.66      2296



Logistic Regression

In [7]:
from sklearn.linear_model import LogisticRegression

clf_lr = LogisticRegression(max_iter=1000)
clf_lr.fit(X_train_tfidf, y_train)

print("LR Validation Accuracy:", accuracy_score(y_valid, clf_lr.predict(X_valid_tfidf)))
print(classification_report(y_valid, clf_lr.predict(X_valid_tfidf), target_names=['false', 'true']))

print("LR Test Accuracy:", accuracy_score(y_test, clf_lr.predict(X_test_tfidf)))
print(classification_report(y_test, clf_lr.predict(X_test_tfidf), target_names=['false', 'true']))

LR Validation Accuracy: 0.6769699608184588
              precision    recall  f1-score   support

       false       0.71      0.75      0.73      1325
        true       0.63      0.58      0.60       972

    accuracy                           0.68      2297
   macro avg       0.67      0.66      0.67      2297
weighted avg       0.67      0.68      0.67      2297

LR Test Accuracy: 0.6803135888501742
              precision    recall  f1-score   support

       false       0.71      0.76      0.73      1323
        true       0.64      0.57      0.60       973

    accuracy                           0.68      2296
   macro avg       0.67      0.67      0.67      2296
weighted avg       0.68      0.68      0.68      2296



Random Forest

In [8]:
from sklearn.ensemble import RandomForestClassifier
clf_rf = RandomForestClassifier(n_estimators=100, random_state=42)
clf_rf.fit(X_train_tfidf, y_train)
print("RF Validation Accuracy:", accuracy_score(y_valid, clf_rf.predict(X_valid_tfidf)))
print(classification_report(y_valid, clf_rf.predict(X_valid_tfidf), target_names=['false', 'true']))
print("RF Test Accuracy:", accuracy_score(y_test, clf_rf.predict(X_test_tfidf)))
print(classification_report(y_test, clf_rf.predict(X_test_tfidf), target_names=['false', 'true']))

RF Validation Accuracy: 0.6752285589899869
              precision    recall  f1-score   support

       false       0.72      0.71      0.72      1325
        true       0.61      0.62      0.62       972

    accuracy                           0.68      2297
   macro avg       0.67      0.67      0.67      2297
weighted avg       0.68      0.68      0.68      2297

RF Test Accuracy: 0.669425087108014
              precision    recall  f1-score   support

       false       0.71      0.72      0.71      1323
        true       0.61      0.60      0.61       973

    accuracy                           0.67      2296
   macro avg       0.66      0.66      0.66      2296
weighted avg       0.67      0.67      0.67      2296



NN

In [9]:
# deeplearning using pytorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train_tfidf.toarray(), dtype=torch.float32)
X_valid_tensor = torch.tensor(X_valid_tfidf.toarray(), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_tfidf.toarray(), dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_valid_tensor = torch.tensor(y_valid, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
# Create DataLoader for batching
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
valid_dataset = TensorDataset(X_valid_tensor, y_valid_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
# Define a simple feedforward neural network

class SimpleNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        return x
    
# Initialize the model, loss function, and optimizer
input_dim = X_train_tensor.shape[1]
output_dim = 2  # For binary classification
model = SimpleNN(input_dim, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    
    # Validation
    model.eval()
    valid_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            valid_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {valid_loss/len(valid_loader):.4f}, Accuracy: {correct/total:.4f}')

# Testing the model
model.eval()
test_loss = 0.0
correct = 0
total = 0
all_preds = []
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        all_preds.extend(predicted.cpu().numpy())
print(f'Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {correct/total:.4f}')

from sklearn.metrics import classification_report
print(classification_report(y_test, all_preds, target_names=['false', 'true']))

Epoch [1/10], Loss: 0.5747, Accuracy: 0.6861
Epoch [2/10], Loss: 0.5821, Accuracy: 0.6774
Epoch [3/10], Loss: 0.6072, Accuracy: 0.6735
Epoch [4/10], Loss: 0.6389, Accuracy: 0.6744
Epoch [5/10], Loss: 0.7098, Accuracy: 0.6674
Epoch [6/10], Loss: 0.8376, Accuracy: 0.6604
Epoch [7/10], Loss: 0.9039, Accuracy: 0.6613
Epoch [8/10], Loss: 1.0368, Accuracy: 0.6522
Epoch [9/10], Loss: 1.1413, Accuracy: 0.6613
Epoch [10/10], Loss: 1.2286, Accuracy: 0.6508
Test Loss: 1.2180, Test Accuracy: 0.6581
              precision    recall  f1-score   support

       false       0.72      0.67      0.69      1323
        true       0.59      0.64      0.61       973

    accuracy                           0.66      2296
   macro avg       0.65      0.66      0.65      2296
weighted avg       0.66      0.66      0.66      2296



LSTM

In [10]:
# lstm
import torch.nn.functional as F

class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1):
        super(LSTMClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.5)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = x.unsqueeze(1)  # Add sequence dimension
        lstm_out, _ = self.lstm(x)
        out = self.fc(lstm_out[:, -1, :])  # Use the last time step
        return out
    
# Initialize the LSTM model, loss function, and optimizer
hidden_dim = 64
lstm_model = LSTMClassifier(input_dim, hidden_dim, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(lstm_model.parameters(), lr=0.001)

# Training loop for LSTM
num_epochs = 10
for epoch in range(num_epochs):
    lstm_model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = lstm_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    
    # Validation
    lstm_model.eval()
    valid_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            outputs = lstm_model(inputs)
            loss = criterion(outputs, labels)
            valid_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {valid_loss/len(valid_loader):.4f}, Accuracy: {correct/total:.4f}')

# Testing the LSTM model
lstm_model.eval()
test_loss = 0.0
correct = 0
total = 0
all_preds = []
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = lstm_model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        all_preds.extend(predicted.cpu().numpy())
print(f'Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {correct/total:.4f}')

from sklearn.metrics import classification_report
print(classification_report(y_test, all_preds, target_names=['false', 'true']))



Epoch [1/10], Loss: 0.5783, Accuracy: 0.6835
Epoch [2/10], Loss: 0.5755, Accuracy: 0.6852
Epoch [3/10], Loss: 0.5956, Accuracy: 0.6735
Epoch [4/10], Loss: 0.6200, Accuracy: 0.6717
Epoch [5/10], Loss: 0.6401, Accuracy: 0.6761
Epoch [6/10], Loss: 0.6587, Accuracy: 0.6691
Epoch [7/10], Loss: 0.6755, Accuracy: 0.6683
Epoch [8/10], Loss: 0.6882, Accuracy: 0.6683
Epoch [9/10], Loss: 0.7094, Accuracy: 0.6617
Epoch [10/10], Loss: 0.7206, Accuracy: 0.6678
Test Loss: 0.7037, Test Accuracy: 0.6577
              precision    recall  f1-score   support

       false       0.71      0.70      0.70      1323
        true       0.59      0.61      0.60       973

    accuracy                           0.66      2296
   macro avg       0.65      0.65      0.65      2296
weighted avg       0.66      0.66      0.66      2296



In [11]:
import torch

print("Torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("GPU name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU detected")


Torch version: 2.7.0+cu118
CUDA available: True
GPU name: NVIDIA GeForce RTX 3050 Laptop GPU


BERT

In [12]:
# Add BERT model for binary classification
# Install transformers if not already installed: pip install transformers

from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import torch

class WeightedBertForSequenceClassification(BertForSequenceClassification):
    def __init__(self, config, class_weights=None):
        super().__init__(config)
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")
        loss_fct = nn.CrossEntropyLoss(weight=self.class_weights)
        loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# Calculate weights, e.g.:
import numpy as np
class_counts = np.bincount(y_train)
weights = torch.tensor(1.0 / class_counts, dtype=torch.float)
weights = weights / weights.sum()  # Normalize

class NewsDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_len)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = logits.argmax(axis=1)
    return {
        'accuracy': accuracy_score(labels, preds),
        'f1': f1_score(labels, preds, average='weighted'),
        'precision': precision_score(labels, preds, average='weighted'),
        'recall': recall_score(labels, preds, average='weighted')
    }

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
train_dataset = NewsDataset(train_df['statement'].tolist(), y_train, tokenizer)
valid_dataset = NewsDataset(valid_df['statement'].tolist(), y_valid, tokenizer)
test_dataset = NewsDataset(test_df['statement'].tolist(), y_test, tokenizer)

model_bert = WeightedBertForSequenceClassification.from_pretrained(
    'bert-base-uncased', num_labels=2, class_weights=weights
)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=2,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    logging_steps=10,
    load_best_model_at_end=False,
)

trainer = Trainer(
    model=model_bert,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    compute_metrics=compute_metrics
)

trainer.train()
eval_results = trainer.evaluate()
print("BERT Validation Accuracy:", eval_results['eval_accuracy'])

# Test set evaluation
test_results = trainer.predict(test_dataset)
bert_test_acc = accuracy_score(y_test, test_results.predictions.argmax(axis=1))
print("BERT Test Accuracy:", bert_test_acc)
print(classification_report(y_test, test_results.predictions.argmax(axis=1), target_names=['false','true']))




model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of WeightedBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
10,0.6692
20,0.6736
30,0.7229
40,0.7375
50,0.6815
60,0.6471
70,0.6505
80,0.7203
90,0.6211
100,0.6726


BERT Validation Accuracy: 0.6930779277318241
BERT Test Accuracy: 0.7064459930313589
              precision    recall  f1-score   support

       false       0.77      0.69      0.73      1323
        true       0.63      0.72      0.68       973

    accuracy                           0.71      2296
   macro avg       0.70      0.71      0.70      2296
weighted avg       0.71      0.71      0.71      2296



In [13]:
model_bert.save_pretrained("best_model_bert")
tokenizer.save_pretrained("best_model_bert")

('best_model_bert\\tokenizer_config.json',
 'best_model_bert\\special_tokens_map.json',
 'best_model_bert\\vocab.txt',
 'best_model_bert\\added_tokens.json')

In [14]:
import joblib

# Collect test accuracies
results = {
    "svm": accuracy_score(y_test, y_test_pred),
    "logreg": accuracy_score(y_test, clf_lr.predict(X_test_tfidf)),
    "simplenn": correct / total,  # from SimpleNN test
    "lstm": correct / total,      # from LSTM test
    "bert": bert_test_acc,        # from BERT test
}

# Find the best model
best_model_name = max(results, key=results.get)
print(f"Best model: {best_model_name} with accuracy {results[best_model_name]:.4f}")

# Save the best model
"""
if best_model_name == "svm":
    joblib.dump(clf, "best_model_svm.joblib")
elif best_model_name == "logreg":
    joblib.dump(clf_lr, "best_model_logreg.joblib")
elif best_model_name == "simplenn":
    torch.save(model.state_dict(), "best_model_simplenn.pt")
elif best_model_name == "lstm":
    torch.save(lstm_model.state_dict(), "best_model_lstm.pt")
"""
if best_model_name == "bert":
    model_bert.save_pretrained("best_model_bert")
    tokenizer.save_pretrained("best_model_bert")

Best model: bert with accuracy 0.7064
