In [None]:
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
from transformers import BertTokenizer, BertForSequenceClassification, get_linear_schedule_with_warmup
from torch.utils.data import Dataset, DataLoader
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

# Load the binary-labelled CSV from the mounted Drive folder
file_path = "/content/drive/MyDrive/ProjectFilesForml/LogicalFallacyDetection/binary_data.csv"
df = pd.read_csv(file_path)

# Sanity check: (rows, columns) followed by the number of rows per label value
print("Dataset shape:", df.shape)
print("\nLabel distribution:", df['label'].value_counts(), sep="\n")



Mounted at /content/drive
Dataset shape: (10338, 34)

Label distribution:
label
False    6704
True     3634
Name: count, dtype: int64


In [None]:

# Give every distinct label an integer id, numbered in the order unique() returns them
label_values = df['label'].unique()
label_mapping = dict(zip(label_values, range(len(label_values))))
df['label_encoded'] = df['label'].map(label_mapping)

class FallacyDataset(Dataset):
    """Map-style dataset that tokenizes one text per item and pairs it with its label.

    Args:
        texts: Indexable collection of texts; each item is converted with ``str()``.
        labels: Indexable collection of labels, aligned with ``texts`` by position.
        tokenizer: Callable invoked with the text plus padding/truncation keyword
            arguments; its result must provide ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        max_length: Length handed to the tokenizer for padding and truncation.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        # The number of items is taken from the texts collection
        return len(self.texts)

    def __getitem__(self, idx):
        """Return a dict with flattened ``input_ids``/``attention_mask`` and a long ``labels`` tensor."""
        text = str(self.texts[idx])
        label = self.labels[idx]

        tokenized = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )

        # The tokenizer output is flattened so each item is a 1-D tensor
        item = {name: tokenized[name].flatten() for name in ('input_ids', 'attention_mask')}
        item['labels'] = torch.tensor(label, dtype=torch.long)
        return item

# Seed the global RNGs. The splits below pin their own random_state, but the
# freshly initialised classifier head, dropout and the shuffled training
# DataLoader all draw from global generators, so without this every run of the
# cell trains a different model. (GPU kernels may still introduce small
# non-deterministic differences.)
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Split data into train/val/test (70% / 15% / 15%), stratified by encoded label
train_texts, temp_texts, train_labels, temp_labels = train_test_split(
    df['text'].values, df['label_encoded'].values,
    test_size=0.3, random_state=SEED, stratify=df['label_encoded']
)

# The 30% hold-out is halved into validation and test
val_texts, test_texts, val_labels, test_labels = train_test_split(
    temp_texts, temp_labels,
    test_size=0.5, random_state=SEED, stratify=temp_labels
)

# Initialize tokenizer and model; the classification head has one output per label
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
model = BertForSequenceClassification.from_pretrained(
    'bert-large-uncased',
    num_labels=len(label_mapping),
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1
)

# Create datasets
train_dataset = FallacyDataset(train_texts, train_labels, tokenizer)
val_dataset = FallacyDataset(val_texts, val_labels, tokenizer)
test_dataset = FallacyDataset(test_texts, test_labels, tokenizer)

# Create dataloaders; only the training data is shuffled
BATCH_SIZE = 16
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

# Training setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
model.to(device)

# Training parameters
num_epochs = 10
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)

# Create scheduler with warmup: one scheduler step per training batch, with the
# first 10% of all steps used for warmup
num_training_steps = len(train_loader) * num_epochs
num_warmup_steps = num_training_steps // 10
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps
)

# Training loop: for each epoch, run one optimisation pass over train_loader,
# score the model on val_loader, print the epoch metrics, and save model and
# tokenizer whenever the validation accuracy improves.
best_val_accuracy = 0
for epoch in range(num_epochs):
    print(f'\nEpoch {epoch + 1}/{num_epochs}')
    print('-' * 10)

    # Training phase
    model.train()
    total_train_loss = 0
    train_predictions = []
    train_true_labels = []

    for batch_idx, batch in enumerate(train_loader):
        # Clear gradients left over from the previous batch
        optimizer.zero_grad()

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Labels are passed in so the loss can be read from outputs.loss below
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )

        loss = outputs.loss
        total_train_loss += loss.item()

        loss.backward()
        # Clip the gradient norm to 1.0 before applying the update
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        # The schedule was sized as len(train_loader) * num_epochs steps, so it
        # advances once per batch rather than once per epoch
        scheduler.step()

        # These predictions come from the forward pass made in train mode, so
        # the training accuracy below is measured with dropout active
        train_predictions.extend(outputs.logits.argmax(dim=-1).cpu().numpy())
        train_true_labels.extend(labels.cpu().numpy())

        # Progress line every 50 batches
        if (batch_idx + 1) % 50 == 0:
            print(f'Batch {batch_idx + 1}/{len(train_loader)} - Loss: {loss.item():.4f}')

    # Validation phase: eval mode and no gradient tracking
    model.eval()
    total_val_loss = 0
    val_predictions = []
    val_true_labels = []

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )

            total_val_loss += outputs.loss.item()
            val_predictions.extend(outputs.logits.argmax(dim=-1).cpu().numpy())
            val_true_labels.extend(labels.cpu().numpy())

    # Calculate metrics: accuracy is the fraction of predictions equal to the labels
    train_accuracy = np.mean(np.array(train_predictions) == np.array(train_true_labels))
    val_accuracy = np.mean(np.array(val_predictions) == np.array(val_true_labels))

    # Reported losses are the mean of the per-batch losses
    print(f'\nTraining Loss: {total_train_loss/len(train_loader):.4f}')
    print(f'Validation Loss: {total_val_loss/len(val_loader):.4f}')
    print(f'Training Accuracy: {train_accuracy:.4f}')
    print(f'Validation Accuracy: {val_accuracy:.4f}')

    # Save best model. The comparison is strict, so a tie keeps the earlier
    # checkpoint; every improvement is written to the same directory.
    # NOTE(review): nothing reloads this checkpoint afterwards, so once the loop
    # ends `model` holds the most recent weights, not necessarily the best ones.
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        model_save_path = "/content/drive/MyDrive/ProjectFilesForml/best_bert_model_binary"
        model.save_pretrained(model_save_path)
        tokenizer.save_pretrained(model_save_path)
        print(f"New best model saved with validation accuracy: {val_accuracy:.4f}")


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using device: cuda

Epoch 1/10
----------
Batch 50/453 - Loss: 0.6263
Batch 100/453 - Loss: 0.7752
Batch 150/453 - Loss: 0.6360
Batch 200/453 - Loss: 0.6997
Batch 250/453 - Loss: 0.6481
Batch 300/453 - Loss: 0.4401
Batch 350/453 - Loss: 0.6534
Batch 400/453 - Loss: 0.6292
Batch 450/453 - Loss: 0.6671

Training Loss: 0.6443
Validation Loss: 0.6120
Training Accuracy: 0.6495
Validation Accuracy: 0.6647
New best model saved with validation accuracy: 0.6647

Epoch 2/10
----------
Batch 50/453 - Loss: 0.9616
Batch 100/453 - Loss: 0.6541
Batch 150/453 - Loss: 0.4231
Batch 200/453 - Loss: 0.6489
Batch 250/453 - Loss: 0.5166
Batch 300/453 - Loss: 0.5457
Batch 350/453 - Loss: 0.5269
Batch 400/453 - Loss: 0.5612
Batch 450/453 - Loss: 0.6237

Training Loss: 0.6150
Validation Loss: 0.6252
Training Accuracy: 0.6693
Validation Accuracy: 0.6570

Epoch 3/10
----------
Batch 50/453 - Loss: 0.6167
Batch 100/453 - Loss: 0.4473
Batch 150/453 - Loss: 0.2707
Batch 200/453 - Loss: 0.4487
Batch 250/453 - Loss:

KeyboardInterrupt: 

In [None]:

# Score the model currently held in memory on the held-out test split
model.eval()
test_predictions, test_true_labels = [], []

with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # No labels are passed here: only the logits are needed for prediction
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        batch_predictions = outputs.logits.argmax(dim=-1)

        test_predictions.extend(batch_predictions.cpu().numpy())
        test_true_labels.extend(labels.cpu().numpy())

# Invert label_mapping (id -> original label) and translate both lists back
reverse_mapping = dict(zip(label_mapping.values(), label_mapping.keys()))
test_predictions_labels = [reverse_mapping[predicted_id] for predicted_id in test_predictions]
test_true_labels = [reverse_mapping[true_id] for true_id in test_true_labels]

# Per-class precision / recall / F1 on the test split
print("\nFinal Classification Report:")
print(classification_report(test_true_labels, test_predictions_labels))

# Persist the evaluated model together with its tokenizer
final_model_path = "/content/drive/MyDrive/ProjectFilesForml/final_bert_model_binary"
for artifact in (model, tokenizer):
    artifact.save_pretrained(final_model_path)

# Function for making predictions
def predict_fallacy(text):
    """Classify a single text with the notebook-level model and return its original label.

    Relies on the notebook globals ``model``, ``tokenizer``, ``device`` and
    ``reverse_mapping``.

    Args:
        text: The text to classify.

    Returns:
        The entry of ``reverse_mapping`` for the highest-scoring class index.
    """
    model.eval()

    tokenizer_options = dict(
        add_special_tokens=True,
        max_length=512,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )
    encoded = tokenizer(text, **tokenizer_options)

    # Move inputs to the model's device before the forward pass
    ids_on_device = encoded['input_ids'].to(device)
    mask_on_device = encoded['attention_mask'].to(device)

    with torch.no_grad():
        logits = model(input_ids=ids_on_device, attention_mask=mask_on_device).logits
        predicted_index = logits.argmax(dim=-1)

    return reverse_mapping[predicted_index.item()]

# Placeholder inputs for a quick smoke test of predict_fallacy
sample_texts = [
    "Your first text example here",
    "Your second text example here",
    "Your third text example here"
]

print("\nSample Predictions:")
for text in sample_texts:
    prediction = predict_fallacy(text)
    # One blank line, then the text and its predicted label on separate lines
    print(f"\nText: {text}", f"Predicted fallacy: {prediction}", sep="\n")



Final Classification Report:
              precision    recall  f1-score   support

 barely-true       0.24      0.32      0.27       248
       false       0.24      0.27      0.25       298
   half-true       0.26      0.29      0.27       317
 mostly-true       0.26      0.27      0.26       295
         nan       0.77      1.00      0.87        17
  pants-fire       0.32      0.25      0.28       125
        true       0.31      0.14      0.19       251

    accuracy                           0.27      1551
   macro avg       0.34      0.36      0.34      1551
weighted avg       0.27      0.27      0.26      1551


Sample Predictions:

Text: Your first text example here
Predicted fallacy: false

Text: Your second text example here
Predicted fallacy: false

Text: Your third text example here
Predicted fallacy: false


In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from torch.utils.data import Dataset, DataLoader

# Define the dataset class
class FallacyDataset(Dataset):
    """Dataset yielding tokenized text and an integer label for each index.

    Args:
        texts: Indexable collection of texts; items are converted with ``str()``.
        labels: Indexable collection of labels, position-aligned with ``texts``.
        tokenizer: Callable applied to each text; its result must expose
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_length: Padding/truncation length forwarded to the tokenizer.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        # One item per text
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize the text at ``idx`` and return it with its label as tensors."""
        text = str(self.texts[idx])
        target = self.labels[idx]

        tokenizer_options = {
            'add_special_tokens': True,
            'max_length': self.max_length,
            'padding': 'max_length',
            'truncation': True,
            'return_tensors': 'pt',
        }
        encoded = self.tokenizer(text, **tokenizer_options)

        # Flatten the tokenizer tensors so every item is 1-D
        input_ids = encoded['input_ids'].flatten()
        attention_mask = encoded['attention_mask'].flatten()

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': torch.tensor(target, dtype=torch.long),
        }

# Function to load data and model, split data, and evaluate the model
def load_and_evaluate_model(file_path, model_path):
    """Evaluate a saved BERT classifier on the test split of a labelled CSV.

    The CSV is re-split with the same proportions, random_state (42) and
    stratification as the training cell, so the test split here contains the
    same rows the training run held out. The label-to-id mapping is rebuilt
    from the CSV in order of first appearance.

    Args:
        file_path: Path to a CSV with ``text`` and ``label`` columns.
        model_path: Location passed to
            ``BertForSequenceClassification.from_pretrained``.

    Returns:
        None. The classification report is printed.

    Raises:
        ValueError: If the checkpoint's number of output classes differs from
            the number of distinct labels in the CSV.
    """
    # Load dataset
    df = pd.read_csv(file_path)

    # Create label mapping
    label_mapping = {label: idx for idx, label in enumerate(df['label'].unique())}
    df['label_encoded'] = df['label'].map(label_mapping)

    # Only the test split is evaluated, but both splits must still be performed
    # with the original parameters to carve out the same held-out rows.
    _, temp_texts, _, temp_labels = train_test_split(
        df['text'].values, df['label_encoded'].values,
        test_size=0.3, random_state=42, stratify=df['label_encoded']
    )

    _, test_texts, _, test_labels = train_test_split(
        temp_texts, temp_labels,
        test_size=0.5, random_state=42, stratify=temp_labels
    )

    # Initialize tokenizer and model
    tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
    model = BertForSequenceClassification.from_pretrained(model_path)

    # A checkpoint trained on a different label set would otherwise produce a
    # KeyError below or, worse, a report with silently mislabelled classes.
    if model.config.num_labels != len(label_mapping):
        raise ValueError(
            f"Checkpoint at {model_path!r} has {model.config.num_labels} output classes, "
            f"but {file_path!r} contains {len(label_mapping)} distinct labels"
        )

    # Create the test dataset and dataloader (train/val datasets are not needed here)
    test_dataset = FallacyDataset(test_texts, test_labels, tokenizer)
    test_loader = DataLoader(test_dataset, batch_size=16)

    # Evaluate the model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    model.eval()
    test_predictions = []
    test_true_ids = []

    with torch.no_grad():
        for batch in test_loader:
            outputs = model(
                input_ids=batch['input_ids'].to(device),
                attention_mask=batch['attention_mask'].to(device),
            )
            test_predictions.extend(outputs.logits.argmax(dim=-1).cpu().numpy())
            # Labels are only compared on the host, so they are not moved to the device
            test_true_ids.extend(batch['labels'].numpy())

    # Convert numeric labels back to original classes
    reverse_mapping = {v: k for k, v in label_mapping.items()}
    test_predictions_labels = [reverse_mapping[pred] for pred in test_predictions]
    test_true_labels = [reverse_mapping[label] for label in test_true_ids]

    # Print final classification report
    print("\nFinal Classification Report:")
    print(classification_report(test_true_labels, test_predictions_labels))

# Evaluate the checkpoint saved under best_bert_model_binary on the binary dataset's test split
file_path = "/content/drive/MyDrive/ProjectFilesForml/LogicalFallacyDetection/binary_data.csv"
model_path = "/content/drive/MyDrive/ProjectFilesForml/best_bert_model_binary"
load_and_evaluate_model(file_path=file_path, model_path=model_path)



Final Classification Report:
              precision    recall  f1-score   support

       False       0.70      0.89      0.78      1006
        True       0.57      0.28      0.38       545

    accuracy                           0.67      1551
   macro avg       0.63      0.58      0.58      1551
weighted avg       0.65      0.67      0.64      1551

