In [6]:
import torch
import torch
if torch.cuda.is_available():
   print("gpu")
else: 
    torch.device("cpu")

gpu


In [1]:
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
import numpy as np
import json
from torch.utils.data import DataLoader
from scipy.stats import spearmanr
from sklearn.metrics import accuracy_score

# Load tokenizer and model
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)

# Determine the computing device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Function to encode the data
def encode_data(data):
    texts = [f"[HYP] {item['hyp']} [TGT] {item['tgt']} [SRC] {item['src']} [REF] {item['ref']} [TASK] {item['task']}" for item in data]
    labels = [1 if item['label'] == 'Hallucination' else 0 for item in data]
    prop_hallucination = [float(item.get('p(Hallucination)', 0)) for item in data]
    encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
    return encodings, labels, prop_hallucination

# Load and encode the data
with open('newtrain_data.json', 'r', encoding='utf-8') as f:
    train_data = json.load(f)
train_encodings, train_labels, _ = encode_data(train_data)

with open('val.model-agnostic.json', 'r', encoding='utf-8') as f:
    val_data = json.load(f)
val_encodings, val_labels, val_prop_hallucination = encode_data(val_data)

# Define the dataset class
class HallucinationDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = {key: torch.tensor(val) for key, val in encodings.items()}
        self.labels = torch.tensor(labels)

    def __getitem__(self, idx):
        item = {key: self.encodings[key][idx] for key in self.encodings}
        item['labels'] = self.labels[idx]
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = HallucinationDataset(train_encodings, train_labels)
val_dataset = HallucinationDataset(val_encodings, val_labels)

# DataLoader setup
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=64, pin_memory=True)

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
)

# Define compute_metrics function for accuracy and Spearman correlation
def compute_metrics(pred):
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=-1)
    pred_probs = pred.predictions[:, 1]  # probabilities for the positive class
    acc = accuracy_score(labels, preds)
    spearman_corr = spearmanr(val_prop_hallucination, pred_probs).correlation
    return {'accuracy': acc, 'spearman_correlation': spearman_corr}

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

# Train and evaluate
trainer.train()
results = trainer.evaluate()
print(results)





Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False)


  0%|          | 0/939 [00:00<?, ?it/s]

{'loss': 0.7137, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.03}
{'loss': 0.6671, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.06}
{'loss': 0.6619, 'learning_rate': 3e-06, 'epoch': 0.1}
{'loss': 0.6665, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.13}
{'loss': 0.6457, 'learning_rate': 5e-06, 'epoch': 0.16}
{'loss': 0.6176, 'learning_rate': 6e-06, 'epoch': 0.19}
{'loss': 0.6069, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.22}
{'loss': 0.5384, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.26}
{'loss': 0.4721, 'learning_rate': 9e-06, 'epoch': 0.29}
{'loss': 0.5301, 'learning_rate': 1e-05, 'epoch': 0.32}
{'loss': 0.4774, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.35}
{'loss': 0.4778, 'learning_rate': 1.2e-05, 'epoch': 0.38}
{'loss': 0.46, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.42}
{'loss': 0.424, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.45}
{'loss': 0.3889, 'learning_rate': 1.5e-05, 'epoch': 0.48}
{'loss': 0.4312, 'learning_

  0%|          | 0/8 [00:00<?, ?it/s]

{'eval_loss': 1.65683913230896, 'eval_accuracy': 0.6533066132264529, 'eval_spearman_correlation': 0.36353853629277566, 'eval_runtime': 55.8508, 'eval_samples_per_second': 8.935, 'eval_steps_per_second': 0.143, 'epoch': 3.0}


In [2]:
model_save_path = './saved_model'
tokenizer_save_path = './saved_tokenizer'
model.save_pretrained(model_save_path)
tokenizer.save_pretrained(tokenizer_save_path)

('./saved_tokenizer\\tokenizer_config.json',
 './saved_tokenizer\\special_tokens_map.json',
 './saved_tokenizer\\vocab.txt',
 './saved_tokenizer\\added_tokens.json')

In [1]:
import torch
import json
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score,f1_score

# Load the trained model and tokenizer
model_path = './saved_model'
tokenizer_path = './saved_tokenizer'
model = DistilBertForSequenceClassification.from_pretrained(model_path, num_labels=2)
tokenizer = DistilBertTokenizer.from_pretrained(tokenizer_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Function to encode the data
def encode_data(data, tokenizer):
    # Provide default values if keys are missing
    texts = [
        f"[HYP] {item.get('hyp', 'Missing HYP')} "
        f"[TGT] {item.get('tgt', 'Missing TGT')} "
        f"[SRC] {item.get('src', 'Missing SRC')} "
        f"[REF] {item.get('ref', 'Missing REF')} "
        f"[TASK] {item.get('task', 'Missing TASK')}"
        for item in data
    ]
    encodings = tokenizer(texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
    return encodings['input_ids'], encodings['attention_mask']

# Load and encode test data
with open('val.model-agnostic.json', 'r', encoding='utf-8') as f:
    test_data = json.load(f)
test_input_ids, test_attention_masks = encode_data(test_data, tokenizer)

# DataLoader setup
class TestDataLoader(torch.utils.data.Dataset):
    def __init__(self, input_ids, attention_masks):
        self.input_ids = input_ids
        self.attention_masks = attention_masks

    def __getitem__(self, idx):
        return {'input_ids': self.input_ids[idx], 'attention_mask': self.attention_masks[idx]}

    def __len__(self):
        return len(self.input_ids)

test_dataset = TestDataLoader(test_input_ids, test_attention_masks)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Prediction
def evaluate_model(model, loader):
    model.eval()
    predictions = []
    actual_labels = []
    with torch.no_grad():
        for batch in loader:
            outputs = model(batch['input_ids'].to(device), attention_mask=batch['attention_mask'].to(device))
            preds = torch.argmax(outputs.logits, dim=1)
            predictions.extend(preds.cpu().numpy())
            actual_labels.extend(batch['labels'].cpu().numpy())

    # Calculate accuracy and other metrics if needed
    accuracy = accuracy_score(actual_labels, predictions)
    f1 = f1_score(actual_labels, preds)
    return accuracy, predictions

# Call the evaluation function
accuracy, predictions = evaluate_model(model, test_loader)
print("Test Accuracy:", accuracy)

# Optionally, save predictions to CSV
prediction_data = pd.DataFrame({'Predictions': predictions})
prediction_data.to_csv('predictions.csv', index=False)



KeyError: 'labels'

In [7]:
import torch
import numpy as np
import json
import pandas as pd
from torch.utils.data import DataLoader
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from scipy.stats import spearmanr
from sklearn.metrics import accuracy_score,f1_score

# Assuming tokenizer and model are loaded and the model is placed on the correct device

# Function to encode the data and handle the label array
def encode_data_with_labels(data, tokenizer):
    texts = []
    binary_labels = []  # 0 for "Not Hallucination", 1 for "Hallucination"

    for item in data:
        hyp = item.get('hyp', 'Missing HYP')
        tgt = item.get('tgt', 'Missing TGT')
        src = item.get('src', 'Missing SRC')
        ref = item.get('ref', 'Missing REF') if 'ref' in item else 'No REF'
        task = item.get('task', 'Missing TASK')
        # Interpret the label list: 0 if all entries are "Not Hallucination", 1 otherwise
        label = 1 if any(label == "Hallucination" for label in item.get('labels', [])) else 0
        texts.append(f"[HYP] {hyp} [TGT] {tgt} [SRC] {src} [REF] {ref} [TASK] {task}")
        binary_labels.append(label)

    encodings = tokenizer(texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
    return encodings['input_ids'], encodings['attention_mask'], torch.tensor(binary_labels)

# Load test data and encode it
with open('test.model-agnostic.json', 'r', encoding='utf-8') as f:
    test_data = json.load(f)
test_input_ids, test_attention_masks, test_labels = encode_data_with_labels(test_data, tokenizer)

# DataLoader for the test data
class TestDataLoader(torch.utils.data.Dataset):
    def __init__(self, input_ids, attention_masks, labels):
        self.input_ids = input_ids
        self.attention_masks = attention_masks
        self.labels = labels

    def __getitem__(self, idx):
        return {
            'input_ids': self.input_ids[idx],
            'attention_mask': self.attention_masks[idx],
            'labels': self.labels[idx]
        }

    def __len__(self):
        return len(self.input_ids)

test_dataset = TestDataLoader(test_input_ids, test_attention_masks, test_labels)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Model evaluation function assuming it returns predictions
def evaluate_model(model, loader):
    model.eval()
    predictions = []
    actual_labels = []
    with torch.no_grad():
        for batch in loader:
            outputs = model(batch['input_ids'].to(device), attention_mask=batch['attention_mask'].to(device))
            preds = torch.argmax(outputs.logits, dim=1)
            predictions.extend(preds.cpu().numpy())
            actual_labels.extend(batch['labels'].cpu().numpy())

    # Calculate accuracy and other metrics if needed
    accuracy = accuracy_score(actual_labels, predictions)
    f1 = f1_score(actual_labels, preds)
    return accuracy, predictions

# Call the evaluation function
accuracy, predictions = evaluate_model(model, test_loader)
print("Test Accuracy:", accuracy)

# Optionally, save predictions to CSV
prediction_data = pd.DataFrame({'Predictions': predictions})
prediction_data.to_csv('predictions.csv', index=False)


Test Accuracy: 0.5906666666666667
