# Analyse Dataset

Our goal in this project is to analyse how well model probes generalise to new tasks. This requires datasets that accurately capture high-stakes situations, so in this notebook we analyse a series of statistics to check that the dataset is of high quality.

## Goal:

- [x] Generate completions for each prompt in the dataset.
- [x] Analyse the completions to ensure there aren't confounding factors 

In [None]:
#Imports
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM
import os
import pandas as pd
from pathlib import Path
from models_under_pressure.utils import generate_completions

# The project root is the parent of the current working directory.
project_root = Path(os.pardir).resolve()
print('Current Working Directory:' + os.getcwd())

In [None]:
# Read the dataset from its JSON Lines file (one record per line)
dataset_path = project_root / "temp_data/dataset_21_feb.jsonl"
df = pd.read_json(dataset_path, lines=True)
print(f"Loaded dataset with {df.shape[0]} rows")


In [None]:
# Read the CSV version of the dataset that already contains completions.
# This rebinds `df`, replacing whatever was loaded before.
completions_path = project_root / "temp_data/dataset_21-02-2025_completions.csv"
df = pd.read_csv(completions_path)
print(f"Loaded dataset with {len(df.index)} rows")

In [None]:
# Load the model
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # set before the tokenizer is created
model_name = "meta-llama/Llama-3.3-70B-Instruct"
cache_dir = '/scratch/ucabwjn/.cache'
# NOTE(review): `device` is not used in this cell; placement is decided by device_map="auto".
device = 'cuda:0'

# Load the Llama-3.3-70B-Instruct model in float16, sharded across the GPUs
# listed in max_memory.
# NOTE(review): max_memory lists GPUs 0, 1 and 3 but not 2 -- confirm that
# skipping GPU 2 is intentional and not a typo.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    max_memory={0: "70GB", 1: "70GB", 3: "70GB"},
    torch_dtype=torch.float16,
    cache_dir=cache_dir,
)

# Use the same cache directory as the model so the tokenizer files are not
# downloaded to the default cache location instead.
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

In [None]:
# Run generate_completions over every prompt and store the result in a new
# 'completions' column.
prompts = df['prompt'].tolist()
df['completions'] = generate_completions(model, tokenizer, prompts)

In [None]:
# Print a small random sample of prompt/completion pairs for manual inspection.
pd.set_option('display.max_colwidth', None)  # do not truncate long text
sample_size = 5
# Fixed random_state keeps the sample the same across runs.
sample_df = df[['prompt', 'completions']].sample(n=sample_size, random_state=42)

separator = "-"*80 + "\n"
print("Sample of Prompt-Completion Pairs:\n")
for idx, prompt, completion in zip(sample_df.index, sample_df['prompt'], sample_df['completions']):
    print(f"Prompt {idx}:\n{prompt}\n")
    print(f"Completion:\n{completion}\n")
    print(separator)


In [None]:
# Write the DataFrame, including the completions column, to CSV without the index.
output_path = project_root / "temp_data/dataset_21-02-2025_completions.csv"
df.to_csv(output_path, index=False)
print("Saved dataset with completions to " + str(output_path))

# Create a word cloud for the completions and prompts

In [None]:
# Reload the dataset with completions from disk
completions_csv = project_root / "temp_data/dataset_21-02-2025_completions.csv"
df = pd.read_csv(completions_csv)
print(f"Loaded dataset with {len(df)} rows")

In [None]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Get unique categories
categories = df['category'].unique()

# Draw one word cloud per category, first for all prompts and then for all
# completions. A single nested loop replaces two copy-pasted loops that
# differed only in the column read and the word used in the title.
for column, label in [('prompt', 'Prompts'), ('completions', 'Completions')]:
    for category in categories:
        # Missing cells come back from read_csv as NaN (a float), which would
        # make ' '.join raise TypeError, so drop them and coerce the rest to str.
        texts = df.loc[df['category'] == category, column].dropna().astype(str)
        category_text = ' '.join(texts)

        # WordCloud.generate raises ValueError when it is given no words.
        if not category_text.strip():
            print(f'No {column} text for category {category!r}; skipping word cloud')
            continue

        # Generate word cloud
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(category_text)

        # Create new figure for this category
        plt.figure(figsize=(20, 10))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.title(f'Word Cloud of {label} - {category}')
        plt.show()


In [None]:
# Check unique values in high_stakes column
print(df['high_stakes'].unique())

# Convert high_stakes column to numeric values (0 = low stakes, 1 = high stakes).
# pd.read_csv may already have parsed this column as bool or int, in which case
# a map keyed on the strings '0'/'1'/'True'/'False' matches nothing and turns
# every value into NaN (the same happens when this cell is re-run). Normalising
# to lower-case strings first makes the mapping work for str, bool, int and
# float inputs, and makes the cell safe to run more than once.
stakes_map = {'0': 0, '1': 1, 'false': 0, 'true': 1, '0.0': 0, '1.0': 1}
raw_stakes = df['high_stakes'].copy()
df['high_stakes'] = raw_stakes.astype(str).str.strip().str.lower().map(stakes_map)

# Report values that were present but not recognised, so they do not silently
# drop out of the stakes-based analyses further down.
unmapped = df['high_stakes'].isna() & raw_stakes.notna()
if unmapped.any():
    print(f"Warning: {int(unmapped.sum())} high_stakes value(s) could not be mapped: "
          f"{raw_stakes[unmapped].unique()}")

print(df['high_stakes'].unique())

In [None]:
# Get a specific category (e.g., first category)
category = df['category'].unique()[0]
in_category = df['category'] == category

# Create word clouds for high stakes vs low stakes text within that category,
# first for the prompts and then for the completions. A single nested loop
# replaces two copy-pasted loops that differed only in column and title.
for column, label in [('prompt', 'Prompts'), ('completions', 'Completions')]:
    for stakes in [0, 1]:
        # Filter rows for this category and stakes level. NaN cells are dropped
        # and the rest coerced to str, because ' '.join raises TypeError on
        # non-string items.
        texts = df.loc[in_category & (df['high_stakes'] == stakes), column].dropna().astype(str)
        stakes_text = ' '.join(texts)

        # WordCloud.generate raises ValueError when it is given no words, which
        # happens if no row matches this category/stakes combination.
        if not stakes_text.strip():
            print(f'No {column} text for category {category!r} with high_stakes={stakes}; '
                  'skipping word cloud')
            continue

        # Generate word cloud
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(stakes_text)

        # Create new figure
        plt.figure(figsize=(20, 10))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.title(f'Word Cloud of {label} - {category} (High Stakes: {stakes})')
        plt.show()


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Get a specific category (e.g., first category)
category = df['category'].unique()[0]
in_category = df['category'] == category

# Create TF-IDF-weighted word clouds for high stakes vs low stakes text within
# that category, first for the prompts and then for the completions. A single
# nested loop replaces two copy-pasted loops that differed only in column and
# title.
for column, label in [('prompt', 'Prompts'), ('completions', 'Completions')]:
    for stakes in [0, 1]:
        # Filter rows for this category and stakes level. NaN cells are dropped
        # and the rest coerced to str so the vectorizer only sees strings.
        documents = (
            df.loc[in_category & (df['high_stakes'] == stakes), column]
            .dropna()
            .astype(str)
            .tolist()
        )

        # fit_transform raises ValueError on an empty corpus, so skip the
        # combination when no row matches.
        if not documents:
            print(f'No {column} text for category {category!r} with high_stakes={stakes}; '
                  'skipping TF-IDF word cloud')
            continue

        # Calculate TF-IDF, fitting a fresh vectorizer on this subset only
        tfidf = TfidfVectorizer()
        tfidf_matrix = tfidf.fit_transform(documents)

        # Sum each term's TF-IDF weight over all documents in the subset
        feature_names = tfidf.get_feature_names_out()
        tfidf_scores = tfidf_matrix.sum(axis=0).A1

        # Create dictionary of word frequencies based on TF-IDF scores
        word_freq = dict(zip(feature_names, tfidf_scores))

        # Generate word cloud using TF-IDF frequencies
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(word_freq)

        # Create new figure
        plt.figure(figsize=(20, 10))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.title(f'TF-IDF Word Cloud of {label} - {category} (High Stakes: {stakes})')
        plt.show()
