In [2]:
pip install transformers[torch]

^C
Note: you may need to restart the kernel to use updated packages.


In [None]:
pip install vaderSentiment


In [None]:
import logging

import matplotlib.pyplot as plt
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Pick the compute device: Apple-Silicon MPS when available, otherwise CPU.
# (CUDA is not probed here.)
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Set up the model and tokenizer
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
# Reuse EOS as the padding token so tokenizer calls with padding=True work.
tokenizer.pad_token = tokenizer.eos_token
# Build the pipeline on the same device as the model. The pipeline shares the
# `model` object, so asking for a different device here would move the model
# away from `device` while other code still sends its inputs to `device`.
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)

# Load and prepare the dataset (only rows of the "gender" domain are kept)
file_path = '/content/drive/MyDrive/bold.csv'
df = pd.read_csv(file_path)
df = df[df.domain == "gender"]

# Fixed 2000 samples (1000 per category) drawn once at the start of the experiment
sample_size_per_category = 1000
seed = 42

# Randomly choose samples for the male and female categories.
# NOTE: DataFrame.sample raises if a category has fewer rows than requested.
df_actors = df[df.category == "American_actors"].sample(n=sample_size_per_category, random_state=seed)
df_actresses = df[df.category == "American_actresses"].sample(n=sample_size_per_category, random_state=seed)
# Re-index 0..N-1; the main loop uses this index as the sample id.
fixed_samples = pd.concat([df_actors, df_actresses]).reset_index(drop=True)

# Function to generate synthetic data using pipeline
def generate_synthetic_data(pipeline, prompts, max_length=25, min_words=15):
    """Generate one continuation per prompt and return them as records.

    Args:
        pipeline: Text-generation pipeline; called with the prompt string and
            expected to return ``[{"generated_text": ...}]``. Its own
            ``tokenizer.eos_token_id`` is used as the padding id.
        prompts: Iterable of dicts with the keys ``text``, ``generation``,
            ``sample_id``, ``demographic`` and ``category``.
        max_length: Forwarded as ``max_new_tokens``.
        min_words: Forwarded as ``min_length``.
            NOTE(review): despite the name this is passed as a token-level
            ``min_length``, which presumably counts the prompt as well --
            confirm against the generation docs before relying on it.

    Returns:
        A list with one dict per prompt holding the metadata, the original
        ``prompt`` and the generated ``text`` (prompt prefix removed and
        stripped). When nothing was generated beyond the prompt, ``text`` is
        a single space.
    """
    # Use the pipeline's own tokenizer instead of a module-level global so the
    # padding id always matches the model that is actually generating.
    eos_token_id = pipeline.tokenizer.eos_token_id

    synthetic_data = []
    for item in prompts:
        prompt = item["text"]
        generated_text = pipeline(
            prompt,
            min_length=min_words,
            max_new_tokens=max_length,
            num_return_sequences=1,
            pad_token_id=eos_token_id,
        )[0]['generated_text']

        # The pipeline output starts with the prompt; keep only what follows it.
        if len(generated_text) > len(prompt):
            continuation = generated_text[len(prompt):].strip()
        else:
            continuation = " "  # placeholder for "nothing generated"

        synthetic_data.append({
            "generation": item["generation"],
            "sample_id": item["sample_id"],
            "prompt": prompt,
            "text": continuation,
            "demographic": item["demographic"],
            "category": item["category"],
        })
    return synthetic_data

# Prepare synthetic data for training
def prepare_synthetic_data_for_training(synthetic_data):
    """Tokenize generated texts into model inputs and language-modelling labels.

    Args:
        synthetic_data: Iterable of dicts; only the ``text`` key is read.

    Returns:
        ``(train_inputs, train_labels)`` where ``train_inputs`` is the padded
        and truncated (max 25 tokens) tokenizer output as PyTorch tensors and
        ``train_labels`` is a copy of its ``input_ids`` with padding positions
        set to -100.
    """
    train_texts = [item["text"] for item in synthetic_data]
    # Tokenize once; the labels are derived from the same ids.
    train_inputs = tokenizer(train_texts, truncation=True, padding=True, max_length=25, return_tensors="pt")
    train_labels = train_inputs["input_ids"].clone()
    # The pad token is EOS, so padding cannot be told apart by id. Use the
    # attention mask and mark padded positions with -100, the default
    # ignore_index of cross-entropy, so they do not contribute to the loss.
    train_labels[train_inputs["attention_mask"] == 0] = -100
    return train_inputs, train_labels

class SimpleDataset(torch.utils.data.Dataset):
    """Map-style dataset over pre-tokenized inputs and their labels.

    Args:
        inputs: Mapping from field name (must include ``"input_ids"``) to a
            tensor whose first dimension indexes the samples.
        labels: Tensor of labels, indexed the same way as ``inputs``.
    """

    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of the input fields plus ``labels``."""
        # Tensors are returned on whatever device they already live on. Device
        # placement is left to the training loop; moving them here would tie
        # the dataset to a module-level global and hand already-placed
        # accelerator tensors to the DataLoader.
        item = {key: val[idx] for key, val in self.inputs.items()}
        item['labels'] = self.labels[idx]
        return item

    def __len__(self):
        """Return the number of samples (rows of ``input_ids``)."""
        return len(self.inputs["input_ids"])

class CustomTrainer(Trainer):
    """Trainer that computes the causal language-modelling loss explicitly."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the next-token cross-entropy loss for one batch.

        Args:
            model: The model being trained; called as ``model(**inputs)``.
            inputs: Batch dict; must contain ``labels`` shaped like the input
                ids. Label positions equal to -100 are ignored.
            return_outputs: When true, return ``(loss, outputs)``.
            num_items_in_batch: Accepted for compatibility with Trainer
                versions that pass it; not used here.

        Returns:
            The scalar loss, or ``(loss, outputs)`` if ``return_outputs``.
        """
        outputs = model(**inputs)
        logits = outputs.get("logits")
        labels = inputs.get("labels")
        # In a causal LM the logits at position t score the token at t + 1.
        # Drop the last logit and the first label so each prediction is
        # compared with the token that actually follows it; without this
        # shift the model is trained to reproduce its current input token.
        shift_logits = logits[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous()
        loss = torch.nn.functional.cross_entropy(
            shift_logits.view(-1, shift_logits.size(-1)),
            shift_labels.view(-1),
            ignore_index=-100,
        )
        return (loss, outputs) if return_outputs else loss

# Score every non-blank generated text with the sentiment analyzer
def analyze_sentiments(data, sentiment_analyzer):
    """Return one compound-sentiment record per non-blank text in ``data``.

    Each record carries the item's ``demographic``, ``category``,
    ``sample_id`` and ``generation`` together with ``score``, the
    ``"compound"`` value from ``sentiment_analyzer.polarity_scores``.
    Items whose text is empty or whitespace-only are left out.
    """
    scored = []
    non_blank_entries = (entry for entry in data if entry["text"].strip())
    for entry in non_blank_entries:
        compound = sentiment_analyzer.polarity_scores(entry["text"])["compound"]
        record = {
            'demographic': entry['demographic'],
            'category': entry['category'],
            'score': compound,
            'sample_id': entry['sample_id'],
            'generation': entry['generation'],
        }
        scored.append(record)
    return scored

def analyze_regard(data, regard_model):
    """Return one "positive regard" record per non-blank text in ``data``.

    Args:
        data: Iterable of dicts with ``text``, ``demographic``, ``category``,
            ``sample_id`` and ``generation``.
        regard_model: Callable taking a text and returning a list of
            ``{"label": ..., "score": ...}`` dicts (optionally wrapped in one
            extra list).

    Returns:
        A list of dicts with the item metadata and ``score``: the score of the
        entry labelled ``"positive"``, or 0.0 when there is no such entry or
        the output has an unexpected shape.

    NOTE(review): if ``regard_model`` only returns its top label, every text
    whose top label is not "positive" scores 0.0 here -- confirm that this is
    the intended metric.
    """
    log = logging.getLogger(__name__)
    sentiment_results = []
    for item in data:
        text = item["text"]
        if not text.strip():
            continue
        regard_score = regard_model(text)
        # Per-sample trace; kept at DEBUG level so a full run does not emit
        # one line of output for every generated text.
        log.debug("Regard score for text %r: %s", text, regard_score)

        entries = regard_score
        # Accept a single level of nesting ([[{...}, ...]]) as well.
        if isinstance(entries, list) and len(entries) == 1 and isinstance(entries[0], list):
            entries = entries[0]

        if isinstance(entries, list) and entries and isinstance(entries[0], dict):
            positive_score = next(
                (
                    entry['score']
                    for entry in entries
                    if isinstance(entry, dict) and entry.get('label') == 'positive'
                ),
                0.0,
            )
        else:
            positive_score = 0.0  # Default value if the format is not as expected

        sentiment_results.append({
            'demographic': item['demographic'],
            'category': item['category'],
            'score': positive_score,
            'sample_id': item['sample_id'],
            'generation': item['generation'],
        })
    return sentiment_results

# Function to calculate perplexity
def calculate_perplexity(model, tokenizer, text):
    """Return the perplexity of ``text`` under ``model``.

    The text is encoded with ``tokenizer``, fed to ``model`` as both input and
    labels, and the exponential of the returned loss is reported.

    Args:
        model: Language model exposing ``device``, ``training``, ``eval()``
            and ``train()``; its call must return an object with ``.loss``.
        tokenizer: Tokenizer whose ``encode(text, return_tensors='pt')``
            yields the input ids.
        text: The string to score.

    Returns:
        ``exp(loss)`` as a Python float.
    """
    # Send the ids to wherever the model currently lives. The model can be
    # relocated after it is created (e.g. by a pipeline or a trainer), so a
    # module-level device constant may be stale.
    input_ids = tokenizer.encode(text, return_tensors='pt').to(model.device)

    # Score in eval mode so dropout does not add noise, then restore the
    # caller's mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            loss = model(input_ids, labels=input_ids).loss
    finally:
        model.train(was_training)

    return torch.exp(loss).item()

# Draw the per-generation metric curves, one figure per metric
def plot_score_differences(sentiment_history, regard_history, perplexity_history):
    """Show three line charts over the ``generation`` column.

    The first two plot the ``difference`` column of the sentiment and regard
    histories; the third plots ``average_perplexity`` from the perplexity
    history. Each history is converted to a DataFrame before anything is
    drawn, and every chart is displayed with ``plt.show()``.
    """
    frames = (
        pd.DataFrame(sentiment_history),
        pd.DataFrame(regard_history),
        pd.DataFrame(perplexity_history),
    )
    # (column to plot, legend/y-axis caption, figure title)
    chart_specs = (
        (
            'difference',
            'Difference in VADER Sentiment Score',
            'Difference in VADER Sentiment Score between American_actors and American_actresses',
        ),
        (
            'difference',
            'Difference in Regard Score',
            'Difference in Regard Score between American_actors and American_actresses',
        ),
        (
            'average_perplexity',
            'Average Perplexity',
            'Average Perplexity over Generations',
        ),
    )

    for frame, (column, caption, heading) in zip(frames, chart_specs):
        plt.figure()
        plt.plot(frame['generation'], frame[column], label=caption)
        plt.xlabel('Generation')
        plt.ylabel(caption)
        plt.title(heading)
        plt.legend()
        plt.show()

# Main loop to run multiple generations
num_generations = 10
sentiment_analyzer = SentimentIntensityAnalyzer()
regard_analyzer = pipeline("text-classification", model="sasha/regardv3", device='cpu')
sentiment_history = []
regard_history = []
perplexity_history = []
generated_texts = []
previous_synthetic_data = None


def _category_means(results):
    """Return (actors mean, actresses mean, actors - actresses) of ``score``.

    A category with no (non-None) scores contributes a mean of 0.
    """
    def _mean(category):
        scores = [
            res['score'] for res in results
            if res['category'] == category and res['score'] is not None
        ]
        return sum(scores) / len(scores) if scores else 0

    actors = _mean("American_actors")
    actresses = _mean("American_actresses")
    return actors, actresses, actors - actresses


for generation in range(num_generations):
    print(f"Generation {generation + 1}")

    # Same fixed prompts every generation, tagged with the generation number
    # and a sample id taken from the frame's index.
    records = fixed_samples.assign(
        generation=generation, sample_id=fixed_samples.index
    ).to_dict(orient='records')
    current_data = [
        {
            "generation": item["generation"],
            "sample_id": item["sample_id"],
            "text": item["prompts"],
            "demographic": item["domain"],
            "category": item["category"],
        }
        for item in records
    ]

    # Generate synthetic data.
    # NOTE(review): generation runs before this iteration's fine-tuning, so
    # generations 0 and 1 are both sampled from the untouched base model, and
    # the perplexity below is measured with a model one fine-tuning step ahead
    # of the one that produced the text -- confirm this ordering is intended.
    synthetic_data = generate_synthetic_data(text_generator, current_data)

    # Fine-tune the model with synthetic data from the previous generation
    if previous_synthetic_data is not None:
        train_inputs, train_labels = prepare_synthetic_data_for_training(previous_synthetic_data)
        train_dataset = SimpleDataset(train_inputs, train_labels)

        training_args = TrainingArguments(
            output_dir="./results",
            num_train_epochs=10,
            per_device_train_batch_size=64,
            save_steps=10_000,
            save_total_limit=2,
            logging_steps=100
        )

        trainer = CustomTrainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
        )

        trainer.train()

        # Training leaves the model in train mode; switch back so dropout is
        # off for generation and scoring.
        model.eval()

        # Rebuild the pipeline so the (possibly relocated) model ends up on
        # `device` again, matching where the rest of the script sends inputs.
        text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)

    previous_synthetic_data = synthetic_data

    # Perform sentiment analysis on the current generation
    sentiment_results = analyze_sentiments(synthetic_data, sentiment_analyzer)
    regard_results = analyze_regard(synthetic_data, regard_analyzer)

    # Index the scores by sample id (ids are unique within a generation) so
    # attaching them is one dict lookup per sample instead of a full scan.
    vader_by_id = {res['sample_id']: res['score'] for res in sentiment_results}
    regard_by_id = {res['sample_id']: res['score'] for res in regard_results}

    # Attach scores and compute perplexity once per non-blank sample; the same
    # values feed the per-generation average.
    perplexities = []
    for item in synthetic_data:
        item['vader_score'] = vader_by_id.get(item['sample_id'])
        item['regard_score'] = regard_by_id.get(item['sample_id'])
        if item['text'].strip():
            item['perplexity'] = calculate_perplexity(model, tokenizer, item['text'])
            perplexities.append(item['perplexity'])
        else:
            item['perplexity'] = None
    average_perplexity = sum(perplexities) / len(perplexities) if perplexities else float('inf')

    generated_texts.append(pd.DataFrame(synthetic_data))

    # Calculate and store sentiment score differences for each generation
    avg_actors_score, avg_actresses_score, score_difference = _category_means(sentiment_results)
    sentiment_history.append({
        'generation': generation,
        'actors': avg_actors_score,
        'actresses': avg_actresses_score,
        'difference': score_difference
    })

    # Calculate and store regard score differences for each generation
    avg_actors_regard_score, avg_actresses_regard_score, regard_score_difference = _category_means(regard_results)
    regard_history.append({
        'generation': generation,
        'actors': avg_actors_regard_score,
        'actresses': avg_actresses_regard_score,
        'difference': regard_score_difference
    })

    perplexity_history.append({
        'generation': generation,
        'average_perplexity': average_perplexity
    })

# Save the generated texts to CSV
all_generated_texts = pd.concat(generated_texts)
all_generated_texts.to_csv("generated_texts_across_generations2.csv", index=False)

# Plot score differences over generations
plot_score_differences(sentiment_history, regard_history, perplexity_history)
