#Downloads

In [None]:
!pip install torchcodec --quiet
!pip install torchinfo --quiet
!pip install transformers accelerate sentencepiece torchaudio diffusers datasets soundfile pillow --quiet
!pip install rouge-score

#Imports

In [None]:
import os
import shutil
import soundfile as sf
from IPython.display import display

import torch
import torchaudio
import torch.nn as nn
from torchinfo import summary
from torchvision import datasets
from torchvision.models import vit_b_16
from torch.utils.data import DataLoader, random_split, TensorDataset, Dataset

from PIL import Image
from diffusers import FluxPipeline
from transformers import WhisperProcessor, WhisperModel, WhisperForConditionalGeneration, pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM, GenerationConfig


import gc
import json
import uuid
import time
import random
import librosa
import logging
import kagglehub
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
from pathlib import Path
import matplotlib.pyplot as plt
from dataclasses import dataclass
from sklearn.svm import LinearSVC
from rouge_score import rouge_scorer
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from huggingface_hub import login
# Authenticate with the Hugging Face Hub (called with no arguments, so no
# token is supplied from code here).
login()
# NOTE(review): HF_TOKEN is set to an empty string — presumably a redacted
# placeholder; confirm whether a real token is expected before running.
os.environ["HF_TOKEN"] = ""

#Rewrite Prompts (LLM part alone)

Rewrite 5000 prompts from the text2image dataset so that they can be used for image generation later.

The rewritten prompts are also used for the ROUGE evaluation.

In [None]:
#Took 5000 prompts randomly from the text2image dataset
!cp "/content/drive/MyDrive/GenAI/Project/text2image_5000.jsonl" "/content/"

In [None]:
# We picked these emotions because later we can do emoA on them.
# Maps each emotion name to an integer class id.
vit_classes = {
    "disgust": 0,
    "fear": 1,
    "happy": 2,
    "sad": 3,
    "surprised": 4,
}
emotion_names = list(vit_classes)

# Read the JSONL prompt file: one JSON object per non-blank line.
# The encoding is given explicitly so decoding does not depend on the
# platform default (the other JSON files in this notebook are UTF-8 too).
with open(
    "/content/drive/MyDrive/GenAI/Project/text2image_5000.jsonl",
    "r",
    encoding="utf-8",
) as f:
    prompts = [json.loads(line) for line in f if line.strip()]

# Build labeled dataset (text + random emotion only).
# NOTE(review): no random seed is set in this cell, so the emotion assigned
# to each prompt differs from run to run — seed `random` if the labels must
# be reproducible.
labeled_data = [
    {"text": item["text"], "emotion": random.choice(emotion_names)}
    for item in prompts
]

with open("/content/labeled_prompts.json", "w", encoding="utf-8") as f:
    json.dump(labeled_data, f, indent=2)


In [None]:
# @title
# System message for the rewriting LLM: states the insertion-only rules and
# the expected "Original prompt / Emotion / Output" layout.
system_prompt = """
Your task is to modify the given prompt by inserting emotion-related adjectives
while preserving every original word.

Rules:
1. Do NOT delete, replace, or reorder any original words.
2. Only INSERT 1–2 adjectives related to the given emotion.
3. Adjectives must be inserted directly before nouns.
4. Do NOT add new sentences, explanations, or details.
5. Output only the modified prompt — nothing else.

Format:
Original prompt: <prompt>
Emotion: <emotion>
Output: <modified prompt>
"""

# User message template; the {prompt} and {emotion} placeholders are filled
# in with str.format.
user_template = """
Original prompt: {prompt}
Emotion: {emotion}
Output:
"""


class LLM(nn.Module):
    """Chat-model wrapper that rewrites a prompt by inserting emotion adjectives.

    The module loads a causal language model and its tokenizer, builds a
    two-message chat (``system_prompt`` + filled ``user_template``), generates
    a continuation and strips boilerplate from the decoded text.
    """

    def __init__(
        self,
        llm_model_name="meta-llama/Llama-3.1-8B-Instruct",
        device="cuda",
    ):
        """Load the model in bfloat16 and its fast tokenizer.

        Args:
            llm_model_name: Hub id or path passed to ``from_pretrained``.
            device: Stored on ``self.device`` only.
        """
        super().__init__()

        # NOTE(review): `device` is recorded but does not drive placement —
        # the weights are always mapped to device index 0 below.
        self.device = device

        self.llm = AutoModelForCausalLM.from_pretrained(
            llm_model_name,
            torch_dtype=torch.bfloat16,
            device_map={"": 0}
        )

        self.tokenizer = AutoTokenizer.from_pretrained(llm_model_name, use_fast=True)
        # Reuse EOS as the padding token.
        self.tokenizer.pad_token = self.tokenizer.eos_token

    @staticmethod
    def _clean_output(text):
        """Strip reasoning traces, echoed labels and wrapping quotes from *text*."""
        text = text.strip()

        # Cleanup mostly for other reasoning and non reasoning models that we used
        if "</think>" in text:
            text = text.split("</think>", 1)[-1].strip()

        # Labels a model may echo before the answer; keep only what follows
        # the last occurrence of each, checked in this order.
        bad_prefixes = (
            "Original prompt:", "Output:", "Modified prompt:", "modified prompt:",
            "Rewritten Prompt:", "Prompt:", "Result:",
        )
        for prefix in bad_prefixes:
            if prefix in text:
                text = text.split(prefix)[-1].strip()

        return text.strip().strip('"').strip()

    def forward(
        self,
        prompt,
        emotion,
        max_new_tokens=40,
        temperature=0.6,
        top_k=50,
        top_p=0.9,
        do_sample=True,
        stop_strings=None
    ):
        """Generate the emotion-augmented version of *prompt*.

        Args:
            prompt: Original text prompt.
            emotion: Emotion name inserted into the user message.
            max_new_tokens: Upper bound on generated tokens.
            temperature, top_k, top_p: Sampling settings; forwarded to
                ``generate`` only when ``do_sample`` is true.
            do_sample: Sample if true, otherwise decode without sampling.
            stop_strings: Optional string or list of strings that end
                generation; forwarded to ``generate`` when given.

        Returns:
            dict with keys ``input_prompt``, ``emotion``, ``raw_output`` (the
            decoded generation, whitespace-stripped but otherwise untouched)
            and ``final_prompt`` (the cleaned text).
        """
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_template.format(prompt=prompt, emotion=emotion).strip()},
        ]

        # Ask the tokenizer for the attention mask instead of deriving it
        # from `input_ids != pad_token_id`: pad is aliased to EOS above, so
        # that comparison would also mask any EOS token the chat template
        # places inside the prompt itself.
        encoded = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(self.llm.device)
        input_ids = encoded["input_ids"]
        attention_mask = encoded["attention_mask"]

        gen_kwargs = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "max_new_tokens": max_new_tokens,
            "do_sample": do_sample,
            "pad_token_id": self.tokenizer.eos_token_id,
            "eos_token_id": self.tokenizer.eos_token_id,
        }
        if do_sample:
            # Sampling knobs are meaningless without sampling (e.g. a caller
            # passing temperature=0.0 together with do_sample=False).
            gen_kwargs.update(temperature=temperature, top_p=top_p, top_k=top_k)
        if stop_strings is not None:
            # `generate` needs the tokenizer to match stop strings.
            gen_kwargs.update(stop_strings=stop_strings, tokenizer=self.tokenizer)

        with torch.no_grad():
            output_ids = self.llm.generate(**gen_kwargs)

        # Keep only the newly generated tokens (drop the prompt part).
        gen_ids = output_ids[0, input_ids.shape[1]:]
        raw_text = self.tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
        final_text = self._clean_output(raw_text)

        return {
            "input_prompt": prompt,
            "emotion": emotion,
            "raw_output": raw_text,
            "final_prompt": final_text
        }


In [None]:
# Build the prompt-rewriting wrapper, move it to the GPU and switch it to
# evaluation mode.
llm_model = LLM()
llm_model = llm_model.to("cuda")
llm_model.eval()

In [None]:
# Take all items for now
subset = labeled_data

# Split the labelled records into two parallel lists.
prompts, emotions = [], []
for record in subset:
    prompts.append(record["text"])
    emotions.append(record["emotion"])

# Rewrite every prompt with its assigned emotion; keep both the final text
# and the "raw_output" field returned by the model wrapper.
new_prompts = []
unfiltered = []

progress = tqdm(zip(prompts, emotions), total=len(prompts), desc="Generating prompts")
for original, emo in progress:
    result = llm_model(
        prompt=original,
        emotion=emo,
        max_new_tokens=100,
        temperature=0.0,
        do_sample=False
    )
    new_prompts.append(result["final_prompt"])
    unfiltered.append(result["raw_output"])

# One record per prompt: original text, emotion label and rewritten text.
output_data = [
    {
        "original_prompt": original,
        "emotion": emotion,
        "new_prompt": new
    }
    for original, emotion, new in zip(prompts, emotions, new_prompts)
]

with open("/content/modified_prompts.json", "w", encoding="utf-8") as f:
    json.dump(output_data, f, ensure_ascii=False, indent=4)

print("Saved to modified_prompts.json")

# Echo every rewrite, separated by a dashed rule.
separator = "\n" + "-"*50 + "\n"
for original, emotion, new in zip(prompts, emotions, new_prompts):
    print(f"Original prompt: {original}")
    print(f"Emotion: {emotion}")
    print(f"New prompt: {new}")
    print(separator)


#Evaluate Rewritten Prompts Using ROUGE Score

In [None]:

# Load the original/rewritten prompt pairs from modified_prompts.json.
with open("/content/modified_prompts.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Fail with a clear message instead of a ZeroDivisionError when averaging.
if not data:
    raise ValueError(
        "/content/modified_prompts.json contains no items; "
        "cannot compute average ROUGE scores"
    )

rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Score each rewritten prompt against its original and keep the F-measures.
results = []

for item in data:
    original = item["original_prompt"]
    rewritten = item["new_prompt"]

    # The original prompt is passed first and the rewritten prompt second.
    scores = rouge.score(original, rewritten)

    results.append({
        "original_prompt": original,
        "emotion": item["emotion"],
        "new_prompt": rewritten,
        "rouge1": scores["rouge1"].fmeasure,
        "rouge2": scores["rouge2"].fmeasure,
        "rougeL": scores["rougeL"].fmeasure
    })


def _mean_score(rows, key):
    """Return the arithmetic mean of ``row[key]`` over a non-empty list."""
    return sum(row[key] for row in rows) / len(rows)


# Compute averages
avg_rouge1 = _mean_score(results, "rouge1")
avg_rouge2 = _mean_score(results, "rouge2")
avg_rougeL = _mean_score(results, "rougeL")

print("=== AVERAGE ROUGE SCORES ===")
print("ROUGE-1:", avg_rouge1)
print("ROUGE-2:", avg_rouge2)
print("ROUGE-L:", avg_rougeL)
