In [None]:
import json
import random
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from tqdm.notebook import trange, tqdm
from datetime import datetime
from uuid import uuid4
import ollama

In [None]:
# Load LLaMA model
def load_llama_model(model_name="/home/g4/Llama-3.2-3B-Instruct"):
    """Load a causal-LM checkpoint and its tokenizer and move the model to a device.

    Args:
        model_name: Path or hub id handed to ``from_pretrained`` for both the
            model and the tokenizer. Defaults to the local Llama-3.2-3B-Instruct
            checkpoint this notebook was written against.

    Returns:
        A ``(model, tokenizer, device)`` tuple. The model has already been moved
        to ``device``, which is CUDA when available and CPU otherwise.
    """
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Compare against None explicitly: 0 is a legitimate token id, and
    # `pad or eos` would silently replace it with the EOS id.
    if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    if model.config.pad_token_id is None:
        # Prefer the tokenizer's pad id so model and tokenizer agree.
        pad_id = tokenizer.pad_token_id
        if pad_id is None:
            pad_id = model.config.eos_token_id
        # Some configs store several EOS ids as a list; a pad id must be a single int.
        if isinstance(pad_id, (list, tuple)):
            pad_id = pad_id[0] if pad_id else None
        model.config.pad_token_id = pad_id

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    return model.to(device), tokenizer, device




In [None]:
import glob
import os


In [None]:
# Local Hugging Face model loading is currently disabled; generation goes through Ollama instead.
# model, tokenizer, device = load_llama_model()

# Cognitive distortions: maps each distortion name to a first-person description.
# The generation loop picks one name at random per event and embeds both the
# name and its description in the user prompt.
cognitive_distortions = {
   "All or Nothing Thinking": "I view a situation, a person or an event in “either-or” terms, fitting them into only two extreme categories instead of on a continuum.",
    "Fortune Telling": "I predict the future in negative terms and believe that what will happen will be so awful that I will not be able to stand it.",
    "Emotional Reasoning": "I believe my emotions reflect reality and let them guide my attitudes and judgments.",
    "Labeling/Global Labeling": "I put a fixed, usually negative, global label on myself or others.",
    "Mental Filter": "I pay attention to one or a few details and fail to see the whole picture.",
    "Mind Reading": "I believe that I know the thoughts or intentions of others (or that they know my thoughts or intentions) without sufficient evidence.",
    "Overgeneralization": "I take isolated negative cases and generalize them, using words like “always,” “never,” “whole,” “entire,” etc.",
    "Personalization": "I assume that others’ behaviors and external events concern myself without considering other plausible explanations.",
    "Should Statements": "I tell myself that events, people’s behaviors, and my own attitudes “should” be the way I expected, not as they are.",
    "Blaming": "I direct blame to others for my negative feelings or take responsibility for others' behaviors and attitudes.",
    "What if?": "I keep asking questions like “what if something happens?” focusing on negative outcomes.",
    "Discounting the Positive": "I disqualify positive experiences or events, insisting that they don’t count.",
    "Magnification/Minimization": "I emphasize the negatives or downplay positives in myself, others, or situations.",
    "Jumping to Conclusions": "I draw conclusions from little or no confirmatory evidence.",
    "Unfair Comparisons": "I compare myself with others who seem better and place myself at a disadvantage."
}


# Variants of writing style for diversity: one value from each list is sampled
# independently for every prompt.
# NOTE(review): "blunt" and "sarcastic" appear in both `tones` and `structures`,
# so a prompt can request e.g. a "sarcastic tone, sarcastic structure" — confirm this is intended.
tones = ["angry", "calm", "frustrated", "sarcastic", "venting", "blunt", "sad", "anxious", "direct"]
structures = ["structured", "scattered", "nuanced", "blunt", "sarcastic"]
intensities = ["mild", "intense", "slightly annoyed", "highly emotional", "casual"]
# Each entry is inserted verbatim at the start of a prompt sentence ("<entry> in the response.").
use_of_language = ["use cuss words", "avoid cuss words", "use formal language", "use casual language"]
# System prompt sent as the "system" role message with every generation request.
system_message = """
You are an expert assistant specializing in identifying and generating text examples that reflect common cognitive distortions.
Your task is to generate writing prompts and then create text from the perspective of people experiencing specific cognitive distortions without being aware of it.
Each time you are asked to generate text, remember that the person should not be aware of their cognitive distortion.
Make sure the situation, writing style, and personality of the person are different for each response.
The final text should demonstrate the cognitive distortion in a subtle, human, realistic way.
Make responses realistic, expressive, and aligned with the triggering event.
"""



In [None]:
def generate_response(text: str, count: int, model: str = "mistral-nemo") -> str:
    """Generate one completion for ``text`` through the local Ollama server.

    The module-level ``system_message`` is sent as the system prompt and
    ``text`` as the single user turn.

    Args:
        text: User prompt to send to the model.
        count: Maximum number of tokens to generate (Ollama ``num_predict``).
        model: Name of the Ollama model to query. Defaults to ``"mistral-nemo"``.

    Returns:
        The assistant message content with surrounding whitespace stripped.

    Raises:
        Whatever ``ollama.chat`` raises (e.g. server unreachable, unknown model);
        errors are not handled here so the caller can decide how to proceed.
    """
    response = ollama.chat(
        model=model,
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": text},
        ],
        # Ollama uses its own option names: `repeat_penalty` and `num_predict`
        # rather than the Hugging Face `repetition_penalty` / `max_new_tokens`.
        # There is no `do_sample` switch; sampling is controlled by
        # temperature / top_p.
        options={
            "temperature": 0.7,
            "top_p": 0.9,
            "repeat_penalty": 1.1,
            "num_predict": count,
        },
    )

    return response["message"]["content"].strip()



In [None]:
# Load pre-generated trigger events from multiple JSON files in the data directory
trigger_events_dir = "/home/g4/Mindwell/DataGenerationCBTApproach/trigger_events_llama_new/all_trigger"
# Adjust the glob pattern if necessary; here we load all JSON files.
# Sorted so that files are processed in a stable order from run to run.
trigger_events_files = sorted(glob.glob(os.path.join(trigger_events_dir, "*.json")))

# One entry per non-empty input file; each entry is that file's parsed content.
trigger_events = []
for file in trigger_events_files:
    with open(file, "r", encoding="utf-8") as f:
        data = json.load(f)
    # Assuming each file contains a list of trigger events; empty files are skipped.
    if data:
        trigger_events.append(data)

# Generate subjective assessments based on trigger events
output_dir = "/home/g4/Mindwell/DataGenerationCBTApproach/data_nemo_llama_new/new_10k"
os.makedirs(output_dir, exist_ok=True)


def _build_user_message(event, distortion):
    """Compose the user prompt for one trigger event and one distortion name.

    Samples a tone, structure, intensity and language-use instruction at random
    (in that order) so that every prompt asks for a different writing style.
    Raises ``KeyError`` if ``event`` has no ``'Generated Scenario'`` field.
    """
    return (
        f"Based on the following event, generate a subjective reaction reflecting the cognitive distortion '{distortion}':\n"
        f"Triggering Event: {event['Generated Scenario']}\n\n"
        f"The response should reflect '{cognitive_distortions[distortion]}' subtly, "
        f"without explicitly stating the distortion. "
        f"Make sure the person seems unaware of their cognitive bias.\n\n"
        f"Use a {random.choice(tones)} tone, {random.choice(structures)} structure, and {random.choice(intensities)} intensity. "
        # Trailing space keeps this sentence from running into the next one.
        f"{random.choice(use_of_language)} in the response. "
        f"You should output the data only without any commentary."
    )


distortion_names = list(cognitive_distortions.keys())

for event_file in tqdm(trigger_events, desc="Generating Assessments"):
    generated_data = []
    for event in event_file:
        distortion = random.choice(distortion_names)  # Randomly assign a cognitive distortion

        # Prompt construction sits inside the try block as well, so a malformed
        # event is reported and skipped instead of aborting the whole batch.
        try:
            user_message = _build_user_message(event, distortion)
            generated_assessment = generate_response(user_message, 800).strip()

            generated_data.append({
                "Triggering Event": event["Generated Scenario"],
                "Sex": event["Sex"],
                "Age": event["Age"],
                "Occupation": event["Occupation"],
                "Relationship Status": event["Relationship Status"],
                "Negative Emotion": event["Negative Emotion"],
                "Cognitive Distortion": distortion,
                "Generated Assessment": generated_assessment
            })
        except Exception as e:
            print(f"Error generating assessment: {e}")

    # Save the generated data: one output file per input file.
    # The timestamp only has second resolution, so a short random suffix keeps
    # two batches that finish within the same second from overwriting each other.
    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    file_name = f"{output_dir}/{timestamp}_{uuid4().hex[:8]}_subjective_assessments.json"
    with open(file_name, 'w', encoding="utf-8") as f:
        json.dump(generated_data, f, indent=4)
    print(f"Saved {len(generated_data)} subjective assessments to {file_name}")

Generating Assessments:   0%|          | 0/1000 [00:00<?, ?it/s]

Saved 10 subjective assessments to /home/g4/Mindwell/DataGenerationCBTApproach/data_nemo/new_10k/2025-05-06_18-00-10_subjective_assessments.json
Saved 10 subjective assessments to /home/g4/Mindwell/DataGenerationCBTApproach/data_nemo/new_10k/2025-05-06_18-01-05_subjective_assessments.json
Saved 10 subjective assessments to /home/g4/Mindwell/DataGenerationCBTApproach/data_nemo/new_10k/2025-05-06_18-01-50_subjective_assessments.json
Saved 10 subjective assessments to /home/g4/Mindwell/DataGenerationCBTApproach/data_nemo/new_10k/2025-05-06_18-02-32_subjective_assessments.json
Saved 10 subjective assessments to /home/g4/Mindwell/DataGenerationCBTApproach/data_nemo/new_10k/2025-05-06_18-03-48_subjective_assessments.json
Saved 10 subjective assessments to /home/g4/Mindwell/DataGenerationCBTApproach/data_nemo/new_10k/2025-05-06_18-04-33_subjective_assessments.json
Saved 10 subjective assessments to /home/g4/Mindwell/DataGenerationCBTApproach/data_nemo/new_10k/2025-05-06_18-05-45_subjective_as

: 