In [18]:
import csv
import os
import random


In [12]:
# ========= CONFIG =========
GOEMOTIONS_PATH = "./goemotions_1/goemotions_merged_clean.csv"      # path to your GoEmotions file
OUTPUT_PATH = "compatika_v1_40mb.txt"   # output dataset
TARGET_SIZE_MB = 40                     # desired size in MB
TARGET_SIZE_BYTES = TARGET_SIZE_MB * 1024 * 1024

In [13]:
# If your labels are text names already; if they are IDs, map IDs -> names here.
# Example emotion set; adjust to match your GoEmotions mapping.
EMOTION_NAMES = [
    "admiration", "amusement", "anger", "annoyance", "approval",
    "caring", "confusion", "curiosity", "desire", "disappointment",
    "disapproval", "disgust", "embarrassment", "excitement", "fear",
    "gratitude", "grief", "joy", "love", "nervousness",
    "optimism", "pride", "realization", "relief", "remorse",
    "sadness", "surprise", "neutral"
]


In [14]:
# Simple templates per emotion (you can expand this later)
TEMPLATES = {
    "sadness": [
        "That sounds really painful. Itâ€™s okay to feel sad. Want to share more about what happened?",
        "It makes sense that youâ€™d feel this way. You donâ€™t have to handle it alone. Whatâ€™s on your mind?"
    ],
    "anger": [
        "Itâ€™s understandable to feel angry in a situation like that. Do you want to talk about what set you off?",
        "Your feelings are valid. It sounds like something crossed a line for you."
    ],
    "anxiety": [
        "Feeling anxious can be really exhausting. Want to talk about whatâ€™s worrying you most right now?",
        "Itâ€™s okay to feel anxious. Taking it one step at a time can help. Whatâ€™s the biggest thing on your mind?"
    ],
    "fear": [
        "Feeling scared is completely understandable. Do you want to tell me what feels unsafe or uncertain?",
        "Itâ€™s okay to feel afraid. Youâ€™re not alone in this. Whatâ€™s making you feel this way?"
    ],
    "joy": [
        "Thatâ€™s wonderful to hear! What happened that made you feel so happy?",
        "Iâ€™m really glad youâ€™re feeling good. Want to tell me more about it?"
    ],
    "love": [
        "Thatâ€™s such a warm feeling. Itâ€™s nice to care about someone that much.",
        "It sounds like this person means a lot to you. Thatâ€™s really special."
    ],
    "gratitude": [
        "Itâ€™s lovely that you feel grateful. What made you feel that way?",
        "Thatâ€™s so nice. Gratitude can really brighten a day."
    ],
    "neutral": [
        "Thanks for sharing that. Tell me moreâ€”whatâ€™s on your mind about it?",
        "I hear you. How are you feeling about that right now?"
    ],
    # fallback if no recognized emotion
    "default": [
        "Thank you for sharing that. How are you feeling about it?",
        "Iâ€™m listening. Want to tell me more about whatâ€™s going on?"
    ],
}

In [15]:

def pick_template(labels):
    priority_order = ["sadness", "anger", "anxiety", "fear", "joy", "neutral"]

    for e in priority_order:
        if e in labels and e in TEMPLATES:
            return random.choice(TEMPLATES[e])

    for e in labels:
        if e in TEMPLATES:
            return random.choice(TEMPLATES[e])

    return random.choice(TEMPLATES["default"])

def load_goemotions(path):
    examples = []
    with open(path, "r", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            text = row.get("text", "").strip()
            labels_raw = row.get("labels", "").strip()
            if not text:
                continue
            labels = [l.strip() for l in labels_raw.split(",") if l.strip()]
            examples.append((text, labels))
    return examples


In [16]:

def generate_dataset():
    examples = load_goemotions(GOEMOTIONS_PATH)
    print(f"Loaded {len(examples)} GoEmotions examples.")

    random.shuffle(examples)

    with open(OUTPUT_PATH, "w", encoding="utf-8") as out_f:
        index = 0
        written = 0

        while written < TARGET_SIZE_BYTES:
            text, labels = examples[index]
            response = pick_template(labels)

            sample = f"USER: {text}\nCOMPATIKA: {response}\n\n"

            out_f.write(sample)
            written += len(sample.encode("utf-8"))

            index += 1
            if index >= len(examples):
                index = 0

    final_size = os.path.getsize(OUTPUT_PATH)
    print(f"Done! Output file: {OUTPUT_PATH}, size: {final_size / (1024*1024):.2f} MB")

In [19]:
generate_dataset()

Loaded 211225 GoEmotions examples.
Done! Output file: compatika_v1_40mb.txt, size: 40.80 MB


In [22]:
import csv
import os
import random

# ================== CONFIG ==================
# ðŸ‘‡ Change this to the FULL path of your GoEmotions CSV
GOEMOTIONS_PATH = "./goemotions_1/goemotions_merged_clean.csv" 

OUTPUT_PATH = "compatika_v1_40mb.txt"
TARGET_SIZE_MB = 40
TARGET_SIZE_BYTES = TARGET_SIZE_MB * 1024 * 1024

# ================== TEMPLATES ==================
# Compatika-style emotional responses.
TEMPLATES = {
    "sadness": [
        "It sounds like youâ€™re carrying a lot of sadness. You donâ€™t have to go through it alone. Want to talk about what hurts?",
        "Your feelings make sense. When something matters to us, sadness can hit deeply. What part feels the heaviest right now?",
        "Itâ€™s okay to feel this way. Iâ€™m here with you. What happened that brought these feelings up?",
        "It sounds really painful. You deserve space to feel this. Want to share more about whatâ€™s going on?"
    ],
    "anger": [
        "Itâ€™s understandable to feel angry when things feel unfair. Do you want to talk through what happened?",
        "Your anger makes sense. Something important must have felt crossed. Want to tell me about it?",
        "It sounds like this really got to you. Iâ€™m here to listen if you want to unpack it.",
        "Feeling frustrated doesnâ€™t make you wrong. It means something didnâ€™t sit right. What part bothered you the most?"
    ],
    "anxiety": [
        "Feeling anxious can be exhausting. Iâ€™m here with you. Want to talk about whatâ€™s worrying you right now?",
        "Itâ€™s okay to feel nervous. Youâ€™re doing your best. What part of this situation feels the most overwhelming?",
        "Anxiety can show up fast. Take your time. Whatâ€™s the biggest thought on your mind?",
        "Youâ€™re not alone in feeling this way. What would help you feel a little calmer?"
    ],
    "nervousness": [  # alias for anxiety
        "Itâ€™s completely okay to feel nervous. New or uncertain things can be scary. Whatâ€™s making you feel this way?",
        "It sounds like youâ€™re on edge about this. Do you want to talk through what youâ€™re worried might happen?"
    ],
    "fear": [
        "Itâ€™s completely okay to feel scared. Something must feel uncertain. Want to share whatâ€™s making you feel this way?",
        "Fear can be hard to carry alone. Iâ€™m here with you. Whatâ€™s the part that feels the most frightening?",
        "It sounds like youâ€™re facing something difficult. What would help you feel a bit safer right now?",
        "Your fear is valid. Itâ€™s okay to take things one small step at a time."
    ],
    "joy": [
        "Thatâ€™s wonderful to hear! What happened that made your day so good?",
        "Iâ€™m really glad youâ€™re feeling good. Want to tell me more about it?",
        "That sounds really joyful. Itâ€™s nice to hear moments like this. What made it special?",
        "I love that youâ€™re feeling this way. Whatâ€™s the highlight for you right now?"
    ],
    "love": [
        "Thatâ€™s such a warm feeling. Itâ€™s nice to care about someone that much.",
        "It sounds like this person means a lot to you. Thatâ€™s really special.",
        "Love can be powerful and tender at the same time. How does it feel to carry that right now?",
        "Itâ€™s beautiful that you feel this way. Would you like to talk more about them?"
    ],
    "gratitude": [
        "Itâ€™s lovely that you feel grateful. What made you feel that way?",
        "Thatâ€™s so nice. Gratitude can really brighten a day.",
        "It sounds like you really appreciate this. What part of it matters most to you?",
        "Noticing things youâ€™re grateful for can be really grounding. What are you grateful for today?"
    ],
    "disappointment": [
        "It makes sense to feel disappointed when things donâ€™t go how you hoped. Want to talk about what happened?",
        "That sounds really discouraging. Your feelings are valid. What did you wish had gone differently?",
        "Itâ€™s okay to feel let down. Sometimes plans or people donâ€™t match our hopes.",
        "I hear your disappointment. What would have felt better for you in this situation?"
    ],
    "remorse": [
        "It sounds like youâ€™re really reflecting on what happened. Regret can be heavy. Want to talk about it?",
        "The fact that you feel remorse shows you care. Would it help to think about how you might move forward?",
        "Itâ€™s okay to wish youâ€™d done things differently. Youâ€™re allowed to learn and grow from this.",
        "Youâ€™re not alone in feeling this way. What would being kinder to yourself look like here?"
    ],
    "surprise": [
        "That sounds surprising. How are you feeling about it now that itâ€™s happened?",
        "Wow, that seems unexpected. Was it a pleasant surprise or more unsettling?",
        "It can be a lot when things catch us off guard. What part surprised you the most?",
        "Unexpected moments can bring up mixed feelings. How is it sitting with you now?"
    ],
    "amusement": [
        "That does sound amusing. Iâ€™m glad you had a light moment like that. Want to share more?",
        "Itâ€™s nice to find things that make you laugh. What about it made you smile?",
        "I love that you found that funny. Little moments like that can really brighten a day.",
        "Itâ€™s great that you can laugh about it. Humor can be such a relief."
    ],
    "caring": [
        "Itâ€™s really kind that you care so much. How are you feeling about the situation?",
        "Your care really shows. It sounds like this person matters a lot to you.",
        "Itâ€™s thoughtful of you to be so concerned. How can you take care of yourself too?",
        "Caring deeply can be both beautiful and heavy. Whatâ€™s on your heart right now?"
    ],
    "confusion": [
        "Itâ€™s okay to feel confused. Things donâ€™t always make sense right away. Want to talk it through?",
        "Confusion can feel uncomfortable, but itâ€™s also part of figuring things out. Whatâ€™s most unclear for you?",
        "You donâ€™t have to have all the answers. What part is bothering you the most?",
        "It sounds like youâ€™re trying to make sense of a lot. Iâ€™m here if you want to unpack it slowly."
    ],
    "curiosity": [
        "Itâ€™s great that youâ€™re curious. What would you like to explore or understand more?",
        "Curiosity can lead to meaningful discoveries. Whatâ€™s on your mind?",
        "I like that youâ€™re asking questions. Where do you feel like starting?",
        "Your curiosity is a strength. What are you most interested in right now?"
    ],
    "desire": [
        "It sounds like this is really important to you. What draws you to it the most?",
        "Wanting something deeply can feel intense. How does it feel to sit with that desire?",
        "Itâ€™s okay to want what you want. What would it mean to you to have it?",
        "That longing sounds strong. Do you want to talk about it more?"
    ],
    "disgust": [
        "It sounds like this really pushed your boundaries. Want to tell me what felt so off about it?",
        "Feeling disgusted can be a strong signal that something doesnâ€™t align with your values.",
        "Your reaction is valid. Something in this clearly didnâ€™t sit right with you.",
        "Itâ€™s okay to feel repulsed or turned off. Do you want to share more about why?"
    ],
    "embarrassment": [
        "Feeling embarrassed can be really uncomfortable. Youâ€™re not alone in that.",
        "It sounds like youâ€™re being hard on yourself. Many people would feel the same in your place.",
        "That must have felt awkward. Do you want to talk about what happened?",
        "Itâ€™s okay to cringe at things sometimes. It doesnâ€™t define who you are."
    ],
    "excitement": [
        "Thatâ€™s exciting! It sounds like something really positive is happening. Want to share more?",
        "I love hearing your excitement. What are you looking forward to the most?",
        "Itâ€™s great that you feel so energized about this. Whatâ€™s making it special?",
        "Your excitement really comes through. How does it feel to anticipate this?"
    ],
    "grief": [
        "Iâ€™m really sorry youâ€™re going through this. Grief can be incredibly heavy.",
        "It sounds like youâ€™ve lost something or someone important. Iâ€™m here with you.",
        "Grief can come in waves. However it shows up is okay. Want to talk about what youâ€™re missing?",
        "Your pain is valid. You donâ€™t have to rush through it. Iâ€™m here to listen."
    ],
    "optimism": [
        "Itâ€™s lovely that youâ€™re feeling hopeful. What makes you feel optimistic right now?",
        "Thatâ€™s a really encouraging outlook. What are you looking forward to?",
        "Holding onto hope can be powerful. What helps you stay positive?",
        "Your optimism really shines through. Itâ€™s nice to hear that."
    ],
    "pride": [
        "Thatâ€™s amazingâ€”you have every right to feel proud. What did you do that youâ€™re celebrating?",
        "Itâ€™s great that youâ€™re recognizing your own effort. You earned that feeling.",
        "Iâ€™m glad youâ€™re giving yourself credit. What does this achievement mean to you?",
        "You should be proud of yourself. It sounds like you worked hard for this."
    ],
    "realization": [
        "That sounds like an important realization. How does it feel to see things this way now?",
        "New insights can be powerful, even if theyâ€™re uncomfortable. What stands out the most to you?",
        "It seems like something has really clicked for you. Want to talk more about it?",
        "Realizations like that can shift a lot. How are you processing it?"
    ],
    "relief": [
        "Iâ€™m glad you feel some relief. That must take a bit of weight off.",
        "It sounds like a burden has lightened. What feels different now?",
        "Relief can be such a comforting feeling. What changed for you?",
        "Iâ€™m happy to hear things feel a bit easier. How are you feeling now compared to before?"
    ],
    "approval": [
        "It sounds like you feel good about this, and thatâ€™s important.",
        "Iâ€™m glad this feels right to you. Your perspective matters.",
        "Itâ€™s nice that this lines up with your values. How does that feel?",
        "Your sense of approval here really shows what you care about."
    ],
    "disapproval": [
        "It sounds like this really didnâ€™t sit well with you. Want to talk about why?",
        "Your discomfort makes sense. Something here must feel off or wrong.",
        "Itâ€™s okay to disagree or disapprove. What part feels most important to you?",
        "Your reaction shows that your values matter a lot to you."
    ],
    "admiration": [
        "Itâ€™s really nice that you admire this. What about it inspires you?",
        "That kind of appreciation says a lot about what you value.",
        "It sounds like this person or thing really resonates with you. What stands out?",
        "Admiration like that can be really motivating. How does it influence you?"
    ],
    "neutral": [
        "Thanks for sharing that. How are you feeling about it?",
        "I hear you. Whatâ€™s on your mind about this right now?",
        "That makes sense. Is there anything youâ€™d like to explore about it?",
        "Iâ€™m listening. Tell me more, if youâ€™d like."
    ],
    "default": [
        "Thank you for sharing that. How are you feeling about it?",
        "Iâ€™m listening. Want to tell me more about whatâ€™s going on?",
        "Your perspective matters. What feels most important to you right now?",
        "Iâ€™m here with you. What would you like to talk about more?"
    ],
}

# Map raw GoEmotions labels to our template keys (canonical names)
EMOTION_CANONICAL = {
    "sadness": "sadness",
    "anger": "anger",
    "annoyance": "anger",
    "nervousness": "anxiety",
    "anxiety": "anxiety",
    "fear": "fear",
    "joy": "joy",
    "love": "love",
    "gratitude": "gratitude",
    "disappointment": "disappointment",
    "remorse": "remorse",
    "grief": "grief",
    "surprise": "surprise",
    "amusement": "amusement",
    "caring": "caring",
    "confusion": "confusion",
    "curiosity": "curiosity",
    "desire": "desire",
    "disgust": "disgust",
    "embarrassment": "embarrassment",
    "excitement": "excitement",
    "optimism": "optimism",
    "pride": "pride",
    "realization": "realization",
    "relief": "relief",
    "approval": "approval",
    "disapproval": "disapproval",
    "admiration": "admiration",
    "neutral": "neutral",
}

PRIORITY_ORDER = [
    "grief", "sadness", "remorse", "fear", "anxiety",
    "anger", "disappointment", "disgust",
    "joy", "excitement", "love", "gratitude", "optimism", "pride",
    "caring", "confusion", "curiosity",
    "relief", "surprise", "amusement",
    "approval", "disapproval", "admiration",
    "neutral"
]

def pick_template(labels):
    """
    Pick a Compatika-style reply based on emotion labels from GoEmotions.
    labels = list of raw label strings, e.g. ["sadness", "fear"]
    """
    # Normalize labels -> canonical keys
    canon_labels = []
    for l in labels:
        l = l.strip()
        if l in EMOTION_CANONICAL:
            canon_labels.append(EMOTION_CANONICAL[l])

    if not canon_labels:
        return random.choice(TEMPLATES["default"])

    # Try priority order first
    for emo in PRIORITY_ORDER:
        if emo in canon_labels and emo in TEMPLATES:
            return random.choice(TEMPLATES[emo])

    # Fallback: any matching template
    for emo in canon_labels:
        if emo in TEMPLATES:
            return random.choice(TEMPLATES[emo])

    # Final fallback
    return random.choice(TEMPLATES["default"])

# ================== DATA LOADING & GENERATION ==================

def load_goemotions(path):
    """
    Load GoEmotions CSV.
    Assumes columns: 'text' and 'labels'
    where 'labels' is like 'sadness,joy' or 'sadness'.
    Adjust column names if your file is different.
    """
    examples = []
    with open(path, "r", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            text = row.get("text", "").strip()
            labels_raw = row.get("labels", "").strip()
            if not text:
                continue
            labels = [l.strip() for l in labels_raw.split(",") if l.strip()]
            examples.append((text, labels))
    return examples

def generate_dataset():
    examples = load_goemotions(GOEMOTIONS_PATH)
    print(f"Loaded {len(examples)} GoEmotions examples.")

    random.shuffle(examples)

    with open(OUTPUT_PATH, "w", encoding="utf-8") as out_f:
        index = 0
        written = 0

        while written < TARGET_SIZE_BYTES:
            text, labels = examples[index]
            response = pick_template(labels)

            sample = f"USER: {text}\nCOMPATIKA: {response}\n\n"

            out_f.write(sample)
            written += len(sample.encode("utf-8"))

            index += 1
            if index >= len(examples):
                index = 0

    final_size = os.path.getsize(OUTPUT_PATH)
    print(f"Done! Output file: {OUTPUT_PATH}, size: {final_size / (1024*1024):.2f} MB")


In [23]:
generate_dataset()


Loaded 211225 GoEmotions examples.
Done! Output file: compatika_v1_40mb.txt, size: 40.79 MB
