<a href="https://colab.research.google.com/github/Prishi99/Velmora/blob/main/Multilingual_Prompts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install pandas deep-translator


Collecting deep-translator
  Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)
Downloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep-translator
Successfully installed deep-translator-1.11.4


In [2]:
import pandas as pd
from deep_translator import GoogleTranslator

# Read the prompts and attach the two output columns as empty strings.
# Note: any existing values in these columns are reset, not preserved.
df = pd.read_csv("Prompts.csv").assign(  # Replace with your actual CSV path
    **{"Hindi Translation": "", "Code-Mixed (Hinglish)": ""}
)

# Function to translate to Hindi
def translate_to_hindi(text):
    """Translate ``text`` to Hindi using GoogleTranslator (deep-translator).

    Args:
        text: The prompt to translate; the source language is auto-detected.

    Returns:
        The Hindi translation. ``text`` is returned unchanged when it is not a
        string (e.g. a blank CSV cell that pandas read as NaN), when it is
        blank, or when the translation call raises.
    """
    # Nothing translatable: skip the translator entirely instead of sending an
    # invalid payload and printing an error for every missing cell.
    if not isinstance(text, str) or not text.strip():
        return text

    try:
        return GoogleTranslator(source='auto', target='hi').translate(text)
    except Exception as e:
        # Best effort: report the failure and fall back to the original text
        # so one bad row does not abort the whole batch.
        print(f"Error translating '{text}' to Hindi: {e}")
        return text

# Function to create code-mixed (Hinglish)
def code_mixed(text, hindi_translation):
    """Build a template-based code-mixed (Hinglish) variant of ``text``.

    The strategy is purely template driven: a romanised Hindi phrase is
    appended to the English prompt.

    Args:
        text: The original prompt.
        hindi_translation: Currently unused; kept so existing callers that
            pass the Hindi translation keep working.

    Returns:
        * If ``text`` contains ``?``: everything before the first ``?``
          followed by ``" ka kya solution hai?"``.
        * Otherwise: ``text`` followed by ``" ke baare mein kya jaan sakte hain?"``.
        * ``text`` unchanged when it is not a string (e.g. NaN from a blank
          CSV cell) or is blank, so missing prompts neither raise nor produce
          a dangling template.
    """
    if not isinstance(text, str) or not text.strip():
        return text

    if "?" in text:
        # Keep only the part that precedes the first question mark.
        stem = text.split("?")[0]
        return f"{stem} ka kya solution hai?"

    return text + " ke baare mein kya jaan sakte hain?"

# Translate all prompts
# Column-wise assignment replaces the row-by-row iterrows()/df.at loop; every
# prompt is still passed through translate_to_hindi and then code_mixed.
df['Hindi Translation'] = df['Prompt'].map(translate_to_hindi)
df['Code-Mixed (Hinglish)'] = [
    code_mixed(english_prompt, hindi)
    for english_prompt, hindi in zip(df['Prompt'], df['Hindi Translation'])
]

# Save to a new CSV
df.to_csv("translated_prompts.csv", index=False)
print("✅ Translations saved to translated_prompts.csv")


✅ Translations saved to translated_prompts.csv


In [3]:
import pandas as pd

# Reload the translated prompts and attach an empty Category column
df = pd.read_csv("translated_prompts.csv").assign(Category="")

# Define basic keyword rules.
# Rules are checked in order; the first category with a matching keyword wins.
_CATEGORY_KEYWORDS = (
    ("Symptoms", ("symptom", "cramp", "pain", "irregular", "fatigue", "bloating")),
    ("Emotions", ("anxiety", "depression", "mood swing", "emotional", "stress")),
)
_QUESTION_STARTERS = ("how", "what", "why")


def categorize(prompt):
    """Assign a coarse category to a prompt using keyword rules.

    Keywords must begin at a word boundary, so "cramps" and "painful" still
    match "cramp" / "pain", while "explain" no longer counts as "pain".

    Args:
        prompt: The prompt text. Non-string values (e.g. NaN from a blank CSV
            cell) are treated as empty text.

    Returns:
        "Symptoms", "Emotions", "Questions" (contains "?" or starts with
        how/what/why), or "Intent" when no rule matches.
    """
    import re  # local import: keeps this cell independent of other cells' imports

    # Strip so leading whitespace does not defeat the startswith() checks.
    text = prompt.strip().lower() if isinstance(prompt, str) else ""

    for category, keywords in _CATEGORY_KEYWORDS:
        pattern = r"\b(?:" + "|".join(re.escape(word) for word in keywords) + ")"
        if re.search(pattern, text):
            return category

    if "?" in text or text.startswith(_QUESTION_STARTERS):
        return "Questions"
    return "Intent"

# Label every prompt with its category
df["Category"] = df["Prompt"].map(categorize)

# Write the categorized prompts to disk (without the index column)
df.to_csv("categorized_prompts.csv", index=False)
print("✅ Categorization complete and saved to categorized_prompts.csv")


✅ Categorization complete and saved to categorized_prompts.csv


In [4]:
import pandas as pd

# Reload the categorized prompts and attach empty Context and Tags columns
# (existing values in those columns, if any, are reset)
df = pd.read_csv("categorized_prompts.csv").assign(Context="", Tags="")

# Context and tag rules.
# Each rule is (keywords, context, tags). Rules are checked in order and the
# first rule with a keyword found in the prompt wins.
_CONTEXT_RULES = (
    # Health-related symptoms
    (("cramp", "period pain", "irregular", "flow", "spotting", "bloating"),
     "period health", ("menstruation", "pain", "symptom")),
    # Mood and emotional state
    (("mood swing", "anxiety", "depression", "lonely", "cry", "stress"),
     "mental wellness", ("emotion", "mental health", "stress")),
    # Coping strategies and remedies
    (("exercise", "diet", "track", "remedy", "cope", "solution"),
     "self-care", ("coping", "remedy", "self-help")),
    # PCOS / hormones
    (("pcos", "hormonal", "imbalance"),
     "PCOS awareness", ("PCOS", "hormones")),
    # Teen-specific prompts
    (("teen", "first period", "young", "school"),
     "teen health", ("teen", "first period", "awareness")),
)


def get_context_and_tags(prompt):
    """Derive a context label and a tag string for a prompt.

    Matching is a case-insensitive substring test against each rule's
    keywords.

    Args:
        prompt: The prompt text. Non-string values (e.g. NaN from a blank CSV
            cell) get the default result instead of raising.

    Returns:
        A ``(context, tags)`` tuple where ``tags`` is a comma-separated
        string. Defaults to ``("general", "")`` when no rule matches.
    """
    if not isinstance(prompt, str):
        return "general", ""

    p = prompt.lower()
    for keywords, context, tags in _CONTEXT_RULES:
        if any(word in p for word in keywords):
            return context, ", ".join(tags)

    # Default values
    return "general", ""

# Apply to DataFrame
# Compute each (context, tags) pair once, then assign the two columns from
# plain lists. This avoids building a pd.Series per row and also works when
# the frame has no rows.
context_tag_pairs = [get_context_and_tags(prompt) for prompt in df["Prompt"]]
df["Context"] = [context for context, _ in context_tag_pairs]
df["Tags"] = [tags for _, tags in context_tag_pairs]

# Save it
df.to_csv("final_prompts_dataset.csv", index=False)
print("✅ Context and Tags added! File saved as final_prompts_dataset.csv")


✅ Context and Tags added! File saved as final_prompts_dataset.csv
