<a href="https://colab.research.google.com/github/Prishi99/WomenLine-AI-/blob/main/Multilingual_Prompts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install Deep Translator


In [None]:
pip install pandas deep-translator


Collecting deep-translator
  Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)
Downloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m371.4 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep-translator
Successfully installed deep-translator-1.11.4


**Translation in Hindi and Hinglish**

In [None]:
import pandas as pd
from deep_translator import GoogleTranslator

# Read the source prompts (point this at your own CSV if it lives elsewhere)
df = pd.read_csv("Prompts.csv")

# Start both output columns as empty strings; any existing values are reset
for output_column in ('Hindi Translation', 'Code-Mixed (Hinglish)'):
    df[output_column] = ""

# Function to translate to Hindi
def translate_to_hindi(text):
    """Translate ``text`` to Hindi using GoogleTranslator (source language 'auto').

    Args:
        text: The prompt to translate. Non-string values (for example the
            NaN that pandas uses for an empty CSV cell) are returned
            unchanged without attempting a translation.

    Returns:
        The translated text, or ``text`` itself when it is not a string or
        when the translation call raises.
    """
    # Nothing to translate for a missing/non-text cell: skip the request.
    if not isinstance(text, str):
        return text
    try:
        return GoogleTranslator(source='auto', target='hi').translate(text)
    except Exception as e:
        # Best-effort: report the failure and fall back to the original text
        # so one bad prompt does not abort the whole run.
        print(f"Error translating '{text}' to Hindi: {e}")
        return text

# Function to create code-mixed (Hinglish)
def code_mixed(text, hindi_translation=None):
    """Build a simple Hinglish prompt by appending a fixed romanised-Hindi phrase.

    Args:
        text: The English prompt.
        hindi_translation: Accepted for call compatibility; the current
            template-based strategy does not use it.

    Returns:
        For a prompt containing ``?``: everything before the first ``?``
        followed by " ka kya solution hai?". Otherwise the prompt followed
        by " ke baare mein kya jaan sakte hain?". Non-string input (e.g. a
        NaN from an empty CSV cell) yields an empty string.
    """
    # `"?" in text` raises TypeError for non-strings such as float NaN,
    # which would otherwise stop the whole translation loop.
    if not isinstance(text, str):
        return ""
    if "?" in text:
        return f"{text.split('?')[0]} ka kya solution hai?"
    return text + " ke baare mein kya jaan sakte hain?"

# Fill both output columns, one prompt at a time
for row_label, english_prompt in df['Prompt'].items():
    hindi_text = translate_to_hindi(english_prompt)
    hinglish_text = code_mixed(english_prompt, hindi_text)
    df.at[row_label, 'Hindi Translation'] = hindi_text
    df.at[row_label, 'Code-Mixed (Hinglish)'] = hinglish_text

# Write the enriched table to a new file (row index omitted)
df.to_csv("translated_prompts.csv", index=False)
print("✅ Translations saved to translated_prompts.csv")


✅ Translations saved to translated_prompts.csv


**Intent Classification**

In [None]:
import re

import pandas as pd

# Load the translated CSV (the file written by the Hindi/Hinglish translation cell)
df = pd.read_csv("translated_prompts.csv")

# Define intent keywords and mappings.
# Order matters: intents are tested top to bottom and the first intent with a
# matching keyword wins.
intent_keywords = {
    "symptom_query": ["cramp", "pain", "irregular", "fatigue", "bloating", "tender", "nausea", "clots", "spotting", "bleeding", "headache"],
    "mental_wellness": ["anxiety", "depression", "mood swing", "emotional", "stress", "feel better", "coping", "low", "mental"],
    "diet_advice": ["food", "eat", "diet", "drink", "nutrient", "magnesium", "snack", "vitamin", "craving", "herbal"],
    "exercise_guidance": ["exercise", "workout", "stretch", "yoga", "sports", "run", "swim", "cardio", "fitness", "physical"],
    "myth_busting": ["myth", "true", "false", "fact", "really", "virgin", "sync", "dirty", "pregnant", "stop your period"]
}

# Function to assign intent
def assign_intent(prompt):
    """Return the first intent that has a keyword starting a word in ``prompt``.

    Matching is case-insensitive and anchored at the start of a word, so
    "eat" matches "eat" and "eating" but not "treat" or "heating", and "low"
    does not match "allowed" or "flow". Suffixes are still allowed so that
    plural and inflected forms ("cramps", "running") keep matching.

    Args:
        prompt: The English prompt text. Non-string values (e.g. NaN from
            an empty CSV cell) receive the fallback intent.

    Returns:
        One of the keys of ``intent_keywords``; "mental_wellness" when no
        keyword matches.
    """
    # `.lower()` does not exist on float NaN; treat missing text as "no match".
    if not isinstance(prompt, str):
        return "mental_wellness"
    prompt_lower = prompt.lower()
    for intent, keywords in intent_keywords.items():
        # \b anchors the keyword to a word start; re.escape keeps multi-word
        # keywords such as "mood swing" literal.
        if any(re.search(r"\b" + re.escape(kw), prompt_lower) for kw in keywords):
            return intent
    return "mental_wellness"  # default fallback

# Label every prompt with its detected intent
df["Intent"] = df["Prompt"].map(assign_intent)

# Persist the labelled table (row index omitted)
df.to_csv("categorized_prompts.csv", index=False)
print("✅ Categorization complete and saved to categorized_prompts.csv")


✅ Categorization complete and saved to categorized_prompts.csv


**Adding Context and Tags**

In [None]:
import pandas as pd

# Read the intent-labelled prompts
df = pd.read_csv("categorized_prompts.csv")

# Start both annotation columns as empty strings; any existing values are reset
for annotation_column in ("Context", "Tags"):
    df[annotation_column] = ""

# Context and tag rules, checked in order: the first rule with a keyword
# contained in the prompt wins. Each entry is (keywords, context, tags).
_CONTEXT_RULES = (
    # Health-related symptoms
    (("cramp", "period pain", "irregular", "flow", "spotting", "bloating"),
     "period health", ("menstruation", "pain", "symptom")),
    (("mood swing", "anxiety", "depression", "lonely", "cry", "stress"),
     "mental wellness", ("emotion", "mental health", "stress")),
    (("exercise", "diet", "track", "remedy", "cope", "solution"),
     "self-care", ("coping", "remedy", "self-help")),
    (("pcos", "hormonal", "imbalance"),
     "PCOS awareness", ("PCOS", "hormones")),
    (("teen", "first period", "young", "school"),
     "teen health", ("teen", "first period", "awareness")),
)


def get_context_and_tags(prompt):
    """Derive a context label and a comma-separated tag string for a prompt.

    Keywords are matched as case-insensitive substrings of the prompt (so
    "teen" also matches inside "thirteen").

    Args:
        prompt: The English prompt text. Non-string values (e.g. NaN from
            an empty CSV cell) get the default labels.

    Returns:
        A ``(context, tags)`` tuple where ``tags`` is a single string joined
        with ", ". Defaults to ``("general", "general")`` when no rule matches.
    """
    # `.lower()` does not exist on float NaN; fall back to the defaults.
    if not isinstance(prompt, str):
        return "general", "general"

    p = prompt.lower()
    for keywords, context, tags in _CONTEXT_RULES:
        if any(word in p for word in keywords):
            return context, ", ".join(tags)

    # Default values
    return "general", "general"

# Annotate each prompt, then split the (context, tags) pairs into their columns
context_tag_pairs = [get_context_and_tags(prompt_text) for prompt_text in df["Prompt"]]
df["Context"] = [context_label for context_label, _ in context_tag_pairs]
df["Tags"] = [tag_text for _, tag_text in context_tag_pairs]

# Write the annotated table using the 'utf-8-sig' encoding (row index omitted)
df.to_csv("final_prompts_dataset.csv", index=False, encoding='utf-8-sig')
print("✅ Context and Tags added (with defaults)! File saved as final_prompts_dataset.csv")


✅ Context and Tags added (with defaults)! File saved as final_prompts_dataset.csv


**Translation in Multiple Languages**

In [None]:
import pandas as pd
from deep_translator import GoogleTranslator

# Read the annotated dataset
df = pd.read_csv("final_prompts_dataset.csv")

# Display name -> language code passed to the translator
target_languages = dict(
    Tamil="ta",
    Bengali="bn",
    Marathi="mr",
    Urdu="ur",
    Telugu="te",
)

# Make sure every "<Language> Translation" column exists; existing ones are kept
for language_name in target_languages:
    translation_column = f"{language_name} Translation"
    if translation_column in df.columns:
        continue
    df[translation_column] = ""

# Translate function
def translate_prompt(text, target_lang_code):
    """Translate ``text`` into ``target_lang_code`` using GoogleTranslator.

    Args:
        text: The prompt to translate (source language is 'auto').
        target_lang_code: Language code handed to GoogleTranslator as
            ``target`` (e.g. "ta", "bn").

    Returns:
        The translated text, or an empty string when ``text`` is blank or
        not a string, or when the translation call raises.
    """
    # Blank or missing cells (e.g. float NaN) have nothing to translate:
    # return the same "" used for failures without attempting a translation.
    if not isinstance(text, str) or not text.strip():
        return ""
    try:
        return GoogleTranslator(source='auto', target=target_lang_code).translate(text)
    except Exception as e:
        # Best-effort: report the failure and leave this cell empty.
        print(f"❌ Error for '{text}' in '{target_lang_code}': {e}")
        return ""

# Perform translation for each prompt into each language
for row_label, prompt in df["Prompt"].items():
    for lang, code in target_languages.items():
        df.at[row_label, f"{lang} Translation"] = translate_prompt(prompt, code)

# Save updated file with the 'utf-8-sig' encoding (UTF-8 with a byte-order
# mark), the same encoding used when writing final_prompts_dataset.csv, so
# the multi-script output is written consistently in a single pass.
df.to_csv("multilingual_prompts_dataset.csv", index=False, encoding='utf-8-sig')
print("✅ All prompts translated and saved to multilingual_prompts_dataset.csv")


✅ All prompts translated and saved to multilingual_prompts_dataset.csv


In [None]:
# Re-export the dataset with the 'utf-8-sig' encoding (UTF-8 plus a byte-order mark).
# Uses the `df` left in the kernel by the multilingual translation cell, so that
# cell must have been run first.
# NOTE(review): presumably the BOM is for spreadsheet tools that need it to
# detect UTF-8 — confirm.
df.to_csv("multilingual_prompts_dataset.csv", index=False, encoding='utf-8-sig')
