In [1]:
#!pip install pandas openai


In [9]:
import pandas as pd
import os
import re
from dotenv import load_dotenv
from openai import OpenAI
import time

In [18]:
# Step 1: Load Data
df = pd.read_csv("train_filled.csv")
df

Unnamed: 0,post,condition,response
0,5 Simple family activities to get and stay hea...,none,The post does not display any emotional indica...
1,In coming | Friendship 2018 New Sortflim BY | ...,none,The post does not contain any emotional indica...
2,Are there any youtubers that vlog about their ...,depression,The post suggests that the poster is actively ...
3,I skip so many songs i used to love. All they ...,depression,The post expresses a significant emotional dis...
4,My friend of 16 years thinks I stole from her....,none,The post expresses a feeling of distress and b...
...,...,...,...
495,**I don't want to be anxious in drawing class....,stress,The poster expresses a desire to not feel anxi...
496,I have very little understanding of developing...,none,The poster is requesting help and guidance wit...
497,Accounts incredibly overdue. A life entirely u...,stress,The language used in this post suggests that t...
498,The father has one of the most notable family ...,none,The post describes a difficult legal situation...


In [11]:



api_key ="Open router API key here"


In [12]:
openai = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=api_key,
)
#model="mistralai/mistral-7b-instruct:free"
#model = "google/gemini-2.0-flash-lite-preview-02-05:free"
model = "google/gemma-3-27b-it:free"
# model = "deepseek/deepseek-r1-zero:free"

In [13]:


POST_SYSTEM_PROMPT = """
You are a professional Vietnamese mental health translator specializing in emotionally sensitive and culturally accurate translations. Your task is to translate English Reddit posts into fluent, natural Vietnamese while preserving the emotional tone and psychological context of each message.

Your translation should sound like it was originally written by a Vietnamese person experiencing or reflecting on mental health struggles — authentic, empathetic, and conversational. Avoid robotic or overly formal language.

Key guidelines:
- Preserve the emotional depth, distress, and vulnerability of the original message.
- Translate slang, idioms, and informal speech into modern Vietnamese equivalents used in online communities (e.g., 'chán nản', 'bế tắc', 'mất phương hướng', 'tự làm hại', 'lo âu xã hội').
- Adapt Western cultural references when needed to something understandable for Vietnamese readers, or omit them if irrelevant.
- Use language that aligns with real mental health conversations in Vietnamese forums or social media.
- Prioritize clarity and emotional resonance over literal accuracy.

Return only the translated Vietnamese content. Do not include labels like 'Post:', 'Question:',any variation or system explanations. Maintain paragraph and sentence structure when possible.
"""

# Prompt 2 — Formal explanation from a mental health expert for response_vi
RESPONSE_SYSTEM_PROMPT = """
You are a professional Vietnamese psychologist. Your task is to rewrite mental health assessments or reasoning into fluent, formal, and professional Vietnamese — similar to how a licensed therapist or psychologist would write in a report or academic context.

Key guidelines:
- Maintain a clinical tone while preserving compassion and psychological accuracy.
- Use formal Vietnamese language with appropriate mental health terminology (e.g., 'rối loạn lo âu', 'triệu chứng trầm cảm', 'cảm giác vô vọng', 'hành vi tự làm hại').
- Avoid slang or casual phrasing.
- Be precise and emotionally grounded.
- Return only the rewritten formal explanation in Vietnamese.
"""

EVALUATION_PROMPT = """
You are a professional Vietnamese clinical psychologist. Your task is to assess the severity of depressive symptoms based on the reasoning provided in English.

Rate the mental health condition on a scale from 0 to 5 using the following guide:

- 0 = No signs of depression or psychological distress
- 1 = Very mild signs (occasional low mood, manageable)
- 2 = Mild symptoms (persistent sadness, early warning signs)
- 3 = Moderate depression (functional impairment, emotional heaviness)
- 4 = Severe depression (hopelessness, disconnection, self-harm thoughts)
- 5 = Extreme or critical condition (suicidal ideation, extreme isolation)

Only return the number (e.g., `3`). Do not explain or elaborate. Respond with a single digit from 0 to 5.
"""


In [14]:
def translate_text(text, system_prompt, retries=1):
    """
    Translates the given input text into Vietnamese using an OpenAI chat model and a specified system prompt.

    Args:
        text (str): The input text to be translated.
        system_prompt (str): The system-level instruction that guides the model's translation behavior.
        retries (int, optional): Number of retry attempts in case the request fails. Default is 1.

    Returns:
        str: The translated Vietnamese text, or an error message if translation fails.
    """
    for attempt in range(retries):
        try:
            response = openai.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": f"Translate this into Vietnamese:\n{text}"}
                ],
                temperature=0.7
            )
            time.sleep(2)
            if response and hasattr(response, "choices") and response.choices:
                return response.choices[0].message.content
            else:
                print(" Malformed response:", response)
                return "Translation failed (no choices)"
        except Exception as e:
            print(f"Retry {attempt+1}/{retries} failed: {e}")
            time.sleep(2)
    return "Translation failed after retries"



In [16]:
def translate_dataframe(df, num_rows=None):
    """
    Translates 'post' and 'response' columns into Vietnamese, and generates a severity score.

    Args:
        df (pd.DataFrame): The input DataFrame with 'post' and 'response' columns.
        num_rows (int, optional): Number of rows to process. If None, process entire DataFrame.

    Returns:
        pd.DataFrame: A new DataFrame with 'post_vi', 'response_vi', and 'severity_score' columns.
    """
    df_sample = df.copy()
    
    if num_rows is not None:
        df_sample = df_sample.head(num_rows)

    for i in df_sample.index:
        print(f"Translating row {i+1}/{len(df_sample)}...")
        try:
            post_text = df_sample.at[i, 'post']
            response_text = df_sample.at[i, 'response']

            df_sample.at[i, 'post_vi'] = translate_text(post_text, POST_SYSTEM_PROMPT)
            df_sample.at[i, 'response_vi'] = translate_text(response_text, RESPONSE_SYSTEM_PROMPT)
            df_sample.at[i, 'severity_score'] = translate_text(response_text, EVALUATION_PROMPT)

        except Exception as e:
            print(f"Error in row {i}: {e}")
            df_sample.at[i, 'post_vi'] = "Translation failed"
            df_sample.at[i, 'response_vi'] = "Translation failed"
            df_sample.at[i, 'severity_score'] = "Translation failed"

    return df_sample


In [21]:
# Translate the entire DataFrame
translated_df = translate_dataframe(df, num_rows=1)

Translating row 1/1...


In [19]:
translated_df.head()


Unnamed: 0,post,condition,response,post_vi,response_vi,severity_score
0,5 Simple family activities to get and stay hea...,none,The post does not display any emotional indica...,\n5 hoạt động gia đình đơn giản để khỏe mạnh v...,\nBài viết không thể hiện bất kỳ dấu hiệu cảm ...,\n0\n


In [67]:
# Save translated sample
translated_df.to_csv("<your_name>_vi_sample.csv", index=False, encoding='utf-8-sig')
print(" Vietnamese sample saved to reddit_train_vi_sample.csv")

✅ Vietnamese sample saved to reddit_train_vi_sample.csv
