# Data Translation

This notebook translates column values from Spanish/Catalan to English for better readability and international collaboration. The translations include:
- Sex categories (Catalan → English)
- ATC Level 1 drug classification system (Spanish → English)
- ATC Level 3 nervous system drug categories (Spanish → English)

In [None]:
# Lookup tables: original label -> English label.
# Catalan labels used in the 'sexe' column.
sex_translations = {
    'Dona': 'Women',
    'Home': 'Men',
}

# Spanish names of the ATC Level 1 groups found in 'grup ATC nivell 1'.
# NOTE(review): there is no entry for ATC group V (Various); if that label occurs
# in the data it will stay untranslated -- confirm whether it should be added.
atc1_translations = dict([
    ('TRACTO ALIMENTARIO Y METABOLISMO', 'ALIMENTARY TRACT AND METABOLISM'),
    ('SANGRE Y ORGANOS HEMATOPOYETICOS', 'BLOOD AND BLOOD FORMING ORGANS'),
    ('SISTEMA CARDIOVASCULAR', 'CARDIOVASCULAR SYSTEM'),
    ('DERMATOLOGICOS', 'DERMATOLOGICALS'),
    ('SISTEMA GENITOURINARIO Y HORMONAS SEXUALES', 'GENITO-URINARY SYSTEM AND SEX HORMONES'),
    ('PREPARADOS HORMONALES SISTEMICOS, EXCLUYENDO HORMONAS SEXUALES E INSULINAS',
     'SYSTEMIC HORMONAL PREPARATIONS, EXCL. SEX HORMONES AND INSULINS'),
    ('ANTIINFECCIOSOS PARA USO SISTEMICO', 'ANTIINFECTIVES FOR SYSTEMIC USE'),
    ('AGENTES ANTINEOPLASICOS E INMUNOMODULADORES', 'ANTINEOPLASTIC AND IMMUNOMODULATING AGENTS'),
    ('SISTEMA MUSCULOESQUELETICO', 'MUSCULO-SKELETAL SYSTEM'),
    ('SISTEMA NERVIOSO', 'NERVOUS SYSTEM'),
    ('PRODUCTOS ANTIPARASITARIOS, INSECTICIDAS Y REPELENTES',
     'ANTIPARASITIC PRODUCTS, INSECTICIDES AND REPELLENTS'),
    ('SISTEMA RESPIRATORIO', 'RESPIRATORY SYSTEM'),
    ('ORGANOS DE LOS SENTIDOS', 'SENSORY ORGANS'),
])

# Apply the tables; values without an entry are left unchanged by .replace().
df['sexe'] = df['sexe'].replace(sex_translations)
df['grup ATC nivell 1'] = df['grup ATC nivell 1'].replace(atc1_translations)

# Mental Health Drug Analysis - Women

This section analyzes mental health drug prescriptions specifically for women in the dataset. It filters the data to include only female patients and focuses on seven categories of mental health medications (antipsychotics, anxiolytics, hypnotics/sedatives, antidepressants, psychostimulants, combination drugs, and anti-dementia drugs), identified by their ATC Level 3 codes (N05A–N05C and N06A–N06D) within the Level 2 groups N05 (psycholeptics) and N06 (psychoanaleptics).

In [None]:
# Keep only the rows labelled 'Women'; .copy() gives an independent frame so the
# column added further down is not written into a slice of df.
df_women = df[df['sexe'] == 'Women'].copy()

# The column name contains an apostrophe. Quoting it inline inside the f-string
# needs nested double quotes and a backslash, which is a SyntaxError before
# Python 3.12, so it is held in a variable instead.
age_group_col = "grup d'edat"

n_total = len(df)
n_women = len(df_women)
# Guard the division so an empty dataset reports 0.00% instead of raising.
pct_women = 100 * n_women / n_total if n_total > 0 else 0

print(f"Total records for women: {n_women:,}")
print(f"Percentage of total dataset: {pct_women:.2f}%")
print(f"\nYears covered: {sorted(df_women['any'].unique())}")
print(f"Age groups: {sorted(df_women[age_group_col].unique())}")

# Mental health drug categories, keyed by 4-character ATC Level 3 code
mental_health_codes = {
    'N05A': 'Antipsychotics',
    'N05B': 'Anxiolytics',
    'N05C': 'Hypnotics and Sedatives',
    'N06A': 'Antidepressants',
    'N06B': 'Psychostimulants/ADHD/Nootropics',
    'N06C': 'Psycholeptics+Psychoanaleptics Combined',
    'N06D': 'Anti-Dementia Drugs'
}

# Derive the ATC Level 3 code as the first 4 characters of each value.
# NOTE(review): assumes values in 'grup ATC nivell 3' begin with the code
# (e.g. 'N05A ...') rather than holding only the group name -- confirm.
df_women['ATC_Level3'] = df_women['grup ATC nivell 3'].str[:4]

# Keep only rows whose code is one of the mental health categories
df_women_mh = df_women[df_women['ATC_Level3'].isin(mental_health_codes.keys())].copy()

# Attach the readable category name for each code
df_women_mh['MH_Category'] = df_women_mh['ATC_Level3'].map(mental_health_codes)

n_mh = len(df_women_mh)
# Same zero guard as the per-category percentages below.
pct_mh_of_women = 100 * n_mh / n_women if n_women > 0 else 0

# Count rows per code once instead of re-filtering the frame for every code.
code_counts = df_women_mh['ATC_Level3'].value_counts()

print(f"\n{'Mental Health Drug Records for Women:':-^80}")
print(f"Total MH drug records: {n_mh:,}")
print(f"Percentage of women's total: {pct_mh_of_women:.2f}%")
print("\nBreakdown by category:")
for code, name in mental_health_codes.items():
    # Codes with no rows are absent from value_counts(), hence the default of 0.
    count = int(code_counts.get(code, 0))
    pct = 100 * count / n_mh if n_mh > 0 else 0
    print(f"  {code} - {name:<45}: {count:>10,} records ({pct:>5.2f}%)")