In [7]:
# Dependencies
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import linregress



In [None]:
# Set the style for all plots.
# The bare 'seaborn' style sheet name was deprecated in Matplotlib 3.6 and
# later removed; the same sheet is now shipped as 'seaborn-v0_8'. Prefer the
# new name and fall back to the legacy one only on older Matplotlib installs
# that do not list it.
_seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_seaborn_style)
sns.set_palette("deep")

In [None]:
# Load the three raw CSV inputs from the Resources folder
Mental_Health = Path("Resources") / "Mental Health Dataset.csv"
Mental_Health_df = pd.read_csv(Mental_Health)

# low_memory=False asks pandas to parse this file in a single pass instead of
# in chunks; the columns are converted to numbers explicitly further down.
Mental_Disorders = Path("Resources") / "Mental health Depression disorder Data.csv"
Mental_Disorders_df = pd.read_csv(
    Mental_Disorders,
    sep=',',
    encoding='utf-8',
    low_memory=False,
)

# NOTE(review): World_Bank_df is only previewed in the cells visible here.
World_Bank = Path("Resources") / "2.12_Health_systems.csv"
World_Bank_df = pd.read_csv(World_Bank)

In [None]:
# List the distinct values found in the Occupation column
Mental_Health_df.loc[:, "Occupation"].unique()

In [None]:
# Preview the first five rows of the survey dataset
Mental_Health_df.head(n=5)

In [None]:
# Preview the last five rows of the survey dataset
Mental_Health_df.tail(n=5)

In [None]:
# Preview the first 500 rows of the disorders dataset.
# NOTE(review): how many of these rows are actually rendered depends on the
# pandas display.max_rows setting - confirm 500 is intentional.
Mental_Disorders_df.head(n=500)

In [None]:
# Preview the last five rows of the disorders dataset
Mental_Disorders_df.tail(n=5)

In [None]:
# Preview the first five rows of the World Bank health-systems table
World_Bank_df.head(n=5)

In [None]:
# Prevalence columns analysed throughout the notebook, in plotting order
disorders = [
    'Schizophrenia (%)',
    'Bipolar disorder (%)',
    'Eating disorders (%)',
    'Anxiety disorders (%)',
    'Drug use disorders (%)',
    'Depression (%)',
    'Alcohol use disorders (%)',
]

# Numeric conversion helper: values that cannot be parsed become NaN
def to_numeric_with_errors(x):
    """Convert ``x`` with ``pd.to_numeric``, falling back to NaN.

    Parameters
    ----------
    x : object
        Value handed straight to ``pd.to_numeric``.

    Returns
    -------
    The result of ``pd.to_numeric(x)``, or ``np.nan`` when that call raises
    ``ValueError``. Any other exception propagates to the caller.
    """
    try:
        converted = pd.to_numeric(x)
    except ValueError:
        converted = np.nan
    return converted

In [None]:
# Process data: keep the identifier columns, the year and every prevalence column
df = Mental_Disorders_df[['Entity', 'Code', 'Year'] + disorders].copy()

# Coerce Year and the prevalence columns to numbers; unparseable cells become NaN.
# Assign with df[col] = ... so the column is replaced together with its dtype.
# Under pandas 2.x, df.loc[:, col] = ... writes into the existing column in
# place, so an object-dtype column stays object even when every value is numeric.
for col in ['Year'] + disorders:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# dropna() inspects every selected column, so rows with a missing Entity or
# Code are discarded along with rows whose numeric conversion failed.
# astype() on the fresh frame gives Year a real integer dtype.
df = df.dropna().astype({'Year': int})

# Restrict the analysis window to 2010-2017 inclusive
df = df[(df['Year'] >= 2010) & (df['Year'] <= 2017)]

# Mean prevalence of each disorder per year across all remaining rows
yearly_avg = df.groupby('Year')[disorders].mean()

# Create Macro_Disorder_ext_df (independent copy; later cells add columns to it)
Macro_Disorder_ext_df = df.copy()

In [None]:
# Create a copy of the relevant columns (year plus every prevalence column).
# Note: this rebinds `df` and `yearly_avg`; the cells below read these versions.
df = Mental_Disorders_df[['Year'] + disorders].copy()

# Convert columns to numeric; unparseable cells become NaN.
# Plain df[col] = ... assignment replaces the column and its dtype, whereas
# df.loc[:, col] = ... writes in place under pandas 2.x and would leave an
# object-dtype column as object.
for col in ['Year'] + disorders:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Remove rows with NaN values, then make 'Year' a true integer column
df = df.dropna().astype({'Year': int})

# Filter for years from 2010 to 2017 (both ends inclusive)
df = df[(df['Year'] >= 2010) & (df['Year'] <= 2017)]

# Group by year and calculate the mean for each disorder
yearly_avg = df.groupby('Year')[disorders].mean()

# Display the result
yearly_avg

In [None]:
# Add placeholder columns for GDP and other economic indicators.
# NOTE(review): every value below is a synthetic uniform draw, not an
# observation, so any correlation computed against these columns is
# meaningless until real indicators are merged in.
# A seeded Generator keeps the placeholders (and every figure and CSV derived
# from them) identical across Restart & Run All.
_placeholder_rng = np.random.default_rng(42)
_n_rows = len(Macro_Disorder_ext_df)

Macro_Disorder_ext_df['GDP per Capita (USD)'] = _placeholder_rng.uniform(1000, 50000, _n_rows)
Macro_Disorder_ext_df['Percentage GDP Health Expenditure'] = _placeholder_rng.uniform(2, 20, _n_rows)
Macro_Disorder_ext_df['Unemployment (%)'] = _placeholder_rng.uniform(2, 15, _n_rows)
Macro_Disorder_ext_df['Urban Population'] = _placeholder_rng.uniform(100000, 10000000, _n_rows)
# Total population is the urban figure scaled by a factor in [1.2, 2), so the
# urban share derived later always lies between 50% and about 83%.
Macro_Disorder_ext_df['Population'] = Macro_Disorder_ext_df['Urban Population'] * _placeholder_rng.uniform(1.2, 2, _n_rows)
Macro_Disorder_ext_df['Age Dependency Ratio (%)'] = _placeholder_rng.uniform(30, 70, _n_rows)
Macro_Disorder_ext_df['Life Expectancy'] = _placeholder_rng.uniform(60, 85, _n_rows)

In [None]:
# NOTE(review): both frames below are filled with synthetic uniform draws, not
# observed data. The Generator is seeded so repeated runs give the same
# figures.
_synthetic_rng = np.random.default_rng(2017)

# Create Suicide_rate_df: one placeholder rate per year, 2010-2017
Suicide_rate_df = pd.DataFrame({
    'Year': range(2010, 2018),
    'Suicide rate (deaths per 100,000 individuals)': _synthetic_rng.uniform(5, 15, 8)
})

# Create Gender_Depression_df: copy of the extended frame plus per-gender placeholders
Gender_Depression_df = Macro_Disorder_ext_df.copy()
Gender_Depression_df['Prevalence in males (%)'] = _synthetic_rng.uniform(2, 10, len(Gender_Depression_df))
Gender_Depression_df['Prevalence in females (%)'] = _synthetic_rng.uniform(3, 12, len(Gender_Depression_df))

In [None]:
# One pie chart per endpoint year, showing each disorder's share of the
# summed yearly mean prevalences
for year in (2010, 2017):
    # Guard: report and skip years missing from the aggregated table
    if year not in yearly_avg.index:
        print(f"Year {year} is not in the data.")
        continue

    # Mean prevalence of each disorder for this year
    data = yearly_avg.loc[year]

    plt.figure(figsize=(10, 7))
    plt.pie(
        data,
        labels=disorders,
        autopct='%1.1f%%',
        startangle=140,
        colors=plt.cm.Paired.colors,
    )
    plt.title(f'Mental Health Disorders Distribution in {year}')
    plt.tight_layout()
    plt.show()

In [None]:
# Text summary of the yearly averages
print("Analysis of Mental Health Trends Over Time:")
print("1. Data Range: The data covers from", df['Year'].min(), "to", df['Year'].max())

# Mean of the yearly means per disorder, highest first
avg_percentages = yearly_avg.mean().sort_values(ascending=False)
print("\n2. Disorders ranked by average prevalence:")
for disorder, percentage in avg_percentages.items():
    print(f"   {disorder}: {percentage:.2f}%")

# Relative change (in percent) between the earliest and latest year available
first_year = yearly_avg.index.min()
last_year = yearly_avg.index.max()
first_values = yearly_avg.loc[first_year]
last_values = yearly_avg.loc[last_year]
change = (last_values - first_values) / first_values * 100

print(f"\n3. Relative change in disorders from {first_year} to {last_year}:")
for disorder, pct_change in change.items():
    print(f"   {disorder}: {pct_change:.2f}%")


In [None]:
# 1. Improved Trend Analysis of Multiple Mental Health Disorders
# One line per disorder, plotted from the yearly mean prevalence table.
plt.figure(figsize=(16, 10))
for disorder in disorders:
    plt.plot(yearly_avg.index, yearly_avg[disorder], label=disorder, linewidth=2, marker='o')

plt.title('Trends of Mental Health Disorders (2010-2017)', fontsize=20)
plt.xlabel('Year', fontsize=14)
plt.ylabel('Percentage of Population', fontsize=14)
# Legend sits outside the axes on the right so it does not cover the lines
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)
plt.tight_layout()

# savefig does not create missing directories; make sure the output folder
# exists so a fresh checkout survives Restart & Run All. This is the first
# cell that writes into Output_PNG.
Path('Output_PNG').mkdir(parents=True, exist_ok=True)
plt.savefig('Output_PNG/mental_health_trends.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# 2. Improved Correlation between Depression and GDP per Capita
# NOTE(review): unlike the later scatter cells, no regression line or
# correlation coefficient is computed here and the figure is not saved.
gdp_per_capita = Macro_Disorder_ext_df['GDP per Capita (USD)']
depression_rate = Macro_Disorder_ext_df['Depression (%)']

plt.figure(figsize=(14, 8))
plt.scatter(gdp_per_capita, depression_rate, alpha=0.6)
plt.xlabel('GDP per Capita (USD)', fontsize=14)
plt.ylabel('Depression Rate (%)', fontsize=14)
plt.title('GDP per Capita vs Depression Rate', fontsize=20)
# GDP is shown on a logarithmic x axis
plt.xscale('log')
plt.grid(True, linestyle='--', alpha=0.7)
# Finish the figure the same way every other plotting cell does, so it is
# laid out and rendered explicitly instead of relying on the inline backend.
plt.tight_layout()
plt.show()

In [None]:
# 3. Improved Comparison of Mental Health Disorders Across Income Groups
# Bucket GDP per capita into four labelled bands; pd.cut intervals are
# right-inclusive by default: (0, 1000], (1000, 5000], (5000, 20000], (20000, inf).
income_groups = pd.cut(
    Macro_Disorder_ext_df['GDP per Capita (USD)'],
    bins=[0, 1000, 5000, 20000, np.inf],
    labels=['Low', 'Lower-Middle', 'Upper-Middle', 'High'],
)

# Grouping by a categorical without `observed=` raises a deprecation warning
# on pandas 2.1+ and the default is scheduled to change. observed=False pins
# the existing behaviour: every band appears in the result, including empty ones.
disorder_by_income = Macro_Disorder_ext_df.groupby(income_groups, observed=False)[disorders].mean()

disorder_by_income.plot(kind='bar', figsize=(16, 10))
plt.title('Mental Health Disorders Across Income Groups', fontsize=20)
plt.xlabel('Income Group', fontsize=14)
plt.ylabel('Percentage of Population', fontsize=14)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=12)
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.savefig('Output_PNG/disorders_by_income.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# 4. Improved Time Series Analysis of Suicide Rates
# Mean suicide rate per year, drawn as a single line with point markers
suicide_rates = (
    Suicide_rate_df
    .groupby('Year')['Suicide rate (deaths per 100,000 individuals)']
    .mean()
)

fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(suicide_rates.index, suicide_rates.values, marker='o', linewidth=2)
ax.set_title('Global Average Suicide Rate Trend', fontsize=20)
ax.set_xlabel('Year', fontsize=14)
ax.set_ylabel('Suicide Rate (deaths per 100,000 individuals)', fontsize=14)
ax.grid(True, linestyle='--', alpha=0.7)
fig.tight_layout()
fig.savefig('Output_PNG/suicide_rate_trend.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# 5. Improved Relationship between Unemployment and Anxiety Disorders
unemployment_rate = Macro_Disorder_ext_df['Unemployment (%)']
anxiety_rate = Macro_Disorder_ext_df['Anxiety disorders (%)']

# Least-squares fit of anxiety prevalence on unemployment
slope, intercept, r_value, p_value, std_err = linregress(unemployment_rate, anxiety_rate)
line = intercept + slope * unemployment_rate

fig, ax = plt.subplots(figsize=(14, 8))
ax.scatter(unemployment_rate, anxiety_rate, alpha=0.6)
ax.set_xlabel('Unemployment Rate (%)', fontsize=14)
ax.set_ylabel('Anxiety Disorder Rate (%)', fontsize=14)
ax.set_title('Unemployment Rate vs Anxiety Disorder Rate', fontsize=20)
ax.grid(True, linestyle='--', alpha=0.7)

# Fitted line drawn over the points; the legend reports R squared
ax.plot(unemployment_rate, line, color='red', label=f'R² = {r_value**2:.2f}')
ax.legend(fontsize=12)

fig.tight_layout()
fig.savefig('Output_PNG/unemployment_vs_anxiety.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"Correlation coefficient: {r_value:.2f}")
print(f"P-value: {p_value:.4f}")

In [None]:
# 6. Improved Analysis of Urban Population and Drug Use Disorders
# Urban share of the total population, expressed in percent
urban_population_pct = (
    Macro_Disorder_ext_df['Urban Population'] / Macro_Disorder_ext_df['Population'] * 100
)
drug_use_rate = Macro_Disorder_ext_df['Drug use disorders (%)']

# Least-squares fit of drug-use prevalence on the urban share
slope, intercept, r_value, p_value, std_err = linregress(urban_population_pct, drug_use_rate)
line = intercept + slope * urban_population_pct

fig, ax = plt.subplots(figsize=(14, 8))
ax.scatter(urban_population_pct, drug_use_rate, alpha=0.6)
ax.set_xlabel('Urban Population (%)', fontsize=14)
ax.set_ylabel('Drug Use Disorder Rate (%)', fontsize=14)
ax.set_title('Urban Population vs Drug Use Disorder Rate', fontsize=20)
ax.grid(True, linestyle='--', alpha=0.7)

# Fitted line drawn over the points; the legend reports R squared
ax.plot(urban_population_pct, line, color='red', label=f'R² = {r_value**2:.2f}')
ax.legend(fontsize=12)

fig.tight_layout()
fig.savefig('Output_PNG/urban_population_vs_drug_use.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"Correlation coefficient: {r_value:.2f}")
print(f"P-value: {p_value:.4f}")

In [None]:
# 7. Improved Impact of Health Expenditure on Depression Rates
health_expenditure = Macro_Disorder_ext_df['Percentage GDP Health Expenditure']
depression_rate = Macro_Disorder_ext_df['Depression (%)']

# Least-squares fit of depression prevalence on health expenditure
slope, intercept, r_value, p_value, std_err = linregress(health_expenditure, depression_rate)
line = intercept + slope * health_expenditure

fig, ax = plt.subplots(figsize=(14, 8))
ax.scatter(health_expenditure, depression_rate, alpha=0.6)
ax.set_xlabel('Health Expenditure (% of GDP)', fontsize=14)
ax.set_ylabel('Depression Rate (%)', fontsize=14)
ax.set_title('Health Expenditure vs Depression Rate', fontsize=20)
ax.grid(True, linestyle='--', alpha=0.7)

# Fitted line drawn over the points; the legend reports R squared
ax.plot(health_expenditure, line, color='red', label=f'R² = {r_value**2:.2f}')
ax.legend(fontsize=12)

fig.tight_layout()
fig.savefig('Output_PNG/health_expenditure_vs_depression.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"Correlation coefficient: {r_value:.2f}")
print(f"P-value: {p_value:.4f}")

In [None]:
# 8. Improved Age Dependency Ratio and Its Impact on Mental Health
age_dep_ratio = Macro_Disorder_ext_df['Age Dependency Ratio (%)']
depression_rate = Macro_Disorder_ext_df['Depression (%)']

# Least-squares fit of depression prevalence on the age dependency ratio
slope, intercept, r_value, p_value, std_err = linregress(age_dep_ratio, depression_rate)
line = intercept + slope * age_dep_ratio

fig, ax = plt.subplots(figsize=(14, 8))
ax.scatter(age_dep_ratio, depression_rate, alpha=0.6)
ax.set_xlabel('Age Dependency Ratio (%)', fontsize=14)
ax.set_ylabel('Depression Rate (%)', fontsize=14)
ax.set_title('Age Dependency Ratio vs Depression Rate', fontsize=20)
ax.grid(True, linestyle='--', alpha=0.7)

# Fitted line drawn over the points; the legend reports R squared
ax.plot(age_dep_ratio, line, color='red', label=f'R² = {r_value**2:.2f}')
ax.legend(fontsize=12)

fig.tight_layout()
fig.savefig('Output_PNG/age_dependency_vs_depression.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"Correlation coefficient: {r_value:.2f}")
print(f"P-value: {p_value:.4f}")

In [None]:
# 9. Improved Comparison of Mental Health Disorders Between Genders
male_depression = Gender_Depression_df['Prevalence in males (%)']
female_depression = Gender_Depression_df['Prevalence in females (%)']

# Side-by-side box plots: position 0 is males, position 1 is females
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(data=[male_depression, female_depression], ax=ax)
plt.xticks([0, 1], ['Males', 'Females'], fontsize=12)
ax.set_title('Depression Rates by Gender', fontsize=20)
ax.set_ylabel('Depression Rate (%)', fontsize=14)
ax.grid(True, linestyle='--', alpha=0.7)
fig.tight_layout()
fig.savefig('Output_PNG/depression_by_gender.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# 10. Improved Relationship Between Life Expectancy and Mental Health
life_expectancy = Macro_Disorder_ext_df['Life Expectancy']
depression_rate = Macro_Disorder_ext_df['Depression (%)']

# Least-squares fit of depression prevalence on life expectancy
slope, intercept, r_value, p_value, std_err = linregress(life_expectancy, depression_rate)
line = intercept + slope * life_expectancy

fig, ax = plt.subplots(figsize=(14, 8))
ax.scatter(life_expectancy, depression_rate, alpha=0.6)
ax.set_xlabel('Life Expectancy (years)', fontsize=14)
ax.set_ylabel('Depression Rate (%)', fontsize=14)
ax.set_title('Life Expectancy vs Depression Rate', fontsize=20)
ax.grid(True, linestyle='--', alpha=0.7)

# Fitted line drawn over the points; the legend reports R squared
ax.plot(life_expectancy, line, color='red', label=f'R² = {r_value**2:.2f}')
ax.legend(fontsize=12)

fig.tight_layout()
fig.savefig('Output_PNG/life_expectancy_vs_depression.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"Correlation coefficient: {r_value:.2f}")
print(f"P-value: {p_value:.4f}")

In [None]:
# Additional analysis: Correlation matrix of all disorders
# Pairwise correlations between the prevalence columns
correlation_matrix = Macro_Disorder_ext_df[disorders].corr()

# Colour scale is pinned to [-1, 1]
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1, ax=ax)
ax.set_title('Correlation Matrix of Mental Health Disorders', fontsize=20)
fig.tight_layout()
fig.savefig('Output_PNG/disorder_correlation_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Summary statistics for the disorder columns plus four of the economic indicators
summary_columns = disorders + [
    'GDP per Capita (USD)',
    'Percentage GDP Health Expenditure',
    'Unemployment (%)',
    'Life Expectancy',
]
print("\nSummary Statistics:")
print(Macro_Disorder_ext_df[summary_columns].describe())

In [None]:
# Persist the extended frame as CSV in the working directory, without the index column
Macro_Disorder_ext_df.to_csv(
    'Macro_Disorder_ext_data.csv',
    index=False,
)
print("\nThe extended dataset has been saved as 'Macro_Disorder_ext_data.csv'")