In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patheffects as path_effects

# Read the medical doctors dataset and discard incomplete rows in one pass.
# Only the columns listed here are checked for missing values; rows with
# gaps in any other column are kept.
file_path = 'meddoctors.csv'
required_columns = [
    'Countries, territories and areas',
    'Year',
    'Medical doctors (per 10 000 population)',
    'Medical doctors (number)',
]
data = pd.read_csv(file_path).dropna(subset=required_columns)

# Apply the seaborn "whitegrid" theme, then override the font sizes in a
# single rcParams update (the theme call stays first, as in the original order).
sns.set(style="whitegrid")
plt.rcParams.update({
    'font.size': 18,
    'axes.labelsize': 20,
    'axes.titlesize': 22,
    'xtick.labelsize': 16,
    'ytick.labelsize': 14,
})

# Select the desired countries for detailed analysis
selected_countries = ["United States of America", "India", "China", "Brazil", "Germany"]
filtered_data = data[data['Countries, territories and areas'].isin(selected_countries)]

# Aggregate data by country and year: one row per (country, year) holding the
# mean of every numeric column.
# numeric_only=True is deliberate: pandas >= 2.0 raises TypeError when mean()
# meets a non-numeric column, whereas older versions silently dropped such
# columns. Passing it explicitly gives the same result on both.
grouped_data = (
    filtered_data
    .groupby(['Countries, territories and areas', 'Year'])
    .mean(numeric_only=True)
    .reset_index()
)

# Create the 2x2 grid of panels that hosts all four charts
fig, axes = plt.subplots(2, 2, figsize=(20, 20))  # Adjust figure size as needed

# Bright seaborn palette; the per-country line colours are taken from it
palette = sns.color_palette("bright")

# Plot 1 (top-left panel): doctors per 10,000 population over the years,
# one thick line per selected country.
trend_ax = axes[0, 0]
country_column = grouped_data['Countries, territories and areas']
for idx, country in enumerate(selected_countries):
    sns.lineplot(
        data=grouped_data[country_column == country],
        x='Year',
        y='Medical doctors (per 10 000 population)',
        label=country,
        ax=trend_ax,
        linewidth=8,
        # Wrap around the palette if there are more countries than colours
        color=palette[idx % len(palette)],
    )
trend_ax.set_title('Average Medical Doctors per 10,000 Population Over Years', fontweight='bold')
trend_ax.set(xlabel='Year', ylabel='Doctors per 10,000 Population')

# Give every plotted line a black drop shadow; Normal() then draws the line
# itself on top of its shadow.
for plotted_line in trend_ax.get_lines():
    shadow = path_effects.SimpleLineShadow(shadow_color="black", linewidth=8)
    plotted_line.set_path_effects([shadow, path_effects.Normal()])

# Plot 2 (top-right panel): pie chart of the doctor counts for each selected
# country. The counts are the per-year values in grouped_data summed over all
# years, so each wedge is a country's share of that summed total.
colors = ["#FF9999", "#66B2FF", "magenta", "#99FF99", "#FFCC99"]  # Custom colors including magenta
total_doctors = grouped_data.groupby('Countries, territories and areas')['Medical doctors (number)'].sum()

# Build the explode offsets from the data instead of hard-coding five values:
# pie() requires exactly one offset per wedge, and a selected country with no
# remaining rows would otherwise make the call fail with a length mismatch.
explode = [0.2] * len(total_doctors)
wedges, texts, autotexts = axes[0, 1].pie(total_doctors, labels=total_doctors.index,
                                          autopct='%1.1f%%', explode=explode, colors=colors)
axes[0, 1].set_title('Proportion of Medical Doctors in Selected Countries', fontweight='bold')

# Outline each wedge in black so the exploded slices stand out
for wedge in wedges:
    wedge.set_edgecolor('black')

# Plot 3 (bottom-left panel): histogram with KDE of doctors per 10,000
# population across every row of the dataset, annotated with the mean and
# the mean +/- one standard deviation.
hist_ax = axes[1, 0]
doctor_density = data['Medical doctors (per 10 000 population)']
mean_value = doctor_density.mean()
std_value = doctor_density.std()
upper_band = mean_value + std_value
lower_band = mean_value - std_value

sns.histplot(doctor_density, kde=True, linewidth=4, color="green", ax=hist_ax)
hist_ax.axvline(mean_value, color='magenta', linestyle='--', label=f'Mean: {mean_value:.2f}')
hist_ax.axvline(upper_band, color='k', linestyle='--', linewidth=2,
                label=f'Mean + Std: {upper_band:.2f}')
hist_ax.axvline(lower_band, color='r', linestyle='--', linewidth=2,
                label=f'Mean - Std: {lower_band:.2f}')
hist_ax.set_title('Distribution of Doctors per 10,000 Population', fontweight='bold')
hist_ax.set(xlabel='Doctors per 10,000 Population', ylabel='Frequency')
hist_ax.legend()

# Add shadows to the histogram bars: draw a translucent black bar behind each
# existing bar. Iterate over a snapshot of the patches because bar() appends
# new patches to the same axes while we loop.
# NOTE(review): bar() is called with its default alignment, so each shadow is
# positioned from the bin's left edge rather than drawn exactly behind the bar
# - presumably the intended offset "shadow" look; confirm.
for hist_bar in list(hist_ax.patches):
    hist_ax.bar(hist_bar.get_x(), hist_bar.get_height(),
                width=hist_bar.get_width(), alpha=0.7, color='black', zorder=0)

# Plot 4 (bottom-right panel): boxplot of doctors per 10,000 population for
# the selected countries, built from the un-aggregated filtered rows.
box_ax = axes[1, 1]
sns.boxplot(x='Countries, territories and areas', y='Medical doctors (per 10 000 population)',
            data=filtered_data, ax=box_ax, palette="Set2")
box_ax.set_title('Medical Doctors per 10,000 Population in Selected Countries', fontweight='bold')
box_ax.set_xlabel('Country')
box_ax.set_ylabel('Doctors per 10,000 Population')

# Add depth to boxplot: black outlines on the boxes and black whiskers, caps,
# medians and outlier markers.
# The previous version indexed axes[2, 2], which does not exist on a 2x2 grid,
# and assumed a fixed number of Line2D objects per box. Depending on the
# matplotlib/seaborn version the boxes are exposed through ax.artists or
# ax.patches, so both containers are styled; and because every line in this
# panel should be black anyway, all lines are restyled without index arithmetic.
for box in [*box_ax.artists, *box_ax.patches]:
    box.set_edgecolor('black')
for line in box_ax.lines:
    line.set_color('black')
    line.set_mfc('black')
    line.set_mec('black')
        
# Final subplot margins in one call: room at the top for the two heading
# lines and at the bottom for the report text box.
plt.subplots_adjust(left=0.1, right=0.9, top=0.88, bottom=0.25)

# Include main title, author name and student ID
fig.suptitle(
    'Healthcare Workforce Analysis: A Global Perspective',
    x=0.5,
    fontsize=40,
    fontweight='bold',
    color='navy',
)
fig.text(
    0.5, 0.93,
    'Ekene Pauline Anugha-Ugochukwu : 22045525',
    ha='center',
    va='center',
    fontsize=30,
    fontweight='regular',
    color='navy',
)

# Report text rendered in a box underneath the four panels.
# Fixed relative to the previous wording: missing spaces after punctuation
# ("Germany.The", "summary,although", "capacity,countries"), a doubled space
# before "71.7%", a stray leading space after the line break, and
# "infographics presents".
# NOTE: the figures quoted below (71.7%, 3.9%, ~21, 1990) are hard-coded and
# are not computed from the data; update them by hand if the dataset changes.
report_text = (
    "The infographic presents a comprehensive view of the medical"
    " doctors' density and distribution across five selected"
    " countries: the United States of America, India, China,"
    " Brazil, and Germany. The line graph depicts a clear upward"
    " trend in the average number of medical doctors per 10,000"
    " people for all selected countries over a span of"
    " approximately 30 years, starting from 1990. The pie chart"
    " illustrates that China comprises the majority share, with"
    " 71.7% of the total number of medical doctors among the"
    " countries observed while Brazil makes up the smallest"
    " share with 3.9%. The histogram shows that on average,"
    " there are approximately 21 medical doctors for every"
    " 10,000 people across the dataset, however, even though"
    " the majority of the countries have a lower density of"
    " doctors, there are a few countries with a very high density."
    " From the box plots, it is evident that Germany and the United"
    " States have higher medians compared to Brazil and India,"
    " indicating a greater density of medical professionals."
    " The range of values, particularly the interquartile range"
    " is most variable for the United States, suggesting diversity"
    " in the distribution of doctors across different areas within"
    " the country.\n"
    "In summary, although the number of medical doctors is just"
    " one indicator of healthcare system capacity, countries with"
    " more medical doctors per capita are typically better equipped"
    " to handle health emergencies, such as pandemics or natural"
    " disasters, due to a more robust healthcare workforce."
)

# Centre the text near the bottom edge of the figure, wrapped, inside a
# rounded translucent pink box.
plt.figtext(0.5, 0.02, report_text, ha="center", fontsize=20, wrap=True,
            bbox=dict(facecolor='lightpink', alpha=0.5, boxstyle='round,pad=1'))


# Show the plot
plt.show()
