In [None]:
# Import libraries
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import f_oneway

In [None]:
# Load cleaned datasets
# Reads the three per-country cleaned CSVs into benin_df, sierra_df and togo_df.
try:
    benin_df = pd.read_csv('../data/benin_clean.csv')
    sierra_df = pd.read_csv('../data/sierraleone_clean.csv')
    togo_df = pd.read_csv('../data/togo_clean.csv')
except FileNotFoundError as e:
    print(f"Error: {e}")
    # Re-raise rather than call exit(): the failure should surface as a traceback
    # in this cell instead of ending the interpreter/kernel session, and later
    # cells must not run against frames that were never loaded.
    raise

In [None]:
# Add country labels
# The label is written onto each per-country frame itself, so benin_df,
# sierra_df and togo_df all carry a 'Country' column from this cell onward.
benin_df['Country'] = 'Benin'
sierra_df['Country'] = 'Sierra Leone'
togo_df['Country'] = 'Togo'

# Stack the three frames into one long frame for cross-country plots.
# ignore_index=True gives the result a fresh 0..n-1 index; without it the
# per-frame index labels repeat across countries, leaving duplicate labels
# that break label-based alignment and lookups on combined_df.
combined_df = pd.concat([benin_df, sierra_df, togo_df], ignore_index=True)

In [None]:
# Boxplots for GHI, DNI, DHI
# Draws one boxplot per metric, grouped by country, and saves each figure as
# ../figures/<metric>_boxplot.png before showing it.

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('../figures', exist_ok=True)

for metric in ['GHI', 'DNI', 'DHI']:
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(x='Country', y=metric, data=combined_df, palette='Set2', ax=ax)
    ax.set_title(f'{metric} Comparison Across Countries')
    # Save before show(): some backends discard the figure once it is shown.
    fig.savefig(f'../figures/{metric}_boxplot.png')
    plt.show()
    # Release the figure explicitly so figures do not accumulate across iterations.
    plt.close(fig)

In [None]:
# Summary Table
# One row per country; for each of GHI, DNI and DHI there is a
# <metric>_Mean, <metric>_Median and <metric>_Std column, in that order.
country_frames = {'Benin': benin_df, 'Sierra Leone': sierra_df, 'Togo': togo_df}
aggregations = {'Mean': 'mean', 'Median': 'median', 'Std': 'std'}

summary_columns = {'Country': list(country_frames)}
for metric in ['GHI', 'DNI', 'DHI']:
    for label, method in aggregations.items():
        # Call the named Series reduction (mean/median/std) on each country's column.
        summary_columns[f'{metric}_{label}'] = [
            getattr(frame[metric], method)() for frame in country_frames.values()
        ]

summary_stats = pd.DataFrame(summary_columns)

In [None]:
# Show the per-country summary table in the notebook output, then write it
# (without the index column) to the same ../data directory as the cleaned CSVs.
print("Summary Statistics Across Countries:")
display(summary_stats)
summary_stats.to_csv(path_or_buf='../data/summary_stats.csv', index=False)

In [None]:
# ANOVA for GHI
# One-way ANOVA across the three countries' GHI columns; prints the
# F-statistic and p-value.
# Missing values are dropped first: a single NaN in any group would make
# f_oneway return NaN for both the statistic and the p-value.
ghi_groups = [df['GHI'].dropna() for df in (benin_df, sierra_df, togo_df)]
f_stat, p_value = f_oneway(*ghi_groups)
print(f"ANOVA for GHI: F-statistic = {f_stat:.2f}, p-value = {p_value:.4f}")


In [None]:
# Bar Chart for Average GHI
# Plots the GHI_Mean column of summary_stats, one bar per country, and saves
# the figure as ../figures/ghi_ranking.png before showing it.

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('../figures', exist_ok=True)

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x='Country', y='GHI_Mean', data=summary_stats, palette='Set2', ax=ax)
ax.set_title('Average GHI by Country')
# Save before show(): some backends discard the figure once it is shown.
fig.savefig('../figures/ghi_ranking.png')
plt.show()