In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import kruskal
import os

# Load cleaned data
# Read the three per-country CSVs in one pass; unpacking consumes the map so
# every file is read here, in the order Benin, Togo, Sierra Leone.
benin, togo, sierraleone = map(pd.read_csv, (
    'data/benin_clean.csv',
    'data/togo_clean.csv',
    'data/sierraleone_clean.csv',
))

# Boxplots
# Draw one panel per irradiance metric (GHI, DNI, DHI), each with one box per
# country, and save the figure to plots/comparison_boxplots.png.
# savefig() does not create missing directories, so make sure plots/ exists.
os.makedirs('plots', exist_ok=True)
fig, axes = plt.subplots(1, 3, figsize=(12, 6))
for ax, metric in zip(axes, ['GHI', 'DNI', 'DHI']):
    # Wide frame with one column per country: seaborn draws one box per column.
    # concat aligns on the row index, so shorter datasets are padded with NaN.
    data = pd.concat([
        benin[metric].rename('Benin'),
        togo[metric].rename('Togo'),
        sierraleone[metric].rename('Sierra Leone')
    ], axis=1)
    sns.boxplot(data=data, ax=ax)
    ax.set_title(f'{metric} Comparison')
fig.tight_layout()
fig.savefig('plots/comparison_boxplots.png')
# Close this specific figure so it cannot leak into later plots.
plt.close(fig)

# Summary Table
def _per_metric(frame, stat):
    """Return the named pandas statistic of ``frame`` for GHI, DNI and DHI, in that order."""
    return [getattr(frame[column], stat)() for column in ('GHI', 'DNI', 'DHI')]


# One row per metric; for each country a mean, a median and a standard
# deviation column (pandas' default ``std()``). Written to CSV without the index.
summary = pd.DataFrame({
    'Metric': ['GHI', 'DNI', 'DHI'],
    'Benin_Mean': _per_metric(benin, 'mean'),
    'Benin_Median': _per_metric(benin, 'median'),
    'Benin_SD': _per_metric(benin, 'std'),
    'Togo_Mean': _per_metric(togo, 'mean'),
    'Togo_Median': _per_metric(togo, 'median'),
    'Togo_SD': _per_metric(togo, 'std'),
    'SierraLeone_Mean': _per_metric(sierraleone, 'mean'),
    'SierraLeone_Median': _per_metric(sierraleone, 'median'),
    'SierraLeone_SD': _per_metric(sierraleone, 'std'),
})
summary.to_csv('data/comparison_summary.csv', index=False)

# Statistical Testing
# Kruskal-Wallis H-test comparing the GHI readings of the three countries.
# kruskal() propagates NaN by default, so a single missing reading would turn
# the p-value into NaN; drop missing values from each sample first. When the
# columns contain no NaN this leaves the result unchanged.
ghi_stat, ghi_p = kruskal(
    benin['GHI'].dropna(),
    togo['GHI'].dropna(),
    sierraleone['GHI'].dropna(),
)
print(f"Kruskal-Wallis GHI p-value: {ghi_p}")

# Bar Chart
# Plot each country's mean GHI and save the figure to plots/ghi_ranking.png.
mean_ghi = pd.DataFrame({
    'Country': ['Benin', 'Togo', 'Sierra Leone'],
    'Mean_GHI': [benin['GHI'].mean(), togo['GHI'].mean(), sierraleone['GHI'].mean()]
})
# savefig() does not create missing directories, so make sure plots/ exists.
os.makedirs('plots', exist_ok=True)
# Order the bars from highest to lowest mean so the chart reads as the ranking
# its filename promises; mean_ghi itself keeps its original row order.
ranked_countries = mean_ghi.sort_values('Mean_GHI', ascending=False)['Country'].tolist()
fig, ax = plt.subplots()
sns.barplot(x='Country', y='Mean_GHI', data=mean_ghi, order=ranked_countries, ax=ax)
ax.set_title('Average GHI by Country')
fig.savefig('plots/ghi_ranking.png')
# Close this specific figure so it cannot leak into later plots.
plt.close(fig)

# Key Observations
# NOTE(review): this is static text, not derived from the statistics computed
# above; re-check each statement whenever the input data changes.
key_observations = """
### Key Observations
- Benin shows the highest median GHI but with moderate variability, suggesting stable solar potential.
- Togo has lower GHI but less variability, indicating consistent but less intense solar resources.
- Sierra Leone exhibits the highest variability in GHI, likely due to climatic factors, but has competitive mean values.
"""
print(key_observations)