In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway, kruskal

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's "whitegrid" style for every plot drawn below.
sns.set(style="whitegrid")


In [None]:
# Read each country's `*_clean.csv` file and tag every row with the country
# name, so the origin of a row is still known once the frames are stacked.
benin = pd.read_csv('../data/benin_clean.csv').assign(Country='Benin')
sierra_leone = pd.read_csv('../data/sierra_leone_clean.csv').assign(Country='Sierra Leone')
togo = pd.read_csv('../data/togo_clean.csv').assign(Country='Togo')

# Stack the three frames into one long table with a fresh 0..n-1 index.
df = pd.concat(
    [benin, sierra_leone, togo],
    ignore_index=True,
)


In [None]:
# One boxplot panel per metric, placed side by side so the per-country
# distributions can be compared at a glance.
fig, axes = plt.subplots(1, 3, figsize=(16, 5))
for ax, var in zip(axes, ['GHI', 'DNI', 'DHI']):
    sns.boxplot(data=df, x='Country', y=var, palette="Set2", ax=ax)
    ax.set_title(f'{var} Distribution by Country')
fig.tight_layout()
plt.show()


In [None]:
# Per-country mean, median and standard deviation of each metric,
# rounded to two decimals for display.
metrics = ['GHI', 'DNI', 'DHI']
summary_stats = ['mean', 'median', 'std']
summary = (
    df.groupby('Country')[metrics]
    .agg(summary_stats)
    .round(2)
)
summary


In [None]:
# Compare GHI across the three countries with a one-way ANOVA and a
# Kruskal-Wallis H-test. Missing GHI values are dropped per country first.
ghi_groups = [frame['GHI'].dropna() for frame in (benin, sierra_leone, togo)]

# p-values are printed with general ("g") formatting: the fixed-point ".4f"
# format shows any p-value below 5e-5 as "0.0000", which reads as exactly
# zero. ".4g" keeps four significant digits and switches to scientific
# notation for very small values.
anova_result = f_oneway(*ghi_groups)
print(f"ANOVA result: F = {anova_result.statistic:.2f}, p = {anova_result.pvalue:.4g}")

kruskal_result = kruskal(*ghi_groups)
print(f"Kruskal-Wallis result: H = {kruskal_result.statistic:.2f}, p = {kruskal_result.pvalue:.4g}")


In [None]:
# Bar height is the per-country mean GHI; the error bar spans one standard
# deviation. `errorbar='sd'` replaces the `ci='sd'` spelling, which seaborn
# deprecated in v0.12 and which emits a FutureWarning.
# NOTE(review): seaborn >= 0.13 also warns when `palette` is passed without
# `hue`; silencing that needs `hue='Country', legend=False`, which requires
# 0.13+ — confirm the target seaborn version before changing it.
plt.figure(figsize=(6, 4))
sns.barplot(data=df, x='Country', y='GHI', estimator='mean', errorbar='sd', palette='pastel')
plt.title('Average GHI by Country')
plt.ylabel('Mean GHI')
plt.show()


### 🔍 Key Observations

- Benin has the highest median GHI of the three countries.
- Sierra Leone exhibits the lowest solar radiation.
- The differences in GHI between countries are statistically significant (p < 0.05, per the ANOVA and Kruskal-Wallis tests above).
