In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

# Load each country's cleaned dataset (paths are relative to the working directory)
benin = pd.read_csv("data/benin_clean.csv")
togo = pd.read_csv("data/togo_clean.csv")
sierra = pd.read_csv("data/sierra_clean.csv")

# Add country labels so rows stay attributable after the frames are stacked
benin['Country'] = 'Benin'
togo['Country'] = 'Togo'
sierra['Country'] = 'Sierra Leone'

# Combine all datasets.
# Each frame was read with the default 0..k-1 row index, so a plain concat
# would leave the same labels repeated once per country. ignore_index=True
# gives the combined frame a fresh unique index, which keeps label-based
# access unambiguous and avoids duplicate-label reindex errors downstream.
df_all = pd.concat([benin, togo, sierra], ignore_index=True)


In [None]:
# Draw one boxplot figure per metric (GHI, DNI, DHI), grouped by Country.
# Uses explicit Figure/Axes objects instead of the implicit pyplot state.
metric_columns = ('GHI', 'DNI', 'DHI')
for metric in metric_columns:
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.boxplot(data=df_all, x='Country', y=metric, ax=ax)
    ax.set_title(f"{metric} Distribution by Country")
    ax.set_xlabel("Country")
    ax.set_ylabel(metric)
    ax.grid(True)
    plt.show()


In [None]:
# Per-country mean, median and standard deviation of each metric column,
# rounded to two decimals; the bare last expression displays the table.
metric_columns = ["GHI", "DNI", "DHI"]
per_country = df_all.groupby("Country")[metric_columns]
summary = per_country.agg(['mean', 'median', 'std']).round(2)
summary


In [None]:
# One-way ANOVA on GHI across the three country frames.
# Missing GHI values are dropped from each sample before the test.
ghi_samples = [frame['GHI'].dropna() for frame in (benin, togo, sierra)]
anova_result = stats.f_oneway(*ghi_samples)

print("ANOVA p-value for GHI:", anova_result.pvalue)
