In [None]:
import pandas as pd

# Load each country's cleaned dataset and tag every row with its country of
# origin, so rows remain distinguishable once the frames are stacked.
benin = pd.read_csv('data/benin_clean.csv').assign(Country='Benin')
togo = pd.read_csv('data/togo_clean.csv').assign(Country='Togo')
sierra = pd.read_csv('data/sierra_leone_clean.csv').assign(Country='Sierra Leone')

# Stack the three frames into a single table with a fresh 0..n-1 index.
df_all = pd.concat((benin, togo, sierra), ignore_index=True)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Columns to compare across countries; each one gets its own box plot.
metrics = ['GHI', 'DNI', 'DHI']

for metric in metrics:
    # Draw on an explicit Axes instead of pyplot's implicit "current figure",
    # so the plotting target is unambiguous.
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.boxplot(x='Country', y=metric, data=df_all, ax=ax)
    ax.set_title(f'{metric} Comparison Across Countries')
    plt.show()
    # Release the figure once it has been shown so open figures do not
    # accumulate across loop iterations.
    plt.close(fig)


In [None]:
# Per-country mean, median and standard deviation of the GHI, DNI and DHI
# columns, rounded to two decimals.
summary = (
    df_all
    .groupby('Country')[['GHI', 'DNI', 'DHI']]
    .agg(['mean', 'median', 'std'])
    .round(2)
)
summary


In [None]:
from scipy.stats import f_oneway

# One-way ANOVA on GHI across the three countries.
# Missing values are dropped first: a single NaN in any sample would
# otherwise make both the F-statistic and the p-value NaN.
ghi_samples = [frame['GHI'].dropna() for frame in (benin, togo, sierra)]
f_stat, p_val = f_oneway(*ghi_samples)

# The p-value is printed in scientific notation: fixed-point ".4f" would show
# any value below 5e-5 as a misleading "0.0000".
print(f"F-statistic: {f_stat:.2f}, p-value: {p_val:.3e}")


In [None]:
- Benin shows the highest average GHI, indicating strong solar potential.
- Sierra Leone shows greater variability in DHI than the other two countries.
- Togo displays the lowest median DNI, suggesting less direct solar radiation.


In [None]:
# Mean GHI per country, sorted ascending so the largest bar is drawn at the
# top of the horizontal bar chart.
avg_ghi = (
    df_all
    .groupby('Country')['GHI']
    .mean()
    .sort_values()
)

ax = avg_ghi.plot.barh(figsize=(6, 4), title="Average GHI per Country")
ax.set_xlabel("GHI")
plt.show()
