In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import f_oneway, kruskal

## Load each country’s cleaned CSV 

In [2]:
# Read the three cleaned per-country CSV files in one pass. The paths are
# relative to the directory the notebook is run from.
benin, sierra_leone, togo = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/benin_clean.csv",
        "data/sierraleone_clean.csv",
        "data/togo_clean.csv",
    )
)

In [3]:
# Tag every row with the country it came from so the frames remain
# distinguishable once stacked. The per-country frames are modified in place.
for country_frame, country_name in (
    (benin, "Benin"),
    (sierra_leone, "Sierra Leone"),
    (togo, "Togo"),
):
    country_frame["country"] = country_name

# Stack the three frames vertically and renumber the rows from zero.
df_all = pd.concat([benin, sierra_leone, togo], ignore_index=True)

## Metric Comparison

### Boxplots 

In [None]:
metrics = ['GHI', 'DNI', 'DHI']

# Draw one standalone figure per metric, with one box per country.
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue`; revisit once the seaborn version used by this project is confirmed.
for metric in metrics:
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.boxplot(data=df_all, x='country', y=metric, palette='Set2', ax=ax)
    ax.set_title(f'{metric} Distribution by Country')
    ax.set_ylabel(metric)
    ax.set_xlabel('Country')
    ax.grid(True)
    fig.tight_layout()
    plt.show()


### Summary Table

In [None]:
# Per-country mean, median and standard deviation of each metric, rounded to
# two decimals. Columns form a (metric, statistic) MultiIndex.
summary_columns = ['GHI', 'DNI', 'DHI']
summary_stats = ['mean', 'median', 'std']

per_country = df_all.groupby('country')[summary_columns]
summary = per_country.agg(summary_stats).round(2)
summary


## Statistical Testing

In [None]:
# Extract GHI by country. Missing values are dropped first because both tests
# below propagate NaN by default, which would turn the p-values into NaN.
ghi_benin = benin["GHI"].dropna()
ghi_sierra_leone = sierra_leone["GHI"].dropna()
ghi_togo = togo["GHI"].dropna()

# One-way ANOVA: parametric test of whether the group means are equal.
f_stat, p_anova = f_oneway(ghi_benin, ghi_sierra_leone, ghi_togo)

# Kruskal-Wallis: rank-based alternative that does not assume normality.
h_stat, p_kruskal = kruskal(ghi_benin, ghi_sierra_leone, ghi_togo)

print(f"ANOVA p-value: {p_anova:.4f}")
print(f"Kruskal-Wallis p-value: {p_kruskal:.4f}")


In [8]:
from IPython.display import display, Markdown

# Significance threshold used to interpret the two test results.
ALPHA = 0.05

mean_ghi_togo = summary.loc['Togo', ('GHI', 'mean')]
mean_ghi_benin = summary.loc['Benin', ('GHI', 'mean')]
mean_ghi_sl = summary.loc['Sierra Leone', ('GHI', 'mean')]

# Derive the ranking from the summary table instead of assuming an order, so
# the narrative cannot contradict the numbers it quotes.
ghi_ranked = summary[('GHI', 'mean')].sort_values(ascending=False)
ghi_leader = ghi_ranked.index[0]
dni_leader = summary[('DNI', 'mean')].idxmax()

runners_up = [
    f"{country} ({value:.2f})" for country, value in ghi_ranked.iloc[1:].items()
]
if len(runners_up) > 1:
    runners_up_text = ", ".join(runners_up[:-1]) + ", and " + runners_up[-1]
else:
    runners_up_text = "".join(runners_up)

if ghi_leader == dni_leader:
    leader_line = (
        f"The summary table shows that {ghi_leader} has the highest GHI and DNI "
        "values on average."
    )
else:
    leader_line = (
        f"On average, {ghi_leader} has the highest GHI while {dni_leader} has "
        "the highest DNI."
    )

# Word the conclusion from the computed p-values rather than quoting fixed
# numbers; NaN is handled explicitly because every comparison with NaN is False.
if pd.isna(p_anova) or pd.isna(p_kruskal):
    verdict = "at least one test did not produce a valid p-value, so no conclusion can be drawn"
elif p_anova < ALPHA and p_kruskal < ALPHA:
    verdict = "indicating a statistically significant difference in GHI between countries"
elif p_anova >= ALPHA and p_kruskal >= ALPHA:
    verdict = f"indicating no statistically significant difference in GHI between countries at α = {ALPHA}"
else:
    verdict = (
        f"the two tests disagree at α = {ALPHA}, so the evidence for a difference "
        "in GHI between countries is inconclusive"
    )

display(Markdown(f"""
- {leader_line}
- Mean GHI is highest in {ghi_leader} ({ghi_ranked.iloc[0]:.2f} W/m²), followed by {runners_up_text}.
- ANOVA p-value = {p_anova:.3g} and Kruskal-Wallis p-value = {p_kruskal:.3g}, {verdict}.
- {ghi_leader} may have higher solar potential, making it a priority for solar energy investments.
"""))


- Boxplots reveal that Togo has higher GHI and DNI values on average compared to Benin and Sierra Leone.
- Mean GHI is highest in Togo (229.83 W/m²), followed by Benin (229.83), and Sierra Leone (196.44).
- ANOVA p-value = 0.0032 and Kruskal-Wallis p-value = 0.0027, indicating a statistically significant difference in GHI between countries.
- Togo may have higher solar potential, making it a priority for solar energy investments.


## Visual Summary

In [None]:
# seaborn (`sns`) and matplotlib (`plt`) are already imported in the first
# cell, so they are not re-imported here.

# Average GHI per country, highest first, rounded to two decimals.
ghi_avg = df_all.groupby("country")["GHI"].mean().sort_values(ascending=False).round(2)

# Horizontal bar chart: one bar per country, ordered by average GHI.
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue`; revisit once the seaborn version used by this project is confirmed.
fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(x=ghi_avg.values, y=ghi_avg.index, palette="viridis", ax=ax)
ax.set_xlabel("Average GHI (W/m²)")
ax.set_title("Average GHI by Country")
ax.grid(axis='x', linestyle='--', alpha=0.6)
fig.tight_layout()
plt.show()
