In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import f_oneway  # for ANOVA

# Read each country's cleaned CSV and tag every row with the country it came
# from, so the rows stay distinguishable once the three frames are stacked.
benin = pd.read_csv("../data/benin_clean.csv").assign(Country="Benin")
sierra = pd.read_csv("../data/sierraleone_clean.csv").assign(Country="Sierra Leone")
togo = pd.read_csv("../data/togo_clean.csv").assign(Country="Togo")

# Stack the three frames into one long table with a fresh 0..n-1 index.
df = pd.concat([benin, sierra, togo], ignore_index=True)

# Quick look at the first rows of the combined table.
df.head()


In [None]:
# Per-country mean, median and standard deviation of the three irradiance
# columns. The result has one row per country and a two-level column index
# (column name, statistic).
irradiance_cols = ["GHI", "DNI", "DHI"]
summary_funcs = ["mean", "median", "std"]

by_country = df.groupby("Country")
stats = by_country[irradiance_cols].agg(summary_funcs)
stats


In [None]:
# Side-by-side box plots (one panel per irradiance column) comparing the
# distribution of each measure across countries.
plt.figure(figsize=(15, 5))

for panel, metric in enumerate(("GHI", "DNI", "DHI"), start=1):
    plt.subplot(1, 3, panel)
    sns.boxplot(data=df, x="Country", y=metric)
    plt.title(f"{metric} Comparison")

# Keep titles and axis labels of neighbouring panels from overlapping.
plt.tight_layout()
plt.show()


In [None]:
cd C:\Users\utopi\solar-challenge-week1
.venv\Scripts\activate
pip install scipy


In [None]:
# One-way ANOVA: is the mean GHI the same in all three countries?
#
# Missing readings are dropped from each sample first. f_oneway propagates
# NaN by default, so a single missing value in any sample would otherwise
# turn both the F-statistic and the p-value into nan.
ghi_benin = benin["GHI"].dropna()
ghi_sierra = sierra["GHI"].dropna()
ghi_togo = togo["GHI"].dropna()

f_stat, p_value = f_oneway(ghi_benin, ghi_sierra, ghi_togo)

print("ANOVA Test for GHI Across Countries")
print("F-statistic:", f_stat)
print("p-value:", p_value)


In [None]:
# Rank the countries by their mean GHI (highest first) and show the ranking
# as a bar chart.
avg_ghi = df.groupby("Country")["GHI"].mean().sort_values(ascending=False)

plt.figure(figsize=(7, 4))
sns.barplot(x=avg_ghi.index, y=avg_ghi.values)
plt.title("Average GHI Ranking Across Countries")
# "\u00b2" is the superscript-two character. The label previously rendered as
# "W/mÂ²" because the UTF-8 bytes had been mis-decoded; the escape keeps the
# source ASCII-only so the unit survives any future encoding round-trip.
plt.ylabel("Average GHI (W/m\u00b2)")
plt.show()
