In [None]:
# Import libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import f_oneway

# Load the cleaned per-country datasets.
benin = pd.read_csv('../data/benin_clean.csv')
togo = pd.read_csv('../data/togo_clean.csv')
sierra = pd.read_csv('../data/sierra_leone_clean.csv')

# Tag each frame with its country so the rows stay attributable after stacking.
benin['country'] = 'Benin'
togo['country'] = 'Togo'
sierra['country'] = 'Sierra Leone'

# ignore_index=True gives the combined frame a fresh, unique index; without it
# every input frame's row labels are carried over and repeat across countries.
df = pd.concat([benin, togo, sierra], ignore_index=True)

# Irradiance columns compared across countries (boxplots + summary table).
metrics = ['GHI', 'DNI', 'DHI']

# One boxplot per metric, grouped by country.
for metric in metrics:
    sns.boxplot(data=df, x='country', y=metric)
    plt.title(f'{metric} Distribution by Country')
    plt.show()

# Summary statistics (mean / median / std of each metric per country).
print("Summary Statistics:\n", df.groupby('country')[metrics].agg(['mean', 'median', 'std']))

# One-way ANOVA for GHI across the three countries.
# Missing values are dropped first: by default f_oneway propagates NaN, which
# would make the p-value NaN and silently fall through to the
# "no significant difference" branch below.
ghi_samples = [frame['GHI'].dropna() for frame in (benin, togo, sierra)]
f_stat, p_val = f_oneway(*ghi_samples)

# .4g keeps very small p-values visible instead of rounding them to 0.0000.
print(f"\nANOVA test for GHI:\nF-statistic = {f_stat:.2f}, p-value = {p_val:.4g}")
if pd.isna(p_val):
    # Still possible when a sample is empty or the data are degenerate;
    # say so explicitly rather than reporting a verdict.
    print("→ ANOVA could not be computed (empty or degenerate GHI samples).")
elif p_val < 0.05:
    print("→ Statistically significant difference in GHI between countries.")
else:
    print("→ No significant difference in GHI between countries.")

# Bar chart of average GHI per country, sorted ascending for easy ranking.
mean_ghi = df.groupby('country')['GHI'].mean().sort_values()
mean_ghi.plot(kind='bar', title='Average GHI by Country')
plt.ylabel('Average GHI')
plt.show()
