# Cross-Country Solar Comparison
Exploratory data analysis (EDA) and statistical comparison for Benin, Sierra Leone, and Togo.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway

In [None]:
# Read the three pre-cleaned, per-country CSV files into DataFrames.
# Files are read in the order listed: Benin, Sierra Leone, Togo.
benin, sierra, togo = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/benin_clean.csv',
        'data/sierra_leone_clean.csv',
        'data/togo_clean.csv',
    )
)

In [None]:
# Summary statistics (mean, median, standard deviation) for GHI, DNI, DHI,
# one row per (country, metric) pair.
metrics = ['GHI', 'DNI', 'DHI']

# Collect the rows first and build the frame once. DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0; appending row by row
# also copied the whole frame on each iteration and left the numeric columns
# with object dtype.
summary_rows = [
    {
        'Country': country,
        'Metric': metric,
        'Mean': df[metric].mean(),
        'Median': df[metric].median(),
        'Std': df[metric].std(),
    }
    for df, country in zip([benin, sierra, togo], ['Benin', 'Sierra Leone', 'Togo'])
    for metric in metrics
]

# Passing `columns` pins the column order (and keeps the header if there are no rows).
summary = pd.DataFrame(
    summary_rows,
    columns=['Country', 'Metric', 'Mean', 'Median', 'Std'],
)

# Last expression of the cell: rendered as the cell output in the notebook.
summary

In [None]:
# Boxplots for GHI, DNI, DHI: one figure per metric, one box per country.
for metric in metrics:
    plt.figure(figsize=(10, 6))
    # Put the three countries side by side as columns of one wide frame;
    # seaborn then draws one box per column.
    data_dict = {
        'Benin': benin[metric],
        'Sierra Leone': sierra[metric],
        'Togo': togo[metric],
    }
    sns.boxplot(data=pd.DataFrame(data_dict))
    plt.title(f'{metric} Comparison Across Countries')
    # Unit label fixed: the original text was mojibake ("Â²") for the
    # superscript two in W/m².
    plt.ylabel(f'{metric} (W/m²)')
    plt.show()

In [None]:
# One-way ANOVA on GHI across the three countries.
# Drop missing readings first: f_oneway propagates NaN by default, so a single
# missing value in any group would make both the statistic and the p-value NaN.
ghi_groups = [country_df['GHI'].dropna() for country_df in (benin, sierra, togo)]
f_stat, p_val = f_oneway(*ghi_groups)
# General ('g') format for the p-value so that very small values are shown in
# scientific notation instead of being rounded to a misleading 0.0000.
print(f"ANOVA F-statistic: {f_stat:.2f}, p-value: {p_val:.4g}")

## Key Observations
- Benin shows the highest median GHI but also the highest variability.
- Togo has a lower average DNI than the other two countries.
- Sierra Leone shows moderate solar potential with relatively stable trends.