In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway
import os

In [2]:

# Make sure the output directory for figures exists.
# exist_ok=True makes this safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs('plots', exist_ok=True)

# Load the per-country cleaned datasets (paths are relative to the
# notebook's working directory).
benin = pd.read_csv('data/benin_clean.csv')
sierraleone = pd.read_csv('data/sierraleone_clean.csv')
togo = pd.read_csv('data/togo_clean.csv')

In [3]:

# Tag every row with the name of the country its frame represents.
# The column is added to the original frames in place.
for frame, label in ((benin, 'Benin'), (sierraleone, 'Sierra Leone'), (togo, 'Togo')):
    frame['Country'] = label

# Stack the three labelled frames into one, renumbering the index from 0
df_combined = pd.concat([benin, sierraleone, togo], ignore_index=True)
print("Combined dataset shape:", df_combined.shape)

# Per-country mean / std / min / max for each of the key columns
key_columns = ['GHI', 'DNI', 'DHI', 'Tamb', 'RH', 'WS']
summary_stats = (
    df_combined
    .groupby('Country')[key_columns]
    .agg(['mean', 'std', 'min', 'max'])
)
print("\nSummary Statistics by Country:")
print(summary_stats)

# Persist the summary table next to the input data
summary_path = 'data/cross_country_summary.csv'
summary_stats.to_csv(summary_path)
print(f"Summary statistics saved to {summary_path}")

Combined dataset shape: (1576800, 20)

Summary Statistics by Country:
                     GHI                                   DNI              \
                    mean         std   min     max        mean         std   
Country                                                                      
Benin         240.559452  331.131327 -12.9  1413.0  167.187516  261.710501   
Sierra Leone  201.957515  298.495150 -19.5  1499.0  116.376337  218.652659   
Togo          230.555040  322.532347 -12.7  1424.0  151.258469  250.956962   

                                  DHI              ...  Tamb               RH  \
              min     max        mean         std  ...   min   max       mean   
Country                                            ...                          
Benin        -7.8   952.3  115.358961  158.691074  ...  11.0  43.8  54.487969   
Sierra Leone -7.8   946.0  113.720571  158.946032  ...  12.3  39.9  79.448857   
Togo          0.0  1004.5  116.444352  156.520714  ...  

In [4]:

# Side-by-side boxplots of GHI, DNI and DHI: one panel per column,
# with the countries along the x-axis of each panel.
metrics = ['GHI', 'DNI', 'DHI']
fig, axes = plt.subplots(1, len(metrics), figsize=(12, 8))
for ax, metric in zip(axes, metrics):
    sns.boxplot(x='Country', y=metric, data=df_combined, ax=ax)
    ax.set_title(f'{metric} by Country')
fig.tight_layout()
fig.savefig('plots/boxplots_ghi_dni_dhi.png')
# Close the figure so it is written to disk only and not rendered inline
plt.close(fig)

In [5]:

# Single-panel boxplot of the Tamb column, grouped by country
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(x='Country', y='Tamb', data=df_combined, ax=ax)
ax.set_title('Ambient Temperature by Country')
fig.savefig('plots/boxplot_tamb.png')
# Close the figure so it is written to disk only and not rendered inline
plt.close(fig)

In [6]:

# Statistical test: one-way ANOVA for GHI, DNI, DHI.
# For each column, the three per-country samples are compared and the
# result is judged against a 0.05 significance threshold.
print("\nANOVA Results:")
for col in ['GHI', 'DNI', 'DHI']:
    # f_oneway propagates NaN: a single missing reading in any sample would
    # turn both the statistic and the p-value into NaN. Drop missing values
    # per sample first; this is a no-op when the data has no gaps.
    samples = [frame[col].dropna() for frame in (benin, sierraleone, togo)]
    f_stat, p_val = f_oneway(*samples)
    print(f"{col} - F-statistic: {f_stat:.2f}, p-value: {p_val:.4f}")
    if np.isnan(p_val):
        # NaN compares False against 0.05, so without this branch an undefined
        # test would be reported as "No significant differences".
        print(f"ANOVA result for {col} is undefined (NaN); no conclusion drawn")
    elif p_val < 0.05:
        print(f"Significant differences in {col} across countries (p < 0.05)")
    else:
        print(f"No significant differences in {col} across countries (p >= 0.05)")


ANOVA Results:
GHI - F-statistic: 2090.09, p-value: 0.0000
Significant differences in GHI across countries (p < 0.05)
DNI - F-statistic: 5940.05, p-value: 0.0000
Significant differences in DNI across countries (p < 0.05)
DHI - F-statistic: 39.56, p-value: 0.0000
Significant differences in DHI across countries (p < 0.05)


In [7]:

# Correlation analysis by country: one annotated heatmap per country,
# all sharing the same [-1, 1] colour scale so panels are comparable.
countries = ['Benin', 'Sierra Leone', 'Togo']
fig, axes = plt.subplots(1, len(countries), figsize=(12, 8))
for ax, country in zip(axes, countries):
    # Restrict to this country's rows and the key columns, then correlate
    in_country = df_combined['Country'] == country
    corr = df_combined.loc[in_country, key_columns].corr()
    sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1, ax=ax)
    ax.set_title(f'{country}: Correlation Heatmap')
fig.tight_layout()
fig.savefig('plots/correlation_heatmaps_by_country.png')
# Close the figure so it is written to disk only and not rendered inline
plt.close(fig)

In [8]:

# Scatter plot: GHI vs Tamb by country, with marker size driven by RH.
fig, ax = plt.subplots(figsize=(10, 8))

# Pin the legend location while seaborn builds the legend. With matplotlib's
# default loc='best', the legend position is searched against every plotted
# point when the figure is drawn, which is slow for a frame as large as
# df_combined. NOTE(review): this is presumed to be the source of the warning
# emitted at savefig time -- confirm on re-run.
with plt.rc_context({'legend.loc': 'upper left'}):
    sns.scatterplot(
        x='Tamb', y='GHI', hue='Country', size='RH',
        data=df_combined, alpha=0.5, ax=ax,
    )

ax.set_title('GHI vs Tamb by Country (Bubble Size: RH)')
fig.savefig('plots/scatter_ghi_tamb_by_country.png')
# Close the figure so it is written to disk only and not rendered inline
plt.close(fig)

print("Cross-country comparison complete. Plots saved to plots/ directory.")

  plt.savefig('plots/scatter_ghi_tamb_by_country.png')


Cross-country comparison complete. Plots saved to plots/ directory.
