# In [2]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# 3.1 Load Cleaned Data
# One CSV per country, read in the order Benin, Togo, Sierra Leone.
csv_paths = (
    '../data/benin_clean.csv',
    '../data/togo_clean.csv',
    '../data/sierra_leone_clean.csv',
)
benin, togo, sierra = (pd.read_csv(path) for path in csv_paths)

# 3.2 Boxplots
# Side-by-side box plots of the GHI column, one box per country.
country_labels = ['Benin', 'Togo', 'Sierra Leone']
data = [frame['GHI'] for frame in (benin, togo, sierra)]

fig, ax = plt.subplots(figsize=(15, 5))
ax.boxplot(data, labels=country_labels)
ax.set_title('GHI Distribution by Country')
ax.set_ylabel('GHI (W/m²)')
plt.show()

# 3.3 Summary Table
def get_stats(df, metric):
    """Summarise one column of a DataFrame.

    Args:
        df: DataFrame containing the column named by ``metric``.
        metric: Name of the column to summarise.

    Returns:
        A dict with the keys ``'mean'``, ``'median'`` and ``'std'`` (in that
        order) holding the results of the column's ``mean()``, ``median()``
        and ``std()`` calls.
    """
    column = df[metric]
    return dict(
        mean=column.mean(),
        median=column.median(),
        std=column.std(),
    )

# Columns to summarise and the per-country frames, keyed by display name.
metrics = ['GHI', 'DNI', 'DHI']
countries = {'Benin': benin, 'Togo': togo, 'Sierra Leone': sierra}

# Build one row per (country, metric) pair: the statistics returned by
# get_stats plus the identifying 'Country' and 'Metric' labels.
summary_data = []
for country, df in countries.items():
    for metric in metrics:
        # Deliberately not named `stats`: that name is the scipy.stats module
        # imported at the top of the file, and rebinding it to a dict here
        # would break any later `stats.<function>` call.
        row = get_stats(df, metric)
        row.update({'Country': country, 'Metric': metric})
        summary_data.append(row)

summary_df = pd.DataFrame(summary_data)
print("Summary Statistics:")
print(summary_df)

# 3.4 Statistical Testing
# One-way ANOVA on GHI across the three countries (null hypothesis: the
# country means are equal).
#
# f_oneway is imported under its own name so this step does not rely on the
# module-level name `stats` still referring to scipy.stats; if `stats` has
# been rebound earlier in the script (e.g. to a dict of summary values),
# `stats.f_oneway` raises AttributeError.
from scipy.stats import f_oneway

# Drop missing values per sample: the pandas mean/median/std used for the
# summary skip NaN by default, whereas f_oneway returns NaN if any sample
# contains one.
ghi_samples = [frame['GHI'].dropna() for frame in (benin, togo, sierra)]

f_stat, p_value = f_oneway(*ghi_samples)
print("\nANOVA Test Results:")
print(f"F-statistic: {f_stat:.2f}")
print(f"p-value: {p_value:.2e}")

# 3.5 Visual Summary
# Mean GHI per country, keyed by display name in plotting order.
avg_ghi = {
    name: frame['GHI'].mean()
    for name, frame in (
        ('Benin', benin),
        ('Togo', togo),
        ('Sierra Leone', sierra),
    )
}

fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(avg_ghi.keys(), avg_ghi.values())
ax.set_title('Average GHI by Country')
ax.set_ylabel('GHI (W/m²)')
# Tilt the country names on the x axis by 45 degrees.
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

{'cells': [{'cell_type': 'markdown',
   'metadata': {},
   'source': ['# Cross-Country Solar Data Comparison\n',
    '\n',
    'This notebook compares solar radiation data across Benin, Sierra Leone, and Togo to identify optimal locations for solar installations.']},
  {'cell_type': 'code',
   'execution_count': 0,
   'metadata': {},
   'source': ['import pandas as pd\n',
    'import numpy as np\n',
    'import matplotlib.pyplot as plt\n',
    'import seaborn as sns\n',
    'from scipy import stats\n',
    'import plotly.express as px\n',
    'import plotly.graph_objects as go\n',
    'from utils import load_and_clean_data\n',
    '\n',
    '# Set style\n',
    "plt.style.use('seaborn')\n",
    "sns.set_palette('husl')"]},
  {'cell_type': 'markdown',
   'metadata': {},
   'source': ['## Load and Prepare Data']},
  {'cell_type': 'code',
   'execution_count': 0,
   'metadata': {},
   'source': ['# Load cleaned data for each country\n',
    'countries = {\n',
    "    'Benin': '../data/be