In [2]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Cross-Country Solar Data Comparison\n",
    "\n",
    "This notebook compares solar radiation data across Benin, Sierra Leone, and Togo to identify optimal locations for solar installations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from scipy import stats\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "from utils import load_and_clean_data\n",
    "\n",
    "# Set style\n",
    "# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and 3.8\n",
    "# removed the old names, so pick whichever alias this installation provides.\n",
    "seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'\n",
    "plt.style.use(seaborn_style)\n",
    "sns.set_palette('husl')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and Prepare Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "source": [
    "# Path of the cleaned CSV for every country in the comparison\n",
    "countries = {\n",
    "    'Benin': '../data/benin_clean.csv',\n",
    "    'Sierra Leone': '../data/sierra_leone_clean.csv',\n",
    "    'Togo': '../data/togo_clean.csv',\n",
    "}\n",
    "\n",
    "# Load one frame per country and report its dimensions right after it is read\n",
    "dfs = {}\n",
    "for country in countries:\n",
    "    frame = load_and_clean_data(countries[country])\n",
    "    dfs[country] = frame\n",
    "    print(f\"{country} data shape: {frame.shape}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Metric Comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "source": [
    "# Create boxplots for each metric\n",
    "metrics = ['GHI', 'DNI', 'DHI']\n",
    "\n",
    "# Stack all countries into one long-form frame with an explicit 'Country' column.\n",
    "# A plain list of Series that share the same name (three 'GHI' Series, ...) can be\n",
    "# keyed by that name in recent seaborn versions, collapsing the countries into a\n",
    "# single box; grouping on a real column avoids that and labels the ticks itself.\n",
    "combined = pd.concat(\n",
    "    [df[metrics].assign(Country=country) for country, df in dfs.items()],\n",
    "    ignore_index=True,\n",
    ")\n",
    "\n",
    "# squeeze=False keeps `axes` two-dimensional even if `metrics` has one entry\n",
    "fig, axes = plt.subplots(1, len(metrics), figsize=(20, 6), squeeze=False)\n",
    "\n",
    "for ax, metric in zip(axes[0], metrics):\n",
    "    # `order` pins the boxes to the order in which the countries were loaded\n",
    "    sns.boxplot(data=combined, x='Country', y=metric, order=list(dfs), ax=ax)\n",
    "    ax.set_title(f'{metric} Distribution by Country')\n",
    "    ax.set_xlabel('')\n",
    "    ax.set_ylabel(f'{metric} (W/m²)')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Statistical Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "source": [
    "# Perform ANOVA test for each metric\n",
    "for metric in metrics:\n",
    "    # f_oneway propagates NaN: a single missing reading would turn both the\n",
    "    # F-statistic and the p-value into NaN, so drop missing values per country.\n",
    "    samples = [df[metric].dropna() for df in dfs.values()]\n",
    "    f_stat, p_value = stats.f_oneway(*samples)\n",
    "    print(f\"{metric} ANOVA test:\")\n",
    "    print(f\"F-statistic: {f_stat:.2f}\")\n",
    "    print(f\"p-value: {p_value:.2e}\")\n",
    "    print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Summary Statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "source": [
    "# Create summary table\n",
    "def _summary_row(country, metric, values):\n",
    "    \"\"\"Return the descriptive statistics of one metric for one country as a row dict.\"\"\"\n",
    "    return {\n",
    "        'Country': country,\n",
    "        'Metric': metric,\n",
    "        'Mean': values.mean(),\n",
    "        'Median': values.median(),\n",
    "        'Std': values.std(),\n",
    "        'Min': values.min(),\n",
    "        'Max': values.max()\n",
    "    }\n",
    "\n",
    "# One row per (country, metric) pair, countries outermost\n",
    "summary_data = [\n",
    "    _summary_row(country, metric, df[metric])\n",
    "    for country, df in dfs.items()\n",
    "    for metric in metrics\n",
    "]\n",
    "\n",
    "summary_df = pd.DataFrame(summary_data)\n",
    "summary_df.round(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Daily Patterns Comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "source": [
    "# Compare daily patterns\n",
    "# Average every metric by hour of day once per country; a single groupby covers\n",
    "# all metrics instead of repeating the same grouping for each one.\n",
    "hourly_means = {}\n",
    "for country, df in dfs.items():\n",
    "    # to_datetime leaves datetime columns untouched and parses string timestamps,\n",
    "    # so `.dt.hour` also works when the CSV loader did not convert the column.\n",
    "    hour_of_day = pd.to_datetime(df['Timestamp']).dt.hour\n",
    "    hourly_means[country] = df.groupby(hour_of_day)[metrics].mean()\n",
    "\n",
    "fig, axes = plt.subplots(1, 3, figsize=(20, 6))\n",
    "\n",
    "for i, metric in enumerate(metrics):\n",
    "    for country, hourly in hourly_means.items():\n",
    "        axes[i].plot(hourly.index, hourly[metric].values, label=country)\n",
    "    \n",
    "    axes[i].set_title(f'Average {metric} by Hour of Day')\n",
    "    axes[i].set_xlabel('Hour of Day')\n",
    "    axes[i].set_ylabel(f'{metric} (W/m²)')\n",
    "    axes[i].legend()\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Temperature and Radiation Relationship"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "source": [
    "# Compare temperature-radiation relationships\n",
    "# One panel per metric; each panel overlays one fitted regression line per country\n",
    "fig, axes = plt.subplots(1, 3, figsize=(20, 6))\n",
    "\n",
    "for ax, metric in zip(axes, metrics):\n",
    "    for country, df in dfs.items():\n",
    "        # scatter=False draws only the fitted line, which then carries the legend label\n",
    "        sns.regplot(x='Tamb', y=metric, data=df, scatter=False, label=country, ax=ax)\n",
    "\n",
    "    ax.set(\n",
    "        title=f'{metric} vs Temperature',\n",
    "        xlabel='Temperature (°C)',\n",
    "        ylabel=f'{metric} (W/m²)',\n",
    "    )\n",
    "    ax.legend()\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Key Observations\n",
    "\n",
    "1. **Solar Radiation Patterns**: [Add observation about radiation patterns across countries]\n",
    "2. **Temperature Impact**: [Add observation about temperature effects]\n",
    "3. **Optimal Locations**: [Add observation about best locations for solar installations]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Recommendations\n",
    "\n",
    "Based on the analysis, here are the key recommendations for solar installation:\n",
    "\n",
    "1. [Add first recommendation]\n",
    "2. [Add second recommendation]\n",
    "3. [Add third recommendation]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

{'cells': [{'cell_type': 'markdown',
   'metadata': {},
   'source': ['# Cross-Country Solar Data Comparison\n',
    '\n',
    'This notebook compares solar radiation data across Benin, Sierra Leone, and Togo to identify optimal locations for solar installations.']},
  {'cell_type': 'code',
   'execution_count': 0,
   'metadata': {},
   'source': ['import pandas as pd\n',
    'import numpy as np\n',
    'import matplotlib.pyplot as plt\n',
    'import seaborn as sns\n',
    'from scipy import stats\n',
    'import plotly.express as px\n',
    'import plotly.graph_objects as go\n',
    'from utils import load_and_clean_data\n',
    '\n',
    '# Set style\n',
    "plt.style.use('seaborn')\n",
    "sns.set_palette('husl')"]},
  {'cell_type': 'markdown',
   'metadata': {},
   'source': ['## Load and Prepare Data']},
  {'cell_type': 'code',
   'execution_count': 0,
   'metadata': {},
   'source': ['# Load cleaned data for each country\n',
    'countries = {\n',
    "    'Benin': '../data/be