In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 🌍 AI for Climate Action: CO2 Emissions Analysis\n",
    "## UN Sustainable Development Goal 13: Climate Action\n",
    "\n",
    "This Jupyter notebook demonstrates a machine learning solution for predicting CO2 emissions and evaluating climate interventions."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Import Libraries and Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard library\n",
    "import os\n",
    "import warnings\n",
    "\n",
    "# Third-party\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# NOTE(review): this silences every warning for the rest of the session\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Set style for better visualizations\n",
    "plt.style.use('seaborn-v0_8')\n",
    "sns.set_palette(\"husl\")\n",
    "\n",
    "# Every figure in this notebook is saved under assets/. savefig does not\n",
    "# create missing directories, so make sure the folder exists up front;\n",
    "# otherwise a fresh checkout fails at the first plot.\n",
    "os.makedirs('assets', exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Load and Explore Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the emissions CSV (path is relative to the working directory)\n",
    "df = pd.read_csv('sample_data/sample_emissions_data.csv')\n",
    "\n",
    "# Headline facts as text, then the first rows as the cell's rich output\n",
    "print(\n",
    "    \"📊 Dataset Overview:\",\n",
    "    f\"Dataset shape: {df.shape}\",\n",
    "    \"\\nFirst 5 rows:\",\n",
    "    sep=\"\\n\",\n",
    ")\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Header line, then pandas' built-in structural summary of the frame\n",
    "print(\"🔍 Dataset Information:\")\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Header line, then descriptive statistics shown as the cell's output\n",
    "print(\"📈 Statistical Summary:\")\n",
    "df.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Data Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pairwise correlations between the numeric columns, drawn as a heatmap\n",
    "correlation_matrix = df.select_dtypes(include=[np.number]).corr()\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(12, 8))\n",
    "sns.heatmap(\n",
    "    correlation_matrix,\n",
    "    annot=True,\n",
    "    cmap='coolwarm',\n",
    "    center=0,\n",
    "    square=True,\n",
    "    linewidths=0.5,\n",
    "    ax=ax,\n",
    ")\n",
    "ax.set_title('Correlation Matrix of Climate Features')\n",
    "\n",
    "fig.tight_layout()\n",
    "fig.savefig('assets/correlation_matrix.png', dpi=300, bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One box per region showing the spread of CO2 emissions\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "sns.boxplot(data=df, x='region', y='co2_emissions_mt', ax=ax)\n",
    "ax.set_title('CO2 Emissions Distribution by Region')\n",
    "\n",
    "# Tilt the region names so they do not run into each other\n",
    "plt.xticks(rotation=45)\n",
    "\n",
    "fig.tight_layout()\n",
    "fig.savefig('assets/emissions_by_region.png', dpi=300, bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Renewable share against CO2 emissions; colour encodes region and\n",
    "# marker size encodes GDP per capita\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "sns.scatterplot(\n",
    "    data=df,\n",
    "    x='renewable_energy_percent',\n",
    "    y='co2_emissions_mt',\n",
    "    hue='region',\n",
    "    size='gdp_per_capita',\n",
    "    sizes=(20, 200),\n",
    "    ax=ax,\n",
    ")\n",
    "ax.set_title('Renewable Energy vs CO2 Emissions')\n",
    "ax.set_xlabel('Renewable Energy (%)')\n",
    "ax.set_ylabel('CO2 Emissions (MT)')\n",
    "\n",
    "# Park the legend outside the axes, to the right of the plot area\n",
    "ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\n",
    "\n",
    "fig.tight_layout()\n",
    "fig.savefig('assets/renewable_vs_emissions.png', dpi=300, bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Data Preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predictor columns fed to the models, in the order they are used\n",
    "features = [\n",
    "    'gdp_per_capita',\n",
    "    'population_millions',\n",
    "    'renewable_energy_percent',\n",
    "    'industrial_output',\n",
    "    'vehicle_per_1000',\n",
    "    'forest_coverage_percent',\n",
    "    'temperature_change',\n",
    "]\n",
    "\n",
    "# Design matrix and the column being predicted\n",
    "X = df[features]\n",
    "y = df['co2_emissions_mt']\n",
    "\n",
    "print(\"📋 Features used for modeling:\")\n",
    "print(\"\\n\".join(f\"{i}. {feature}\" for i, feature in enumerate(features, 1)))\n",
    "\n",
    "print(\n",
    "    \"\\nTarget variable: CO2 Emissions (MT)\",\n",
    "    f\"Features shape: {X.shape}\",\n",
    "    f\"Target shape: {y.shape}\",\n",
    "    sep=\"\\n\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
    "\n",
    "# Learn the scaling statistics from the training rows only, then apply\n",
    "# the same transformation to both partitions\n",
    "scaler = StandardScaler().fit(X_train)\n",
    "X_train_scaled = scaler.transform(X_train)\n",
    "X_test_scaled = scaler.transform(X_test)\n",
    "\n",
    "print(\n",
    "    \"✅ Data preprocessing completed:\",\n",
    "    f\"Training set: {X_train_scaled.shape}\",\n",
    "    f\"Testing set: {X_test_scaled.shape}\",\n",
    "    sep=\"\\n\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Model Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit a 100-tree random forest (depth capped at 10, all CPU cores, fixed seed)\n",
    "print(\"🤖 Training Random Forest model...\")\n",
    "rf_model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1)\n",
    "rf_model.fit(X_train_scaled, y_train)\n",
    "print(\"✅ Model training completed!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Linear baseline fitted on the same scaled training data, for comparison\n",
    "lr_model = LinearRegression().fit(X_train_scaled, y_train)\n",
    "print(\"✅ Linear Regression model trained!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Model Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate_model(model, X_test, y_test, model_name):\n",
    "    \"\"\"Score a fitted model on held-out data.\n",
    "\n",
    "    Prints MAE, RMSE and R2 under a header naming the model.\n",
    "\n",
    "    Returns:\n",
    "        (metrics, y_pred): the metric dict keyed by 'MAE', 'RMSE' and\n",
    "        'R2 Score', and the predictions made for X_test.\n",
    "    \"\"\"\n",
    "    y_pred = model.predict(X_test)\n",
    "\n",
    "    mae = mean_absolute_error(y_test, y_pred)\n",
    "    rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n",
    "    r2 = r2_score(y_test, y_pred)\n",
    "    metrics = {'MAE': mae, 'RMSE': rmse, 'R2 Score': r2}\n",
    "\n",
    "    print(f\"\\n📊 {model_name} Performance:\")\n",
    "    print(\"\\n\".join(f\"{name}: {score:.4f}\" for name, score in metrics.items()))\n",
    "\n",
    "    return metrics, y_pred\n",
    "\n",
    "# Score both fitted models on the same scaled test partition\n",
    "rf_metrics, rf_pred = evaluate_model(\n",
    "    rf_model, X_test_scaled, y_test, model_name=\"Random Forest\"\n",
    ")\n",
    "lr_metrics, lr_pred = evaluate_model(\n",
    "    lr_model, X_test_scaled, y_test, model_name=\"Linear Regression\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Side-by-side table: one column per model, one row per metric\n",
    "comparison_df = pd.DataFrame(\n",
    "    data={\n",
    "        'Random Forest': rf_metrics,\n",
    "        'Linear Regression': lr_metrics,\n",
    "    }\n",
    ")\n",
    "\n",
    "print(\"\\n🆚 Model Comparison:\")\n",
    "comparison_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Feature Importance Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pair each feature name with the forest's importance score for it\n",
    "feature_importance = pd.DataFrame(\n",
    "    {'feature': features, 'importance': rf_model.feature_importances_}\n",
    ")\n",
    "\n",
    "# Most important feature first\n",
    "feature_importance = feature_importance.sort_values(by='importance', ascending=False)\n",
    "\n",
    "print(\"🔍 Feature Importance (Random Forest):\")\n",
    "feature_importance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Horizontal bars: one per feature, length = importance score\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "sns.barplot(data=feature_importance, x='importance', y='feature', ax=ax)\n",
    "ax.set_title('Feature Importance in CO2 Emissions Prediction')\n",
    "ax.set_xlabel('Importance')\n",
    "\n",
    "fig.tight_layout()\n",
    "fig.savefig('assets/feature_importance.png', dpi=300, bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Predictions Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predicted against actual emissions, one panel per model\n",
    "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))\n",
    "\n",
    "# End points of the y = x reference line spanning the observed range;\n",
    "# points on this line are predicted exactly\n",
    "diag = [y_test.min(), y_test.max()]\n",
    "\n",
    "panels = [\n",
    "    (ax1, rf_pred, 'blue', 'Random Forest: Actual vs Predicted'),\n",
    "    (ax2, lr_pred, 'green', 'Linear Regression: Actual vs Predicted'),\n",
    "]\n",
    "for ax, pred, color, title in panels:\n",
    "    ax.scatter(y_test, pred, alpha=0.7, color=color)\n",
    "    ax.plot(diag, diag, 'r--', lw=2)\n",
    "    ax.set_xlabel('Actual CO2 Emissions (MT)')\n",
    "    ax.set_ylabel('Predicted CO2 Emissions (MT)')\n",
    "    ax.set_title(title)\n",
    "\n",
    "fig.tight_layout()\n",
    "fig.savefig('assets/predictions_vs_actual.png', dpi=300, bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9. Climate Intervention Scenarios"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_emissions_reduction(model, scaler, country_data, intervention_scenarios, features):\n",
    "    \"\"\"Predict emissions for one country under each intervention scenario.\n",
    "\n",
    "    Args:\n",
    "        model: Fitted regressor exposing ``predict``.\n",
    "        scaler: Fitted transformer exposing ``transform``; applied to the\n",
    "            adjusted feature row before prediction.\n",
    "        country_data: pandas Series holding 'co2_emissions_mt' and every\n",
    "            name listed in ``features``.\n",
    "        intervention_scenarios: ``{scenario_name: {feature: change}}``.\n",
    "            A change written as a string containing '%' (e.g. '-10%') is\n",
    "            applied as a relative change; any other value is added to the\n",
    "            feature as-is, in the feature's own unit.\n",
    "        features: Feature names, in the column order the scaler expects.\n",
    "\n",
    "    Returns:\n",
    "        (predictions, current_emission): ``predictions`` maps each scenario\n",
    "        name to its predicted emissions; ``current_emission`` is the\n",
    "        observed ``country_data['co2_emissions_mt']``.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If a scenario names a feature that is not in ``features``.\n",
    "    \"\"\"\n",
    "    predictions = {}\n",
    "\n",
    "    # Current emissions\n",
    "    current_emission = country_data['co2_emissions_mt']\n",
    "\n",
    "    # Work on an explicit float array: an integer-typed row would silently\n",
    "    # truncate fractional adjustments when they are assigned back in place.\n",
    "    country_features = np.asarray(country_data[features], dtype=float)\n",
    "\n",
    "    for scenario, changes in intervention_scenarios.items():\n",
    "        modified_features = country_features.copy()\n",
    "\n",
    "        # Apply changes based on scenario\n",
    "        for feature, change in changes.items():\n",
    "            feature_idx = features.index(feature)\n",
    "            if '%' in str(change):\n",
    "                # Percentage change\n",
    "                pct_change = float(str(change).strip('%')) / 100\n",
    "                modified_features[feature_idx] *= (1 + pct_change)\n",
    "            else:\n",
    "                # Absolute change\n",
    "                modified_features[feature_idx] += change\n",
    "\n",
    "        # Hand the scaler a labelled one-row frame so the columns can be\n",
    "        # checked against the names it was fitted with.\n",
    "        # NOTE(review): assumes the scaler was fitted on a DataFrame with\n",
    "        # these column names, as it is earlier in this notebook.\n",
    "        feature_row = pd.DataFrame([modified_features], columns=features)\n",
    "\n",
    "        # Predict new emissions\n",
    "        scaled_data = scaler.transform(feature_row)\n",
    "        new_emission = model.predict(scaled_data)[0]\n",
    "        predictions[scenario] = new_emission\n",
    "\n",
    "    return predictions, current_emission\n",
    "\n",
    "# Select a sample country for analysis\n",
    "sample_country = df.iloc[0]\n",
    "\n",
    "# Define intervention scenarios.\n",
    "# Plain numbers are absolute shifts in the feature's own unit, so 20 on a\n",
    "# percentage column means +20 percentage points. Use a string such as\n",
    "# '-10%' to request a relative change instead.\n",
    "intervention_scenarios = {\n",
    "    'Current Policy': {},\n",
    "    'Renewable Energy Boost': {'renewable_energy_percent': 20},  # +20 percentage points\n",
    "    'Sustainable Transport': {'vehicle_per_1000': -50, 'renewable_energy_percent': 10},\n",
    "    'Green Economy': {\n",
    "        'renewable_energy_percent': 25,\n",
    "        'forest_coverage_percent': 15,\n",
    "        'industrial_output': -10\n",
    "    }\n",
    "}\n",
    "\n",
    "print(f\"🌍 Climate Intervention Analysis for {sample_country['country']} ({sample_country['region']})\")\n",
    "print(\"=\" * 60)\n",
    "\n",
    "# Get predictions\n",
    "predictions, current_emission = predict_emissions_reduction(\n",
    "    rf_model, scaler, sample_country, intervention_scenarios, features\n",
    ")\n",
    "\n",
    "# Measure scenarios against the model's own no-change prediction. Comparing\n",
    "# against the observed value would report the model's error on this row as\n",
    "# a policy effect, and 'Current Policy' itself would show a reduction.\n",
    "baseline_emission = predictions['Current Policy']\n",
    "\n",
    "print(f\"\\nCurrent emissions: {current_emission:.2f} MT\\n\")\n",
    "\n",
    "print(\"Emissions under different scenarios:\")\n",
    "for scenario, emission in predictions.items():\n",
    "    reduction = ((baseline_emission - emission) / baseline_emission) * 100\n",
    "    print(f\"  • {scenario}: {emission:.2f} MT ({reduction:+.1f}% reduction)\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualize intervention impacts\n",
    "scenarios = list(predictions.keys())\n",
    "emissions = list(predictions.values())\n",
    "\n",
    "# Measure each scenario against the model's own 'Current Policy' prediction,\n",
    "# as the right-hand chart title states. Comparing against the observed value\n",
    "# (current_emission) would count the model's error on this row as a reduction.\n",
    "baseline_emission = predictions['Current Policy']\n",
    "reductions = [((baseline_emission - e) / baseline_emission) * 100 for e in emissions]\n",
    "\n",
    "# Everything except the baseline itself goes on the reduction chart;\n",
    "# select by name so the result does not depend on dictionary order\n",
    "alt_scenarios = [s for s in scenarios if s != 'Current Policy']\n",
    "alt_reductions = [r for s, r in zip(scenarios, reductions) if s != 'Current Policy']\n",
    "\n",
    "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))\n",
    "\n",
    "# Emissions by scenario\n",
    "bars = ax1.bar(scenarios, emissions, color=['lightgray', 'lightgreen', 'lightblue', 'darkgreen'])\n",
    "ax1.set_ylabel('CO2 Emissions (MT)')\n",
    "ax1.set_title('CO2 Emissions Under Different Scenarios')\n",
    "ax1.tick_params(axis='x', rotation=45)\n",
    "\n",
    "# Add value labels on bars. bar_label offsets in points rather than data\n",
    "# units, so placement is independent of the y-axis scale and stays outside\n",
    "# the bar for negative values as well.\n",
    "ax1.bar_label(bars, fmt='%.1f', padding=3)\n",
    "\n",
    "# Reduction percentages\n",
    "bars2 = ax2.bar(alt_scenarios, alt_reductions, color=['green', 'blue', 'darkgreen'])\n",
    "ax2.set_ylabel('Reduction Percentage (%)')\n",
    "ax2.set_title('Emissions Reduction Compared to Current Policy')\n",
    "ax2.tick_params(axis='x', rotation=45)\n",
    "\n",
    "# Add value labels on bars\n",
    "ax2.bar_label(bars2, fmt='%.1f%%', padding=3)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig('assets/intervention_analysis.png', dpi=300, bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 10. Ethical Considerations and Conclusion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Divider printed under each section heading\n",
    "section_rule = \"=\" * 50\n",
    "\n",
    "ethical_points = [\n",
    "    \"• Data Representation: Ensure fair representation of developing vs developed countries\",\n",
    "    \"• Economic Equity: Consider different economic capacities for climate action\",\n",
    "    \"• Historical Responsibility: Acknowledge historical emissions in policy recommendations\",\n",
    "    \"• Bias Mitigation: Regularly audit for algorithmic bias in predictions\",\n",
    "    \"• Transparency: Make model decisions interpretable to policymakers\",\n",
    "    \"• Local Context: Adapt recommendations to regional socio-economic conditions\",\n",
    "]\n",
    "\n",
    "impact_points = [\n",
    "    \"✓ Target 13.2: Integrate climate change measures into national policies\",\n",
    "    \"✓ Target 13.3: Build knowledge and capacity to meet climate change\",\n",
    "    \"✓ Provides data-driven insights for evidence-based policymaking\",\n",
    "    \"✓ Enables testing of climate intervention scenarios before implementation\",\n",
    "    \"✓ Identifies most impactful sustainability measures\",\n",
    "    \"✓ Supports global cooperation through shared data insights\",\n",
    "]\n",
    "\n",
    "# Section 1: ethical considerations, one bullet per line\n",
    "print(\"⚖️ ETHICAL CONSIDERATIONS\")\n",
    "print(section_rule)\n",
    "print(*ethical_points, sep=\"\\n\")\n",
    "\n",
    "# Section 2: how the work maps onto SDG 13\n",
    "print(\"\\n🌍 IMPACT ON SDG 13: CLIMATE ACTION\")\n",
    "print(section_rule)\n",
    "print(*impact_points, sep=\"\\n\")\n",
    "\n",
    "# Section 3: closing statement\n",
    "print(\"\\n🎯 CONCLUSION\")\n",
    "print(section_rule)\n",
    "print(\n",
    "    \"This AI solution demonstrates the potential of machine learning in\",\n",
    "    \"addressing climate change by providing actionable insights for\",\n",
    "    \"sustainable development and evidence-based climate policy.\",\n",
    "    sep=\"\\n\",\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}