In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Berlin Bulky Waste Simulation - Data Exploration\n",
    "\n",
    "This notebook explores the data sources and validates the simulation inputs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('..')\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from src.data_loader import DataLoader\n",
    "\n",
    "%matplotlib inline\n",
    "sns.set_style('whitegrid')\n",
    "plt.rcParams['figure.figsize'] = (12, 6)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Load Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Point the loader at '../data' (a path relative to the kernel's working directory).\n",
    "loader = DataLoader(data_dir='../data')\n",
    "\n",
    "# load_all_data() hands back five objects; unpack them by position.\n",
    "(\n",
    "    ordnungsamt,\n",
    "    population,\n",
    "    geo,\n",
    "    config,\n",
    "    demographics,\n",
    ") = loader.load_all_data()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Explore Demographics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Text summary of the demographics table\n",
    "print(\"District Demographics Summary:\")\n",
    "print(demographics.describe())\n",
    "\n",
    "# Horizontal bar chart: one bar per 'bezirk', bar length taken from 'youth_ratio'\n",
    "fig, ax = plt.subplots(figsize=(14, 6))\n",
    "ax.barh(demographics['bezirk'], demographics['youth_ratio'], color='steelblue')\n",
    "ax.set_xlabel('Youth Ratio (18-45 age group)')\n",
    "ax.set_title('Youth Ratio by District')\n",
    "fig.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Waste Categories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the table with one row per category.\n",
    "# Constructing it row-wise (orient='index') instead of transposing a column-wise\n",
    "# frame lets pandas infer a dtype per attribute, so numeric columns such as\n",
    "# 'app_sell_chance' stay numeric rather than every column collapsing to object.\n",
    "# NOTE(review): presumably config['categories'] maps category key -> attribute dict\n",
    "# (the previous code transposed it to get one row per category) - confirm.\n",
    "categories_df = pd.DataFrame.from_dict(config['categories'], orient='index')\n",
    "print(\"\\nWaste Categories:\")\n",
    "print(categories_df[['name', 'app_sell_chance', 'frequency']])\n",
    "\n",
    "# Bar chart of 'app_sell_chance' per category name\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "ax.bar(categories_df['name'], categories_df['app_sell_chance'], color='coral')\n",
    "# Slant the category labels so long names do not overlap\n",
    "plt.setp(ax.get_xticklabels(), rotation=45, ha='right')\n",
    "ax.set_ylabel('Attractiveness (Reuse Probability)')\n",
    "ax.set_title('Item Attractiveness by Category')\n",
    "fig.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Ordnungsamt Incidents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Monthly incident counts, computed without adding helper columns to `ordnungsamt`\n",
    "# so the loaded frame stays unchanged for any other cell that uses it.\n",
    "if 'datum' in ordnungsamt.columns:\n",
    "    # Rows without a date (NaT after parsing) cannot be assigned to a month; drop them.\n",
    "    incident_dates = pd.to_datetime(ordnungsamt['datum']).dropna()\n",
    "\n",
    "    # Reindex over all twelve months so a month with no incidents shows up as an\n",
    "    # empty bar instead of silently disappearing from the x-axis.\n",
    "    monthly_counts = (\n",
    "        incident_dates.dt.month\n",
    "        .value_counts()\n",
    "        .reindex(range(1, 13), fill_value=0)\n",
    "    )\n",
    "\n",
    "    # Label the chart with the year(s) actually present in the data rather than a\n",
    "    # hard-coded year; counts are pooled across years if there is more than one.\n",
    "    years = sorted(incident_dates.dt.year.unique())\n",
    "    if not years:\n",
    "        period = ''\n",
    "    elif len(years) == 1:\n",
    "        period = f' ({years[0]})'\n",
    "    else:\n",
    "        period = f' ({years[0]}-{years[-1]})'\n",
    "\n",
    "    fig, ax = plt.subplots(figsize=(12, 6))\n",
    "    monthly_counts.plot(kind='bar', color='darkgreen', ax=ax)\n",
    "    ax.set_xlabel('Month')\n",
    "    ax.set_ylabel('Number of Incidents')\n",
    "    ax.set_title(f'Illegal Dumping Incidents by Month{period}')\n",
    "    fig.tight_layout()\n",
    "    plt.show()\n",
    "else:\n",
    "    # Make the skipped plot visible instead of producing no output at all.\n",
    "    print(\"No 'datum' column in ordnungsamt; skipping the monthly incident plot.\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}