In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["# EcoSynthAI: Data Exploration"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "# Load biodiversity and climate datasets.\n",
    "# Both paths are relative, so they resolve against the kernel's working directory.\n",
    "biodiversity_data = pd.read_csv('../data/raw/biodiversity/global_species.csv')\n",
    "climate_data = pd.read_csv('../data/raw/climate_data/global_climate.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Exploratory Data Analysis\n",
    "def analyze_biodiversity(df, habitat_col='habitat'):\n",
    "    \"\"\"Print a structural overview of `df` and chart row counts per habitat.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame to summarise. Rows are counted per distinct value of `habitat_col`.\n",
    "    habitat_col : str, default 'habitat'\n",
    "        Name of the column holding the habitat label.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    KeyError\n",
    "        If `habitat_col` is not a column of `df`.\n",
    "    \"\"\"\n",
    "    print(\"Dataset Overview:\")\n",
    "    # DataFrame.info() writes its report to stdout and returns None, so it\n",
    "    # must not be wrapped in print() (that would emit a stray 'None').\n",
    "    df.info()\n",
    "\n",
    "    if habitat_col not in df.columns:\n",
    "        raise KeyError(f\"Column {habitat_col!r} not found; available columns: {list(df.columns)}\")\n",
    "\n",
    "    print(\"\\nSpecies Distribution by Habitat:\")\n",
    "    habitat_counts = df[habitat_col].value_counts()\n",
    "    plt.figure(figsize=(10, 6))\n",
    "    habitat_counts.plot(kind='bar')\n",
    "    plt.title('Species Distribution by Habitat')\n",
    "    plt.xlabel('Habitat')\n",
    "    plt.ylabel('Number of Species')\n",
    "    plt.show()\n",
    "\n",
    "analyze_biodiversity(biodiversity_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Climate Data Correlation Analysis\n",
    "def analyze_climate_correlations(df):\n",
    "    \"\"\"Plot a heatmap of pairwise correlations between the numeric columns of `df`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame to analyse. Non-numeric columns are ignored.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If `df` has no numeric columns.\n",
    "    \"\"\"\n",
    "    # pandas >= 2.0 no longer drops non-numeric columns in DataFrame.corr()\n",
    "    # (numeric_only defaults to False), so select the numeric ones explicitly.\n",
    "    numeric_df = df.select_dtypes(include='number')\n",
    "    if numeric_df.shape[1] == 0:\n",
    "        raise ValueError('No numeric columns available to correlate.')\n",
    "\n",
    "    correlation_matrix = numeric_df.corr()\n",
    "    plt.figure(figsize=(10, 8))\n",
    "    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')\n",
    "    plt.title('Climate Variables Correlation Matrix')\n",
    "    plt.show()\n",
    "\n",
    "analyze_climate_correlations(climate_data)"
   ]
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 4
}