In [9]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Iris Dataset Analysis & Visualization\n",
    "This notebook performs data loading, exploration, analysis, and visualization of the Iris dataset using **pandas**, **matplotlib**, and **seaborn**."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 0: Save the Iris Dataset as CSV\n",
    "We first save the dataset as a CSV file so it can be loaded later."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_iris\n",
    "import pandas as pd\n",
    "\n",
    "# Load Iris dataset\n",
    "iris = load_iris()\n",
    "df_iris = pd.DataFrame(data=iris.data, columns=iris.feature_names)\n",
    "df_iris['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)\n",
    "\n",
    "# Save to CSV\n",
    "csv_file = \"iris_dataset.csv\"\n",
    "df_iris.to_csv(csv_file, index=False)\n",
    "print(f\"Iris dataset saved as '{csv_file}'\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1: Load and Explore the Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "file_path = \"iris_dataset.csv\"\n",
    "\n",
    "try:\n",
    "    df = pd.read_csv(file_path)\n",
    "    print(\"Dataset loaded successfully!\\n\")\n",
    "except FileNotFoundError:\n",
    "    print(f\"Error: File '{file_path}' not found.\")\n",
    "    exit()\n",
    "except pd.errors.EmptyDataError:\n",
    "    print(f\"Error: File '{file_path}' is empty.\")\n",
    "    exit()\n",
    "except Exception as e:\n",
    "    print(f\"An unexpected error occurred: {e}\")\n",
    "    exit()\n",
    "\n",
    "# Inspect first 5 rows\n",
    "print(\"First 5 rows of the dataset:\")\n",
    "print(df.head())\n",
    "\n",
    "# Dataset info\n",
    "print(\"\\nDataset info:\")\n",
    "print(df.info())\n",
    "\n",
    "# Check for missing values\n",
    "print(\"\\nMissing values per column:\")\n",
    "print(df.isnull().sum())\n",
    "\n",
    "# Fill missing values (not needed for Iris, but included for completeness)\n",
    "df.fillna(method='ffill', inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2: Basic Data Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Basic statistics\n",
    "print(\"\\nBasic statistics:\")\n",
    "print(df.describe())\n",
    "\n",
    "# Group by species and compute mean\n",
    "grouped = df.groupby('species').mean()\n",
    "print(\"\\nMean values per species:\")\n",
    "print(grouped)\n",
    "\n",
    "# Observations\n",
    "print(\"\\nObservations:\")\n",
    "print(\"- Setosa species generally has smaller measurements.\")\n",
    "print(\"- Versicolor species has intermediate sizes.\")\n",
    "print(\"- Virginica species tends to have the largest measurements.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3: Data Visualization\n",
    "We will create four different plots to visualize the dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.set(style=\"whitegrid\")  # Seaborn style\n",
    "\n",
    "# 1) Line chart: Sepal length trend across samples\n",
    "plt.figure(figsize=(8,5))\n",
    "plt.plot(df.index, df['sepal length (cm)'], color='blue', label='Sepal Length')\n",
    "plt.title('Sepal Length Trend Across Samples')\n",
    "plt.xlabel('Sample Index')\n",
    "plt.ylabel('Sepal Length (cm)')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2) Bar chart: average petal length per species\n",
    "plt.figure(figsize=(6,4))\n",
    "grouped['petal length (cm)'].plot(kind='bar', color=['green','orange','red'])\n",
    "plt.title('Average Petal Length per Species')\n",
    "plt.xlabel('Species')\n",
    "plt.ylabel('Petal Length (cm)')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3) Histogram: distribution of petal width\n",
    "plt.figure(figsize=(6,4))\n",
    "plt.hist(df['petal width (cm)'], bins=10, color='purple', edgecolor='black')\n",
    "plt.title('Distribution of Petal Width')\n",
    "plt.xlabel('Petal Width (cm)')\n",
    "plt.ylabel('Frequency')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": None,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4) Scatter plot: sepal length vs petal length colored by species\n",
    "plt.figure(figsize=(6,4))\n",
    "sns.scatterplot(data=df, x='sepal length (cm)', y='petal length (cm)', hue='species', palette='Set1')\n",
    "plt.title('Sepal Length vs Petal Length')\n",
    "plt.xlabel('Sepal Length (cm)')\n",
    "plt.ylabel('Petal Length (cm)')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### All tasks are completed successfully!\n",
    "- Dataset loaded and inspected\n",
    "- Basic analysis performed\n",
    "- Four visualizations created with titles, labels, and legends\n",
    "- File `iris_dataset.csv` is included for submission"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10"
  }
 },
 "nbformat": 5,
 "nbformat_minor": 10
}

{'cells': [{'cell_type': 'markdown',
   'metadata': {},
   'source': ['# Iris Dataset Analysis & Visualization\n',
    'This notebook performs data loading, exploration, analysis, and visualization of the Iris dataset using **pandas**, **matplotlib**, and **seaborn**.']},
  {'cell_type': 'markdown',
   'metadata': {},
   'source': ['## Step 0: Save the Iris Dataset as CSV\n',
    'We first save the dataset as a CSV file so it can be loaded later.']},
  {'cell_type': 'code',
   'execution_count': None,
   'metadata': {},
   'outputs': [],
   'source': ['from sklearn.datasets import load_iris\n',
    'import pandas as pd\n',
    '\n',
    '# Load Iris dataset\n',
    'iris = load_iris()\n',
    'df_iris = pd.DataFrame(data=iris.data, columns=iris.feature_names)\n',
    "df_iris['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)\n",
    '\n',
    '# Save to CSV\n',
    'csv_file = "iris_dataset.csv"\n',
    'df_iris.to_csv(csv_file, index=False)\n',
    'print(f"Iris d