# Iris Dataset Analysis

This notebook covers loading, exploring, analyzing, and visualizing the Iris dataset.

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

## Task 1: Load and Explore the Dataset

In [None]:
# Load the Iris dataset into a DataFrame, then print a quick overview of it.
# Only the loading step is wrapped in try/except; a failure is reported and
# then re-raised so the notebook stops at the real cause.
try:
    iris = load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)
    # Translate the integer class codes in iris.target into named categories.
    df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
except Exception as e:
    print(f"Error loading dataset: {e}")
    # Re-raise: every later cell needs `df`. Swallowing the error here would
    # only postpone the failure to a confusing NameError below, or (on a
    # reused kernel) let a stale `df` from an earlier run be used silently.
    raise

print("First 5 rows of the dataset:")
print(df.head())

print("\nDataset Information:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would emit an extra "None" line.
df.info()

print("\nMissing values in each column:")
print(df.isnull().sum())

# Cleaning dataset (dropping missing values if any)
df = df.dropna()

## Task 2: Basic Data Analysis

In [None]:
# Basic statistics: count, mean, std, min, quartiles and max of the numeric columns
print("\nDescriptive Statistics:")
print(df.describe())

# Group by species and compute the mean of every feature.
# 'species' is a categorical column; `observed` is passed explicitly so the
# result does not depend on the pandas version's default (newer pandas emits
# a FutureWarning when it is omitted for a categorical grouper).
# observed=True keeps only species that actually occur in the data; when
# every species has rows the output is the same as with observed=False.
grouped_means = df.groupby('species', observed=True).mean()
print("\nMean of features grouped by species:")
print(grouped_means)

## Task 3: Data Visualization

In [None]:
# 1. Line chart - mean petal length for each species
# Build the figure and axes explicitly, then draw the per-species means on them.
fig, ax = plt.subplots(figsize=(8, 5))
petal_length_means = grouped_means['petal length (cm)']
petal_length_means.plot(ax=ax, marker='o')
ax.set_title('Mean Petal Length by Species')
ax.set_xlabel('Species')
ax.set_ylabel('Petal Length (cm)')
ax.grid(True)
plt.show()

In [None]:
# 2. Bar chart - average sepal width for each species
# Turn the 'species' index of the grouped means back into a regular column
# so it can be handed to barplot by name as the x variable.
grouped_means_reset = grouped_means.reset_index()

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=grouped_means_reset, x='species', y='sepal width (cm)', ax=ax)
ax.set_title('Average Sepal Width per Species')
ax.set_xlabel('Species')
ax.set_ylabel('Sepal Width (cm)')
plt.show()

In [None]:
# 3. Histogram - how petal length is distributed over all samples (20 bins)
fig, ax = plt.subplots(figsize=(8, 5))
petal_lengths = df['petal length (cm)']
ax.hist(petal_lengths, bins=20, color='skyblue', edgecolor='black')
ax.set_title('Distribution of Petal Length')
ax.set_xlabel('Petal Length (cm)')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# 4. Scatter plot - sepal length against petal length, coloured by species
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    ax=ax,
)
ax.set_title('Sepal Length vs Petal Length')
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Petal Length (cm)')
# Rebuild the legend so its title reads 'Species'.
ax.legend(title='Species')
plt.show()