# Iris Dataset Analysis
This notebook explores the classic Iris dataset: it loads the data, inspects its structure, computes summary statistics, and visualizes the measurements by species.

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Fetch the Iris data that ships with scikit-learn
iris = load_iris()

# Build the feature table and attach the species label in one step:
# the integer target codes are mapped onto their species names and
# stored as a categorical column.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(
    species=pd.Categorical.from_codes(
        codes=iris.target, categories=iris.target_names
    )
)

# Preview the first five rows
df.head(5)


In [None]:

# Print each column's dtype and non-null count
df.info()

# Tally the missing entries per column (shown as the cell's output)
missing_per_column = df.isna().sum()
missing_per_column


In [None]:

# Summary statistics (count, mean, std, min, quartiles, max) for the
# numerical columns; shown as the cell's output
numeric_summary = df.describe()
numeric_summary


In [None]:

# Mean of every numerical column, one row per species.
# `species` is categorical, so `observed` is passed explicitly: recent
# pandas versions warn when it is omitted because the default is changing.
# Every category occurs in the data, so the result is the same either way.
# `numeric_only=True` keeps the aggregation limited to numeric columns
# even if a non-numeric column is added to `df` later.
df.groupby("species", observed=True).mean(numeric_only=True)


In [None]:

# Apply seaborn's "whitegrid" theme; it stays in effect for every
# figure drawn after this point.
sns.set_theme(style="whitegrid")

# Line chart: mean petal length for each species
fig, ax = plt.subplots(figsize=(8, 5))
sns.lineplot(
    data=df,
    x="species",
    y="petal length (cm)",
    estimator="mean",
    marker="o",
    # `ci` is deprecated since seaborn 0.12 and emits a warning;
    # `errorbar=None` is its replacement and likewise draws no error band.
    errorbar=None,
    ax=ax,
)
ax.set_title("Average Petal Length per Species")
ax.set_xlabel("Species")
ax.set_ylabel("Petal Length (cm)")
plt.show()

# Bar chart: mean sepal width for each species
# (the mean is seaborn's default bar-height estimator)
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    data=df,
    x="species",
    y="sepal width (cm)",
    # `ci` is deprecated since seaborn 0.12 and emits a warning;
    # `errorbar=None` is its replacement and likewise draws no error bars.
    errorbar=None,
    ax=ax,
)
ax.set_title("Average Sepal Width per Species")
ax.set_xlabel("Species")
ax.set_ylabel("Sepal Width (cm)")
plt.show()

# Histogram of petal length across all samples: 20 bins with a
# kernel-density curve overlaid
fig, ax = plt.subplots(figsize=(8, 5))
petal_lengths = df["petal length (cm)"]
sns.histplot(petal_lengths, bins=20, kde=True, color="blue", ax=ax)
ax.set_title("Distribution of Petal Length")
ax.set_xlabel("Petal Length (cm)")
ax.set_ylabel("Frequency")
plt.show()

# Scatter plot of sepal length against petal length; species is encoded
# twice (marker colour and marker shape) so the groups stay
# distinguishable without colour.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x="sepal length (cm)",
    y="petal length (cm)",
    hue="species",
    style="species",
    ax=ax,
)
ax.set_title("Sepal Length vs. Petal Length")
ax.set_xlabel("Sepal Length (cm)")
ax.set_ylabel("Petal Length (cm)")
ax.legend(title="Species")
plt.show()
