In [None]:
# Cell 1: Imports and dataset loading
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Load the Iris dataset; as_frame=True makes the returned Bunch expose a
# pandas DataFrame via its `frame` attribute.
iris = load_iris(as_frame=True)

# Work on a copy: `iris.frame` is the Bunch's own DataFrame object, and a later
# cell adds a column to `df` in place. Copying keeps `iris.frame` untouched.
df = iris.frame.copy()

print("Dataset loaded successfully!")


In [None]:
# Cell 2: Explore dataset
# Quick structural overview: a sample of rows, column dtypes / non-null
# counts, and the number of missing values per column.
print("First 5 rows:")
display(df.head())

print("\nDataset info:")
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
df.info()

print("\nMissing values per column:")
print(df.isnull().sum())


In [None]:
# Cell 3: Basic statistics
# Build the descriptive-statistics table first, then render it with the
# notebook's rich display.
summary_stats = df.describe()

print("Basic statistics:")
display(summary_stats)


In [None]:
# Cell 4: Group by target/species and compute mean
# numeric_only=True keeps this cell re-runnable: once the string-valued
# `species` column (added at the bottom of this cell) exists, a plain .mean()
# would try to average it and raise TypeError on pandas >= 2.0.
grouped = df.groupby('target').mean(numeric_only=True)
print("Mean values grouped by species (target):")
display(grouped)

# Add species names column: each integer target code is the position of its
# name in iris.target_names, so enumerate() yields the code -> name mapping.
df['species'] = df['target'].map(dict(enumerate(iris.target_names)))


In [None]:
# Cell 5: Line chart - Average petal length by species
# Mean petal length per species, drawn as a marked line on an explicit Axes.
grouped_petal_length = df.groupby('species')['petal length (cm)'].mean()

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(grouped_petal_length.index, grouped_petal_length.values, marker='o')
ax.set(
    title='Average Petal Length by Species',
    xlabel='Species',
    ylabel='Average Petal Length (cm)',
)
ax.grid(True)
plt.show()


In [None]:
# Cell 6: Bar chart - Average sepal width by species
# Mean sepal width per species, drawn as bars on an explicit Axes.
grouped_sepal_width = df.groupby('species')['sepal width (cm)'].mean()

fig, ax = plt.subplots(figsize=(8, 5))
grouped_sepal_width.plot.bar(ax=ax, color='skyblue')
ax.set(
    title='Average Sepal Width by Species',
    xlabel='Species',
    ylabel='Average Sepal Width (cm)',
)
# Tilt the species labels to 45 degrees.
ax.tick_params(axis='x', labelrotation=45)
plt.show()


In [None]:
# Cell 7: Histogram - Distribution of petal length
# Petal length across all samples, split into 15 bins.
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(
    df['petal length (cm)'],
    bins=15,
    color='purple',
    edgecolor='black',
)
ax.set(
    title='Distribution of Petal Length',
    xlabel='Petal Length (cm)',
    ylabel='Frequency',
)
ax.grid(True)
plt.show()


In [None]:
# Cell 8: Scatter plot - Sepal length vs Petal length colored by species
# One point per sample; the hue encodes the species column.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    palette='deep',
    ax=ax,
)
ax.set(
    title='Sepal Length vs Petal Length by Species',
    xlabel='Sepal Length (cm)',
    ylabel='Petal Length (cm)',
)
# Rebuild the legend so its title is capitalised.
ax.legend(title='Species')
ax.grid(True)
plt.show()


In [None]:
# Cell 9: Findings and observations (Markdown cell recommended but Python print works)
# One argument per output line; the empty first and last arguments reproduce
# the blank lines that surround the summary.
print(
    "",
    "Findings and Observations:",
    "- Setosa species have significantly shorter petal lengths compared to Versicolor and Virginica.",
    "- Sepal width is slightly larger in Setosa compared to others.",
    "- There is a strong positive relationship between sepal length and petal length.",
    "- Virginica species tend to have the largest measurements across most features.",
    "",
    sep="\n",
)
