# In [None]:
# Assignment: Analyzing Data with Pandas and Visualizing Results

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Task 1: Load and Explore Dataset

# Load the Iris dataset bundled with scikit-learn. With `as_frame=True`,
# `iris.frame` is a pandas DataFrame holding the feature columns together
# with the numeric `target` (species code) column.
try:
    iris = load_iris(as_frame=True)
    df = iris.frame
except Exception as e:
    # Every later step needs `df`; report the failure and re-raise instead of
    # continuing into an unrelated-looking NameError further down.
    print(f"Error loading dataset: {e}")
    raise
else:
    print("✅ Dataset loaded successfully\n")

# Display first few rows
print("First 5 rows:")
print(df.head(), "\n")

# Dataset structure
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None".
print("Dataset Info:")
df.info()
print()

# Check missing values (count of nulls in each column)
print("Missing values per column:")
print(df.isnull().sum(), "\n")

# No missing values in Iris, but if they existed:
# df.fillna(df.mean(), inplace=True) or df.dropna(inplace=True)

# Task 2: Basic Data Analysis
# Summary statistics for the DataFrame's columns.
print("Basic Statistics:")
print(df.describe(), end=" \n\n")

# Mean sepal length within each species code stored in the `target` column.
species_group = df["sepal length (cm)"].groupby(df["target"]).mean()
print("Average Sepal Length by Species:")
print(species_group, end=" \n\n")

# Interesting finding (fixed text, not computed from the data above)
print(
    "Observation: Setosa has noticeably shorter sepals on average "
    "compared to Virginica.\n"
)

# Task 3: Data Visualization
# Apply seaborn's "whitegrid" theme before any figure is created.
sns.set_style("whitegrid")

# 1. Line chart – sepal length plotted against the DataFrame index
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df["sepal length (cm)"], label="Sepal Length")
ax.set_title("Line Chart: Sepal Length Trend")
ax.set_xlabel("Sample Index")
ax.set_ylabel("Sepal Length (cm)")
ax.legend()
plt.show()

# 2. Bar chart – mean petal length for each species code
mean_petal_length = df.groupby("target")["petal length (cm)"].mean()
bar_colors = ["#4caf50", "#2196f3", "#ff5722"]  # one colour per species code

fig, ax = plt.subplots(figsize=(8, 5))
mean_petal_length.plot(kind="bar", color=bar_colors, ax=ax)
ax.set_title("Average Petal Length by Species")
ax.set_xlabel("Species (0=Setosa, 1=Versicolor, 2=Virginica)")
ax.set_ylabel("Avg Petal Length (cm)")
plt.show()

# 3. Histogram – how sepal width values are distributed across 20 bins
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(
    df["sepal width (cm)"],
    bins=20,
    color="purple",
    edgecolor="black",
)
ax.set_title("Histogram: Sepal Width Distribution")
ax.set_xlabel("Sepal Width (cm)")
ax.set_ylabel("Frequency")
plt.show()

# 4. Scatter plot – Sepal length vs. Petal length, coloured by species
# `target` is a categorical code (0, 1, 2), so a discrete legend is used
# instead of a continuous colorbar, which would display meaningless
# in-between values such as 0.5.
plt.figure(figsize=(8, 5))
scatter = plt.scatter(
    df["sepal length (cm)"],
    df["petal length (cm)"],
    c=df["target"],
    cmap="viridis",
)
plt.title("Scatter Plot: Sepal Length vs Petal Length")
plt.xlabel("Sepal Length (cm)")
plt.ylabel("Petal Length (cm)")
# legend_elements() returns one handle per distinct colour value in ascending
# order; the default numeric labels are replaced by the species names
# (0=Setosa, 1=Versicolor, 2=Virginica).
species_handles, _default_labels = scatter.legend_elements()
plt.legend(species_handles, ["Setosa", "Versicolor", "Virginica"], title="Species")
plt.show()
