# Analyzing Data with Pandas and Visualizing Results with Matplotlib

In [None]:
# Analyzing Data with Pandas and Visualizing Results with Matplotlib

# --- Task 1: Load and Explore the Dataset ---

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Load the Iris dataset bundled with scikit-learn. With as_frame=True the
# returned object exposes the data as a pandas DataFrame through `.frame`.
iris = load_iris(as_frame=True)
df = iris.frame

# Display first few rows
print("First five rows:")
print(df.head())

# Check structure (data types, null values).
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly: wrapping it in print() would emit a stray "None" line.
print("\nDataset Info:")
df.info()

# Per-column count of missing values.
print("\nMissing values:")
print(df.isnull().sum())

# --- Task 2: Basic Data Analysis ---

# Basic statistics
print("\nDescriptive statistics:")
print(df.describe())

# Only the measurement columns are averaged. `target` is an integer class
# label, so its mean is not a meaningful statistic, and selecting the feature
# columns explicitly keeps both tables below free of label/helper columns.
feature_cols = list(iris.feature_names)

# Grouping by species (numeric code) and calculating the mean of each feature
print("\nMean values by species:")
print(df.groupby("target")[feature_cols].mean())

# Replace numeric target with actual species names for readability.
# enumerate() pairs each class code (0, 1, 2, ...) with its name.
df["species"] = df["target"].map(dict(enumerate(iris.target_names)))

print("\nMean values by species (named):")
print(df.groupby("species")[feature_cols].mean())

# --- Task 3: Data Visualization ---

# Prettier plots: apply the seaborn style sheet to every figure below.
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8*"; older
# releases only ship "seaborn", so fall back to that name when the new one is
# not available instead of raising on import-time style lookup.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

# 1. Line chart: treat the row index as if it were a measurement sequence and
#    draw sepal and petal length against it on a shared axis.
fig, ax = plt.subplots(figsize=(8, 5))
for column, series_label in (
    ("sepal length (cm)", "Sepal Length"),
    ("petal length (cm)", "Petal Length"),
):
    ax.plot(df.index, df[column], label=series_label)
ax.set_title("Line Chart of Sepal & Petal Lengths")
ax.set_xlabel("Sample Index")
ax.set_ylabel("Length (cm)")
ax.legend()
plt.show()

# 2. Bar chart: one bar per species showing its mean petal length.
mean_petal_length = df.groupby("species")["petal length (cm)"].mean()
fig, ax = plt.subplots(figsize=(6, 4))
mean_petal_length.plot(kind="bar", ax=ax, color=["skyblue", "orange", "green"])
ax.set_title("Average Petal Length by Species")
ax.set_ylabel("Petal Length (cm)")
plt.show()

# 3. Histogram: how sepal width is distributed over all samples (15 bins).
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(
    df["sepal width (cm)"],
    bins=15,
    color="purple",
    edgecolor="black",
)
ax.set_title("Histogram of Sepal Width")
ax.set_xlabel("Sepal Width (cm)")
ax.set_ylabel("Frequency")
plt.show()

# 4. Scatter plot: sepal length against petal length, coloured by species.
fig, ax = plt.subplots(figsize=(6, 4))
sns.scatterplot(
    data=df,
    x="sepal length (cm)",
    y="petal length (cm)",
    hue="species",
    palette="Set2",
    ax=ax,
)
ax.set_title("Scatter Plot of Sepal vs Petal Length")
ax.set_xlabel("Sepal Length (cm)")
ax.set_ylabel("Petal Length (cm)")
ax.legend(title="Species")
plt.show()

# --- Findings / Observations ---
# 1. Setosa generally has shorter petals than Versicolor and Virginica.
# 2. Sepal widths are roughly normally distributed, centered near 3 cm.
# 3. The species form visibly distinct groups in the scatter plot, which makes the Iris dataset well suited to classification tasks.
# 4. The line plot shows how sepal and petal lengths vary across the samples in index order.
