# 📊 Analyzing Data with Pandas & Visualizing with Matplotlib
 
Dataset: **Iris dataset**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
import os

## Load and Explore Dataset

In [None]:
# Load the Iris dataset and build the working DataFrame `df`.
# Only the loader call sits inside the `try`, so errors in the later
# DataFrame steps are not mislabelled as loading errors.
try:
    iris = load_iris(as_frame=True)
except FileNotFoundError:
    print("❌ Error: Dataset file not found.")
    # Re-raise: the code below needs `iris`; swallowing the error here
    # would only surface later as a confusing NameError.
    raise
except Exception as e:
    print("❌ Error loading dataset:", e)
    raise

# `df` aliases `iris.frame`, so the in-place edits below also change it.
df = iris.frame
df.rename(columns={'target': 'species'}, inplace=True)
# Map each integer class code to the name at the same position in
# `iris.target_names` (no hard-coded class count).
df['species'] = df['species'].map(dict(enumerate(iris.target_names)))
print("✅ Dataset loaded successfully!")

# Display first rows
df.head()

In [None]:
# Dataset info
print("\nDataset info:")
# `DataFrame.info()` prints its report itself and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
df.info()

# Count missing (NaN/None) entries in each column.
print("\nMissing values per column:")
print(df.isnull().sum())

## Basic Data Analysis

In [None]:
# Descriptive statistics
# Summarize `df` with `describe()`; as the last expression of the cell,
# the resulting table is what the notebook renders.
df.describe()

In [None]:
# Average every remaining column within each species group.
_by_species = df.groupby('species')
grouped_means = _by_species.mean()
# Last expression of the cell, so the notebook renders the table.
grouped_means

In [None]:
# Inference
# NOTE: this takeaway is a fixed string; it is not computed from the data.
observation = "Observation: Iris-virginica tends to have the largest petal dimensions on average."
print(observation)

## Data Visualization

In [None]:
# Create the folder the chart cells save their PNG files into.
# `exist_ok=True` makes re-running this cell harmless.
plots_dir = "plots"
os.makedirs(plots_dir, exist_ok=True)

In [None]:
# Line chart of petal length with samples ordered from shortest to longest.
df_sorted = df.sort_values(by="petal length (cm)")
# Drop the original row labels so the x-axis runs 0..n-1 in sorted order.
petal_lengths = df_sorted["petal length (cm)"].reset_index(drop=True)

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(petal_lengths)
ax.set_title("Line Chart: Petal Length Trend")
ax.set_xlabel("Sample Index (sorted by petal length)")
ax.set_ylabel("Petal Length (cm)")
ax.grid(True)
# Save before show().
fig.savefig("plots/line_chart.png")
plt.show()

In [None]:
# Bar chart of the mean petal length for each species.
plt.figure(figsize=(8, 5))
# `errorbar=None` turns off the error bars. It replaces the deprecated
# `ci=None`; a string `estimator` already requires seaborn >= 0.12, the
# release that introduced `errorbar`.
sns.barplot(
    x="species",
    y="petal length (cm)",
    data=df,
    estimator="mean",
    errorbar=None,
)
plt.title("Bar Chart: Avg Petal Length by Species")
plt.ylabel("Average Petal Length (cm)")
# Save before show().
plt.savefig("plots/bar_chart.png")
plt.show()

In [None]:
# Histogram of sepal width across all samples, using 15 bins.
sepal_widths = df["sepal width (cm)"]

fig, ax = plt.subplots(figsize=(8, 5))
# Black bar edges keep adjacent bins visually distinct.
ax.hist(sepal_widths, bins=15, edgecolor="black")
ax.set_title("Histogram: Sepal Width Distribution")
ax.set_xlabel("Sepal Width (cm)")
ax.set_ylabel("Frequency")
# Save before show().
fig.savefig("plots/histogram.png")
plt.show()

In [None]:
# Scatter plot of sepal length against petal length, colored by species.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x="sepal length (cm)",
    y="petal length (cm)",
    hue="species",
    ax=ax,
)
ax.set_title("Scatter Plot: Sepal vs. Petal Length")
ax.set_xlabel("Sepal Length (cm)")
ax.set_ylabel("Petal Length (cm)")
# Rebuild the legend so it carries a capitalized title.
ax.legend(title="Species")
# Save before show().
fig.savefig("plots/scatter_plot.png")
plt.show()