# 📘 Analyzing Data with Pandas and Visualizing Results with Matplotlib

## ✅ Task 1: Load and Explore the Dataset

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Load the Iris dataset that ships with scikit-learn (no download needed).
# A failure is reported and then re-raised: every later cell needs `iris`
# and `df`, so carrying on would only surface a confusing NameError instead
# of the real cause.
try:
    iris = load_iris(as_frame=True)
except Exception as e:
    print(f"Error loading dataset: {e}")
    raise

# Work on a copy so that columns added later do not modify the frame stored
# inside the `iris` bunch.
df = iris.frame.copy()
print("Dataset loaded successfully.")

# Display first few rows
df.head()

In [None]:
# Structural overview: column names, dtypes and non-null counts
df.info()

# Count the missing entries in every column
missing_per_column = df.isna().sum()
print(missing_per_column)

# Every count is zero for this dataset, so no cleaning step is needed.


## ✅ Task 2: Basic Data Analysis

In [None]:
# Summary statistics for the numeric columns of the dataset
summary_stats = df.describe()
summary_stats

In [None]:
# Mean of every numeric column for each class code in `target`.
# numeric_only=True keeps this cell re-runnable after the text `species`
# column has been added to `df` further down; without it, recent pandas
# versions raise a TypeError when asked to average a string column.
grouped = df.groupby('target').mean(numeric_only=True)
grouped

In [None]:
# Translate the integer class codes in `target` into species names
species_by_code = dict(enumerate(iris.target_names))
df['species'] = df['target'].map(species_by_code)

# Mean sepal measurements for each species
sepal_columns = ['sepal length (cm)', 'sepal width (cm)']
df.groupby('species')[sepal_columns].mean()

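**📝 Observations**:
- Virginica (`virginica`) has the largest average sepal length in the table above, and the largest average petal length in the per-target table from the previous cell.
- Setosa (`setosa`) has the highest average sepal width.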

## ✅ Task 3: Data Visualization

In [None]:
# Line chart: sepal length of each sample, plotted in row order
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['sepal length (cm)'], label='Sepal Length')
ax.set_title("Line Chart - Sepal Length Trend")
ax.set_xlabel("Index")
ax.set_ylabel("Sepal Length (cm)")
ax.legend()
ax.grid()
plt.show()

In [None]:
# Bar chart: one bar per species, bar height = mean petal length
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=df, x='species', y='petal length (cm)', estimator='mean', ax=ax)
ax.set_title("Bar Chart - Average Petal Length per Species")
ax.set_xlabel("Species")
ax.set_ylabel("Average Petal Length (cm)")
plt.show()

In [None]:
# Histogram: how sepal width is distributed across all samples
fig, ax = plt.subplots(figsize=(8, 5))
sepal_width = df['sepal width (cm)']
ax.hist(sepal_width, bins=15, color='skyblue', edgecolor='black')
ax.set_title("Histogram - Sepal Width Distribution")
ax.set_xlabel("Sepal Width (cm)")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# Scatter plot: sepal length vs petal length, coloured by species
plt.figure(figsize=(8, 5))
sns.scatterplot(x='sepal length (cm)', y='petal length (cm)', hue='species', data=df)
plt.title("Scatter Plot - Sepal vs Petal Length")
plt.xlabel("Sepal Length (cm)")
plt.ylabel("Petal Length (cm)")
# seaborn already draws a legend titled with the hue column; a bare
# plt.legend() rebuilds it without a title, so set the title explicitly.
plt.legend(title="Species")
plt.show()

## ✅ Conclusion / Insights

- Setosa stands out with the smallest petal lengths and the highest sepal width.
- The scatter plot shows a clear linear relationship between sepal length and petal length for some species.
- The dataset has no missing values, so it can be used for classification or clustering models without any imputation step.
