In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Load the Iris dataset that ships with scikit-learn.
# With as_frame=True, load_iris returns a dictionary-like Bunch whose `.frame`
# attribute is a pandas DataFrame and whose `.target` is a pandas Series.
try:
    iris = load_iris(as_frame=True)
    # Work on a copy so that adding a column does not mutate the frame that
    # the Bunch object itself holds.
    df = iris.frame.copy()
    # Map each integer class code to its species name by positional lookup
    # into the `target_names` array.
    df['species'] = iris.target_names[iris.target.to_numpy()]
except FileNotFoundError:
    print("Error: Dataset file not found.")
    # Re-raise: every later statement needs `df`, so continuing would only
    # replace this error with a misleading NameError (or silently reuse a
    # stale `df` left in the kernel).
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise
else:
    print("Dataset loaded successfully!")

# Preview the first rows of the loaded frame.
print("\nFirst 5 rows:")
print(df.head())

# Inspect structure. DataFrame.info() writes its report to stdout itself and
# returns None, so it is called directly; wrapping it in print() would append
# a stray "None" line to the output.
print("\nDataset Info:")
df.info()

# Count missing values per column.
print("\nMissing values:")
print(df.isnull().sum())

# Clean dataset: drop rows containing any missing value (removes nothing when
# the counts above are all zero).
df = df.dropna()


In [None]:
# Descriptive statistics for the numeric columns.
print("\nBasic Statistics:")
print(df.describe())

# Mean of each measurement per species. The integer class-code column
# (`target`, if present) is dropped first: averaging a class label within its
# own class is meaningless and only clutters the table.
measurements = df.drop(columns="target", errors="ignore")
group_means = measurements.groupby("species").mean(numeric_only=True)
print("\nMean values by species:")
print(group_means)

# Observations drawn from the per-species means printed above. Sepal width is
# called out separately because it does not follow the same ordering as the
# other three measurements (Setosa has the largest mean, Versicolor the
# smallest).
print("\nObservations:")
print("- Setosa has the smallest petal dimensions.")
print("- Virginica has the largest sepal length and petal dimensions.")
print("- Versicolor values are in between Setosa and Virginica, except for sepal width.")
print("- Setosa has the largest average sepal width; Versicolor has the smallest.")


In [None]:
# 1. Line Chart: petal length plotted against the row index of the frame.
fig, ax = plt.subplots(figsize=(8, 5))
petal_length = df['petal length (cm)']
ax.plot(df.index, petal_length, label='Petal Length')
ax.set_title("Trend of Petal Length Across Samples")
ax.set_xlabel("Sample Index")
ax.set_ylabel("Petal Length (cm)")
ax.legend()
plt.show()

# 2. Bar Chart: Average Petal Length by Species
# One bar per species, bar height = mean petal length, no error bars.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    x="species",
    y="petal length (cm)",
    data=df,
    estimator="mean",
    # `errorbar=None` replaces the deprecated `ci=None`; the string form of
    # `estimator` used above already requires a seaborn version that
    # supports `errorbar`.
    errorbar=None,
    # NOTE(review): seaborn >= 0.13 also warns when `palette` is given without
    # `hue`; its suggested fix (hue="species", legend=False) is not accepted
    # by 0.12 — adopt it once the minimum seaborn version is confirmed.
    palette="Set2",
    ax=ax,
)
ax.set_title("Average Petal Length by Species")
ax.set_xlabel("Species")
ax.set_ylabel("Average Petal Length (cm)")
plt.show()

# 3. Histogram: how sepal length is distributed over 15 equal-width bins.
fig, ax = plt.subplots(figsize=(8, 5))
sepal_length = df["sepal length (cm)"]
ax.hist(sepal_length, bins=15, color="skyblue", edgecolor="black")
ax.set_title("Distribution of Sepal Length")
ax.set_xlabel("Sepal Length (cm)")
ax.set_ylabel("Frequency")
plt.show()

# 4. Scatter Plot: sepal length against petal length, one colour per species.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x="sepal length (cm)",
    y="petal length (cm)",
    hue="species",
    palette="Set1",
    ax=ax,
)
ax.set_title("Sepal Length vs. Petal Length by Species")
ax.set_xlabel("Sepal Length (cm)")
ax.set_ylabel("Petal Length (cm)")
# Rebuild the legend so that it carries an explicit title.
ax.legend(title="Species")
plt.show()
