In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

# Load the Iris dataset into a DataFrame: one column per measurement in
# `iris.feature_names`, the integer class label in `target`, and the matching
# class name in `species` (a pandas Categorical built from those codes).
try:
    iris = load_iris()
    df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    df['target'] = iris.target
    df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
except Exception as e:
    print("Error loading dataset:", e)
    # Re-raise: every statement below needs `df`, so continuing would only
    # replace the real error with a confusing NameError further down.
    raise
else:
    print("Dataset loaded successfully.\n")

# Quick look at the data: leading rows first, then summary statistics.
print("First 5 rows of the dataset:")
preview = df.head()
display(preview)

print("\nDataset Summary:")
summary = df.describe()
display(summary)

# Check for missing values: drop (in place) every row that has at least one
# null cell, and report which of the two cases occurred.
has_missing = df.isna().any().any()
if not has_missing:
    print("No missing values found.\n")
else:
    df.dropna(inplace=True)
    print("Missing values found and removed.")

# Pairplot
# Restrict the grid to the measurement columns. `df` also carries the integer
# `target` label, which pairplot would otherwise draw as an extra numeric
# variable even though the `species` hue already encodes the same information.
sns.pairplot(df, vars=iris.feature_names, hue="species")
# y slightly above 1 lifts the title clear of the top row of subplots.
plt.suptitle("Pairplot of Iris Dataset", y=1.02)
plt.show()

# Boxplot
# One box per species showing the spread of sepal length, drawn on an
# explicitly created axes rather than through the implicit pyplot state.
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=df, x='species', y='sepal length (cm)', ax=ax)
ax.set_title("Sepal Length by Species")
ax.set_xlabel("Species")
ax.set_ylabel("Sepal Length (cm)")
plt.show()

# Heatmap
# Pairwise correlation of the first four columns of `df` (selected by
# position), rendered as an annotated colour grid.
corr_matrix = df.iloc[:, :4].corr()
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap="YlGnBu", linewidths=0.5, ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()

# Histogram
# A single call with `hue` makes seaborn compute one shared set of bins for
# all species, so the overlaid histograms are directly comparable. Calling
# histplot once per species bins each subset independently and yields bars of
# different widths.
plt.figure(figsize=(7, 4))
sns.histplot(data=df, x='petal length (cm)', hue='species', kde=True)
plt.title("Petal Length Distribution by Species")
plt.xlabel("Petal Length (cm)")
# No plt.legend() here: seaborn already draws the hue legend, and an explicit
# call would replace it with an empty one (there are no labelled artists).
plt.show()

# Insights
# Hand-written takeaways, printed one per line; the text is static and is not
# computed from `df`. The final entry keeps its trailing newline so the
# output ends with a blank line.
insight_lines = (
    "Insights:",
    "- Setosa petals are generally shorter and narrower.",
    "- Strong correlation between petal width and petal length.",
    "- Dataset is well-balanced and suitable for classification.\n",
)
for line in insight_lines:
    print(line)
