In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Load the Iris dataset as a pandas DataFrame. `iris_data` is kept as well
# because later cells read `iris_data.target_names`.
try:
    iris_data = load_iris(as_frame=True)
    df = iris_data.frame
    print("Dataset loaded successfully.")
except Exception as e:
    # Report the failure, then re-raise: every later cell needs `df`, so
    # swallowing the error here would only surface later as a NameError.
    print("Error loading dataset:", e)
    raise


In [None]:
# Preview the first five rows of the loaded frame
df.head(n=5)


In [None]:
# Check the structure of the dataset: print the frame's column names,
# non-null counts and dtypes
df.info()


In [None]:
# Count missing values in each column
df.isna().sum()


In [None]:
# Drop every row that contains at least one missing value. Iris has none,
# so this is shown only to demonstrate the cleaning step.
df_cleaned = df.dropna(axis=0, how="any")


In [None]:
# Basic statistics of numerical columns (count, mean, std, min, quartiles, max)
df_cleaned.describe()


In [None]:
# Add a human-readable `species` column, then compare per-species means.
# The integer `target` codes are translated through an {index: name} lookup
# built from `iris_data.target_names`; Series.map replaces a per-row lambda.
species_lookup = dict(enumerate(iris_data.target_names))

# assign() returns a new frame rather than writing a column into the result
# of dropna(); the name `df_cleaned` is rebound so later cells keep working.
df_cleaned = df_cleaned.assign(species=df_cleaned['target'].map(species_lookup))

df_cleaned.groupby('species').mean()


In [None]:
# Line chart: sepal and petal length plotted against the row index
fig, ax = plt.subplots(figsize=(10, 5))
for column, series_label in (
    ('sepal length (cm)', 'Sepal Length'),
    ('petal length (cm)', 'Petal Length'),
):
    ax.plot(df_cleaned.index, df_cleaned[column], label=series_label)
ax.set_title("Sepal and Petal Length Trend Over Samples")
ax.set_xlabel("Sample Index")
ax.set_ylabel("Length (cm)")
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Bar chart: mean petal length for each species
avg_petal_length = df_cleaned.groupby('species')['petal length (cm)'].mean()
ax = avg_petal_length.plot.bar(color='skyblue')  # pandas returns the Axes it drew on
ax.set_title("Average Petal Length by Species")
ax.set_ylabel("Petal Length (cm)")
ax.set_xlabel("Species")
plt.show()


In [None]:
# Histogram: distribution of sepal width across all samples, in 10 bins
fig, ax = plt.subplots()
ax.hist(
    df_cleaned['sepal width (cm)'],
    bins=10,
    color='salmon',
    edgecolor='black',
)
ax.set(
    title="Distribution of Sepal Width",
    xlabel="Sepal Width (cm)",
    ylabel="Frequency",
)
plt.show()


In [None]:
# Scatter plot: sepal length vs. petal length, one colour per species
fig, ax = plt.subplots()
sns.scatterplot(
    data=df_cleaned,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    ax=ax,
)
ax.set_title("Sepal Length vs. Petal Length")
ax.set_xlabel("Sepal Length (cm)")
ax.set_ylabel("Petal Length (cm)")
# Rebuild the legend with an explicit title: calling legend() with no
# arguments discards the title seaborn set for the hue variable.
ax.legend(title="Species")
ax.grid(True)
plt.show()


### Final Observations
- The dataset is clean with no missing values.
- Setosa has distinctly smaller petal length and width than the other two species, making it easy to separate.
- Petal dimensions are more discriminative than sepal dimensions for species classification.
- The scatter plot reveals clear per-species clustering, which makes the dataset well suited to classification with machine learning.
