In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris


In [None]:
# Build the working DataFrame from scikit-learn's bundled Iris data:
# one column per measurement, plus a categorical `species` column that
# maps the integer target codes to their species names.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(
    species=pd.Categorical.from_codes(codes=iris.target, categories=iris.target_names)
)

# Preview the first five rows (last expression -> rich notebook display).
df.head(5)


In [None]:
# Gather the per-column dtype and per-column null count first, then
# print them as two plain-text sections.
column_types = df.dtypes
null_counts = df.isna().sum()  # isna() is an alias of isnull()

print(column_types)
print("\nMissing values:")
print(null_counts)


In [None]:
# Descriptive statistics for the numeric measurement columns only;
# the categorical `species` column is left out of the summary.
df.select_dtypes(include='number').describe()


In [None]:
# Mean of every numeric measurement, per species.
# `species` is a categorical column, so `observed` is passed explicitly:
# recent pandas versions emit a FutureWarning when it is omitted for
# categorical groupers. All categories occur in the data, so the result
# is the same either way.
# `numeric_only=True` keeps the aggregation from failing if a
# non-numeric column is ever added to `df`.
df.groupby('species', observed=True).mean(numeric_only=True)


In [None]:
# Petal length plotted against row position, one line per species.
# The row position is attached to a local copy via `assign` rather than
# written into `df` itself, so the shared frame is not mutated and
# re-running this or any other cell does not pick up a stray helper
# column (e.g. in summary statistics or group means).
petal_by_observation = df.assign(index=df.index)

plt.figure(figsize=(10,5))
sns.lineplot(data=petal_by_observation, x='index', y='petal length (cm)', hue='species')
plt.title('Petal Length Over Observations')
plt.xlabel('Observation Index')
plt.ylabel('Petal Length (cm)')
plt.show()


In [None]:
# Bar chart of petal length per species (seaborn's barplot aggregates
# each group with its default estimator), drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=df, x='species', y='petal length (cm)', ax=ax)
ax.set_title('Average Petal Length by Species')
ax.set_xlabel('Species')
ax.set_ylabel('Petal Length (cm)')
plt.show()


In [None]:
# Histogram of sepal width across all observations, 15 bins.
fig, ax = plt.subplots(figsize=(8, 5))
sepal_widths = df['sepal width (cm)']
ax.hist(sepal_widths, bins=15, color='skyblue', edgecolor='black')
ax.set(
    title='Distribution of Sepal Width',
    xlabel='Sepal Width (cm)',
    ylabel='Frequency',
)
plt.show()


In [None]:
# Scatter of sepal length against petal length, coloured by species.
plt.figure(figsize=(8, 5))
scatter_ax = sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
)
# Label through the Axes that seaborn returned instead of pyplot's
# implicit "current axes".
scatter_ax.set_title('Sepal Length vs Petal Length')
scatter_ax.set_xlabel('Sepal Length (cm)')
scatter_ax.set_ylabel('Petal Length (cm)')
plt.show()


In [None]:
## 📊 Findings:

- No missing data in the dataset.
- Setosa species has the smallest petals.
- Virginica has the largest petals and the longest sepals on average; setosa has the widest sepals.
- Setosa is clearly separated from the other two species by petal length; versicolor and virginica overlap slightly.
