# Task 1 - Iris Dataset Exploration
# Colab-ready notebook export (not directly runnable as a .py script): each `In [None]:` line marks the start of a cell. Copy each cell into a Jupyter/Colab notebook cell and run.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Apply seaborn's 'whitegrid' theme to every plot drawn after this point.
# set_theme() is the preferred interface; sns.set() is only an alias for it.
sns.set_theme(style='whitegrid')

In [None]:
# Load the 'iris' example dataset through seaborn; every later cell reads it
# from the `iris` variable.
# NOTE(review): per the seaborn docs, load_dataset() fetches from an online
# repository (with local caching), so the first run presumably needs network
# access — confirm for offline environments.
iris = sns.load_dataset('iris')

In [None]:
# Quick structural overview of the dataset: dimensions, column names, the
# first rows, the DataFrame.info() report, and summary statistics.
print('\nShape of dataset:', iris.shape)
print('\nColumn names:', list(iris.columns))
print('\nFirst 5 rows:\n')
print(iris.head())


print('\nInfo:\n')
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
iris.info()


print('\nSummary statistics:\n')
print(iris.describe())

In [None]:
# Report how many rows each species has; skipped entirely when the dataset
# has no 'species' column.
species_column = 'species'
if species_column in iris.columns:
    print('\nSpecies distribution:')
    species_counts = iris[species_column].value_counts()
    print(species_counts)

In [None]:
# Pairplot gives a compact view of relationships between numeric features.
# pairplot() builds its own figure (it returns a PairGrid), so no plt.figure()
# call is made beforehand: that would only leave an empty extra figure behind.
pair_grid = sns.pairplot(iris, hue='species', diag_kind='hist')
# Title the grid's own figure; y slightly above 1 keeps the title clear of the
# top row of axes.
pair_grid.figure.suptitle('Pairplot of Iris features by species', y=1.02)
plt.show()


# Focused scatter plot, example sepal length vs sepal width
plt.figure(figsize=(8, 6))
ax = sns.scatterplot(data=iris, x='sepal_length', y='sepal_width', hue='species', s=80)
ax.set_title('Sepal length vs Sepal width, colored by species')
plt.show()

# Petal length vs petal width
plt.figure(figsize=(8, 6))
ax = sns.scatterplot(data=iris, x='petal_length', y='petal_width', hue='species', s=80)
ax.set_title('Petal length vs Petal width, colored by species')
plt.show()

In [None]:
# Names of the numeric feature columns; also reused by later cells.
numeric_cols = iris.select_dtypes(include=[np.number]).columns.tolist()


# Overall distribution of each numeric feature on a two-column subplot grid.
# The row count is derived from the number of numeric columns so the grid
# still fits when the dataset does not have exactly four of them.
n_grid_cols = 2
n_grid_rows = max(1, -(-len(numeric_cols) // n_grid_cols))  # ceiling division

plt.figure(figsize=(12, 4 * n_grid_rows))
for i, col in enumerate(numeric_cols, 1):
    plt.subplot(n_grid_rows, n_grid_cols, i)
    sns.histplot(iris[col], kde=True)
    plt.title(f'Histogram of {col}')


plt.tight_layout()
plt.show()


# Histograms by species: one separate figure per numeric feature, with the
# species overlaid as step-style histograms.
for col in numeric_cols:
    plt.figure(figsize=(8, 4))
    sns.histplot(data=iris, x=col, hue='species', kde=True, element='step')
    plt.title(f'Histogram of {col} by species')
    plt.show()

In [None]:
# Box plots of every numeric feature split by species, on a two-column grid.
# `numeric_cols` comes from the earlier histogram cell. The row count is
# derived from its length so the grid still fits when there are not exactly
# four numeric columns.
box_grid_cols = 2
box_grid_rows = max(1, -(-len(numeric_cols) // box_grid_cols))  # ceiling division

plt.figure(figsize=(12, 4 * box_grid_rows))
for i, col in enumerate(numeric_cols, 1):
    plt.subplot(box_grid_rows, box_grid_cols, i)
    sns.boxplot(x='species', y=col, data=iris)
    plt.title(f'Box plot of {col} by species')


plt.tight_layout()
plt.show()



In [None]:
# Per-species mean, standard deviation, minimum and maximum of each numeric
# feature; skipped when the dataset has no 'species' column.
if 'species' in iris.columns:
    # Aggregate the numeric columns only: 'mean' and 'std' are not defined for
    # non-numeric data, so selecting them explicitly keeps this cell working
    # even if the frame gains extra text columns. The grouping key itself is
    # excluded, matching what groupby() does by default.
    stat_cols = [
        col
        for col in iris.select_dtypes(include=[np.number]).columns
        if col != 'species'
    ]
    group_stats = iris.groupby('species')[stat_cols].agg(['mean', 'std', 'min', 'max'])
    print('\nGroup-wise statistics by species:\n')
    print(group_stats)

In [None]:
# Pairwise correlation between the numeric features, shown as an annotated
# heatmap with two-decimal cell labels.
numeric_features = iris.select_dtypes(include=[np.number])
corr = numeric_features.corr()

heatmap_fig, heatmap_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, annot=True, fmt='.2f', cmap='Blues', ax=heatmap_ax)
heatmap_ax.set_title('Correlation matrix of numeric features')
plt.show()
