# In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

# Task 1: Load and Explore the Dataset
# Load the iris dataset with as_frame=True and keep both the raw result
# (`iris_raw`, used later for `target_names`) and its `.frame` attribute
# (`iris_df`, the table every later step works on).
try:
    # Only the loader call is guarded, so the handler reports genuine
    # loading failures rather than unrelated errors.
    iris_raw = load_iris(as_frame=True)
except Exception as e:
    # Report the problem, then re-raise: every later statement needs
    # `iris_df`, so continuing would only fail later with a confusing
    # NameError that hides the real cause.
    print(f"Error loading dataset: {e}")
    raise
else:
    iris_df = iris_raw.frame
    print("Dataset loaded successfully.")

# Preview the first rows; each heading and its table go out in one print
# call, separated by a newline.
print("\nFirst 5 rows of the dataset:", iris_df.head(5), sep="\n")

# Column dtypes, followed by the per-column count of missing entries
print("\nData Types:", iris_df.dtypes, sep="\n")

print("\nMissing Values:", iris_df.isna().sum(), sep="\n")

# No cleaning step follows: this dataset has no missing values to handle

# Task 2: Basic Data Analysis
# Summary statistics as reported by DataFrame.describe()
print("\nBasic Statistics:", iris_df.describe(), sep="\n")

# Per-class means, keyed by the integer `target` code
grouped = iris_df.groupby('target')
print("\nMean values grouped by target (species):", grouped.mean(), sep="\n")

# Add a human-readable `species` column by translating each integer
# `target` code to the name at the same position in `target_names`.
# The column is added after the means above are computed and printed.
iris_df['species'] = iris_df['target'].map(
    dict(enumerate(iris_raw.target_names))
)

# Task 3: Data Visualization
# Apply the "whitegrid" seaborn style to every figure drawn below
sns.set_theme(style="whitegrid")

# 1. Line chart: petal length plotted against the DataFrame index, i.e. in
#    the order the samples are stored
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(iris_df['petal length (cm)'], label='Petal Length')
ax.set_title('Petal Length Across Samples')
ax.set_xlabel('Sample Index')
ax.set_ylabel('Petal Length (cm)')
ax.legend()
fig.tight_layout()
plt.show()

# 2. Bar chart: one bar per species, drawn from petal length
#    (the titled "average" is seaborn's default barplot estimator)
fig, ax = plt.subplots(figsize=(8, 5))
# ci=None turns off the error bars.
# NOTE(review): newer seaborn releases appear to deprecate `ci` in favour of
# `errorbar=None` - confirm against the installed version before switching.
sns.barplot(
    data=iris_df,
    x='species',
    y='petal length (cm)',
    ci=None,
    ax=ax,
)
ax.set_title('Average Petal Length by Species')
ax.set_xlabel('Species')
ax.set_ylabel('Petal Length (cm)')
fig.tight_layout()
plt.show()

# 3. Histogram: sepal width in 15 bins with a KDE curve overlaid
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(iris_df['sepal width (cm)'], bins=15, kde=True, ax=ax)
ax.set_title('Distribution of Sepal Width')
ax.set_xlabel('Sepal Width (cm)')
fig.tight_layout()
plt.show()

# 4. Scatter plot: sepal length against petal length, coloured by species
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=iris_df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    ax=ax,
)
ax.set_title('Sepal Length vs. Petal Length')
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Petal Length (cm)')
ax.legend(title='Species')
fig.tight_layout()
plt.show()
