In [None]:
# Task 1: Load and Explore the Dataset

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

# Ensure plots appear in the notebook
%matplotlib inline

# Load the Iris dataset into a DataFrame, then explore and clean it.
#
# Only the load/construct step is guarded. A failure there is reported and
# then re-raised: every later section needs `df`, so continuing after a
# failed load would only surface as an unrelated NameError further down.
try:
    # Load Iris dataset
    iris = load_iris()

    # Create DataFrame from the dataset: one column per feature, plus a
    # categorical 'species' column built from the integer target codes.
    df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
except FileNotFoundError:
    print("‚ùå Error: File not found.")
    raise
except Exception as e:
    print(f"‚ùå An unexpected error occurred: {e}")
    raise

# Display the first few rows
print("üîç First five rows of the dataset:")
display(df.head())

# Check structure and missing values
print("\nüìä Data types:")
print(df.dtypes)

print("\nüïµÔ∏è‚Äç‚ôÄÔ∏è Missing values in the dataset:")
print(df.isnull().sum())

# Clean missing data (if any): rows containing any NaN are removed.
df = df.dropna()  # Alternatively, use df.fillna(value)
print("\n‚úÖ Cleaned data (after handling missing values).")

# Task 2: Basic Data Analysis

# Basic stats: summary statistics as produced by DataFrame.describe().
print("\nüìà Basic statistical summary:")
display(df.describe())

# Group by species and compute the mean of every feature per species.
# 'species' is categorical, so `observed` is passed explicitly: this keeps
# the result the same across pandas versions (the default has changed) and
# avoids the FutureWarning that newer pandas 2.x releases emit when it is
# omitted.
grouped = df.groupby("species", observed=True).mean()
print("\nüìä Mean values grouped by species:")
display(grouped)

# Interesting patterns
# The observations are derived from the per-species means in `grouped`
# instead of being hard-coded, so the printed text always agrees with the
# data and uses the dataset's own species labels.
print("\nüîé Observations:")
largest_by_feature = grouped.idxmax()   # species with the highest mean, per feature
smallest_by_feature = grouped.idxmin()  # species with the lowest mean, per feature
for feature in grouped.columns:
    print(
        f"- {feature}: largest mean for {largest_by_feature[feature]}, "
        f"smallest mean for {smallest_by_feature[feature]}."
    )

# Task 3: Data Visualization

# 1. Line chart: sepal and petal length drawn against the row index, which
#    stands in for a time axis (the data has no real time dimension).
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(df.index, df['sepal length (cm)'], label='Sepal Length')
ax.plot(df.index, df['petal length (cm)'], label='Petal Length')
ax.set_title('Line Chart: Sepal and Petal Lengths Over Sample Index')
ax.set_xlabel('Sample Index')
ax.set_ylabel('Length (cm)')
ax.legend()
ax.grid(True)
plt.show()

# 2. Bar chart: one bar per species for petal length, drawn with seaborn's
#    barplot on an explicitly created axes.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=df, x='species', y='petal length (cm)', ax=ax)
ax.set_title('Bar Chart: Average Petal Length by Species')
ax.set_xlabel('Species')
ax.set_ylabel('Petal Length (cm)')
plt.show()

# 3. Histogram: distribution of sepal width across all samples, 15 bins.
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(
    df['sepal width (cm)'],
    bins=15,
    color='skyblue',
    edgecolor='black',
)
ax.set_title('Histogram: Sepal Width Distribution')
ax.set_xlabel('Sepal Width (cm)')
ax.set_ylabel('Frequency')
plt.show()

# 4. Scatter plot: sepal length against petal length, one colour per species.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    ax=ax,
)
ax.set_title('Scatter Plot: Sepal Length vs Petal Length')
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Petal Length (cm)')
# Rebuild the legend so its title reads 'Species' rather than the column name.
ax.legend(title='Species')
plt.show()