# In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Set a clean style for the plots: seaborn's "whitegrid" axes style first,
# then the matching Matplotlib style sheet on top of it.
sns.set_style("whitegrid")
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*".
# Use whichever name this installation provides instead of failing on older
# versions; if neither is present, seaborn's own style above still applies.
for _style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

# --- Task 1: Load and Explore the Dataset ---
print("--- Task 1: Data Loading and Exploration ---")

# Load the Iris dataset from scikit-learn into a DataFrame with one column
# per measured feature.
iris = load_iris()
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
# Label every row with its species name. The names are looked up in the
# dataset's own `target_names` array using the integer class codes, so the
# labels cannot drift from the codes the way a hand-written mapping could.
iris_df['species'] = iris.target_names[iris.target]

# Display the first few rows to inspect the data
print("\nFirst 5 rows of the dataset:")
print(iris_df.head())

# Explore the structure (column dtypes, non-null counts, memory usage).
# DataFrame.info() prints directly, so it is not wrapped in print().
print("\nDataset Information:")
iris_df.info()

# Count missing values per column and base the conclusion on the actual
# total rather than asserting cleanliness unconditionally.
print("\nMissing values per column:")
missing_counts = iris_df.isnull().sum()
print(missing_counts)
total_missing = int(missing_counts.sum())
if total_missing == 0:
    print("Conclusion: The dataset is clean with no missing values.")
else:
    print(f"Conclusion: The dataset contains {total_missing} missing value(s) that need handling.")

# --- Task 2: Basic Data Analysis ---
print("\n\n--- Task 2: Basic Data Analysis ---")

# Compute basic statistics (count, mean, std, quartiles) of the numerical columns
print("\nBasic statistics of numerical columns:")
print(iris_df.describe())

# Group the data by species and compute the mean of petal length.
# `avg_petal_length` is kept as a module-level Series because the bar chart
# in the visualization section plots it.
print("\nAverage petal length per species:")
avg_petal_length = iris_df.groupby('species')['petal length (cm)'].mean()
print(avg_petal_length)

# Report findings that are derived from the data instead of fixed text:
# for every numeric feature, name the species with the highest and the
# lowest mean. This keeps the statements true per feature (the ranking of
# species is not the same for every measurement).
print("\nFindings from analysis:")
species_means = iris_df.groupby('species').mean(numeric_only=True)
for feature in species_means.columns:
    largest = species_means[feature].idxmax()
    smallest = species_means[feature].idxmin()
    print(f"- {feature}: largest on average in {largest}, smallest on average in {smallest}.")

# --- Task 3: Data Visualization ---
print("\n\n--- Task 3: Data Visualization ---")

# One figure with a 2x2 grid; each chart draws on its own explicit Axes so
# no call depends on which subplot happens to be "current".
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
bar_ax, hist_ax, scatter_ax, line_ax = axes.flat

# 1. Bar Chart: Average Petal Length per Species
# Request exactly one palette colour per bar so the colours span the whole
# palette instead of using only the first few entries of a longer default.
bar_colors = sns.color_palette("viridis", n_colors=len(avg_petal_length))
avg_petal_length.plot(kind='bar', color=bar_colors, ax=bar_ax)
bar_ax.set_title('Average Petal Length per Species', fontsize=14)
bar_ax.set_xlabel('Species', fontsize=12)
bar_ax.set_ylabel('Petal Length (cm)', fontsize=12)
bar_ax.tick_params(axis='x', labelrotation=0)

# 2. Histogram: Distribution of Sepal Width (with a KDE curve overlaid)
sns.histplot(data=iris_df, x='sepal width (cm)', kde=True, color='purple', bins=20, ax=hist_ax)
hist_ax.set_title('Distribution of Sepal Width', fontsize=14)
hist_ax.set_xlabel('Sepal Width (cm)', fontsize=12)
hist_ax.set_ylabel('Frequency', fontsize=12)

# 3. Scatter Plot: Sepal Length vs. Petal Length, coloured and marked by species
sns.scatterplot(
    data=iris_df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    style='species',
    s=100,
    ax=scatter_ax,
)
scatter_ax.set_title('Relationship between Sepal and Petal Length', fontsize=14)
scatter_ax.set_xlabel('Sepal Length (cm)', fontsize=12)
scatter_ax.set_ylabel('Petal Length (cm)', fontsize=12)
scatter_ax.legend(title='Species')

# 4. Line Chart (for a time-series-like visualization)
# `iris_df_sorted` keeps its original row labels. Plotting against those
# labels would put the points back in their original order, because lineplot
# orders points by x by default. Plot against the position in the sorted
# sequence instead, so the chart really shows the lengths in ascending order.
iris_df_sorted = iris_df.sort_values(by='sepal length (cm)')
sorted_positions = np.arange(len(iris_df_sorted))
sns.lineplot(
    x=sorted_positions,
    y=iris_df_sorted['sepal length (cm)'].to_numpy(),
    marker='o',
    ax=line_ax,
)
line_ax.set_title('Sepal Length Trend (Sorted by Length)', fontsize=14)
line_ax.set_xlabel('Data Point Index (Sorted)', fontsize=12)
line_ax.set_ylabel('Sepal Length (cm)', fontsize=12)

fig.tight_layout()
plt.show()

print("\nVisualizations have been created successfully.")

