In [None]:
# Step 1: Set Up the Environment
# Objective: Ensure you have the right tools and packages installed.

# 1. Install the required libraries.
# 2. Verify the installation by importing the libraries in a Python script or Jupyter notebook.





# Step 2: Load & Explore the Dataset
# Objective: Load data into a pandas DataFrame and obtain a basic understanding of its structure.

# 3. Load a CSV file into a DataFrame.
# 4. Display the first few records to understand the structure.
# 5. Get a summary of the dataset.






# Step 3: Perform NumPy Operations
# Objective: Utilize NumPy for basic numerical operations and array manipulations.

# 6. Convert a DataFrame column to a NumPy array and perform array operations like mean and sum.
# 7. Create a NumPy array and calculate the variance and standard deviation.
# 8. Use NumPy to filter array elements based on a condition.








# Step 4: Data Manipulation with Pandas
# Objective: Use Pandas to clean and manipulate the dataset for analysis.

# 9. Handle missing data by filling or dropping.
# 10. Create new columns or modify existing ones.
# 11. Use groupby to aggregate data.







# Step 5: Data Visualization with Matplotlib & Seaborn
# Objective: Visualize the data to identify patterns, trends, and insights.

# 12. Use Matplotlib to create a basic plot.
# 13. Create a histogram using Seaborn.
# 14. Plot a box plot for a clear view of data distribution.







In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

print("✅ Libraries imported successfully.")

# -------------------------------------
# Step 2: Load & Explore the Dataset
# Objective: Load data into a pandas DataFrame and obtain a basic understanding of its structure.
# -------------------------------------

# Load dataset (using Seaborn's built-in Iris dataset)
df = sns.load_dataset('iris')

# Display first few records
print("\n🔍 First 5 records:")
print(df.head())

# Dataset info
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly: wrapping it in print() would append a stray "None" line.
print("\n📋 Dataset Info:")
df.info()

# Summary statistics for the numeric columns
print("\n📊 Statistical Summary:")
print(df.describe())

# -------------------------------------
# Step 3: Perform NumPy Operations
# Objective: Utilize NumPy for basic numerical operations and array manipulations.
# -------------------------------------

# Extract the sepal_length column as a NumPy array, then report its mean and
# sum through the array's own reduction methods.
sepal_lengths = df['sepal_length'].to_numpy()
print(f"\n🌿 Mean Sepal Length: {sepal_lengths.mean()}")
print(f"🌿 Sum of Sepal Lengths: {sepal_lengths.sum()}")

# Build a small standalone array (the integers 1 through 5) and report its
# variance and standard deviation.
array = np.arange(1, 6)
print(f"📈 Variance of array: {array.var()}")
print(f"📉 Standard Deviation of array: {array.std()}")

# Boolean-mask filtering: keep only the sepal lengths strictly above 5.0.
is_long = sepal_lengths > 5.0
long_sepals = sepal_lengths[is_long]
print(f"🌱 Sepal lengths > 5.0:\n{long_sepals}")

# -------------------------------------
# Step 4: Data Manipulation with Pandas
# Objective: Use Pandas to clean and manipulate the dataset for analysis.
# -------------------------------------

# Inject a missing value (for demonstration)
df.loc[0, 'sepal_length'] = np.nan

# Fill missing data with the column median.
# The filled Series is assigned back to the column rather than calling
# fillna(..., inplace=True) on df['sepal_length']: that chained in-place form
# operates on an intermediate Series, triggers a FutureWarning on pandas 2.x,
# and leaves df unchanged when Copy-on-Write is enabled.
df['sepal_length'] = df['sepal_length'].fillna(df['sepal_length'].median())

# Sanity check: the injected NaN must be gone before aggregating.
assert df['sepal_length'].notna().all(), "sepal_length still contains missing values"

# Create a new column: petal_area (petal_length * petal_width)
df['petal_area'] = df['petal_length'] * df['petal_width']

# Group by species and take the mean of every numeric column
grouped = df.groupby('species').mean(numeric_only=True)
print("\n📊 Average Features by Species:")
print(grouped)

# -------------------------------------
# Step 5: Data Visualization with Matplotlib & Seaborn
# Objective: Visualize the data to identify patterns, trends, and insights.
# -------------------------------------

# Matplotlib line plot of sepal_length against the row index, drawn through
# the explicit Figure/Axes interface.
line_fig, line_ax = plt.subplots(figsize=(6, 4))
line_ax.plot(df['sepal_length'], label='Sepal Length', color='green')
line_ax.set_title("Sepal Length Over Entries")
line_ax.set_xlabel("Index")
line_ax.set_ylabel("Sepal Length (cm)")
line_ax.legend()
line_ax.grid(True)
plt.show()

# Seaborn histogram of sepal_length (20 bins, with a KDE overlay), drawn on
# the current axes, which are resolved explicitly and handed to seaborn.
hist_ax = plt.gca()
sns.histplot(df['sepal_length'], bins=20, kde=True, color='skyblue', ax=hist_ax)
hist_ax.set_title("Histogram of Sepal Length")
hist_ax.set_xlabel("Sepal Length (cm)")
plt.show()

# Seaborn box plot of petal_length per species.
# `species` is mapped to `hue` as well as `x` because recent seaborn releases
# deprecate passing `palette` without `hue`. dodge=False keeps each box
# centred on its category tick instead of being offset per hue level.
box_ax = sns.boxplot(
    x='species',
    y='petal_length',
    hue='species',
    data=df,
    palette='pastel',
    dodge=False,
)

# With hue identical to x a legend only repeats the tick labels, so remove
# it if this seaborn version created one.
box_legend = box_ax.get_legend()
if box_legend is not None:
    box_legend.remove()

box_ax.set_title("Box Plot of Petal Length by Species")
box_ax.set_xlabel("Species")
box_ax.set_ylabel("Petal Length (cm)")
plt.show()