In [3]:
import pandas as pd
import numpy as np

# --------------------------------------------------
# 1️⃣ Create realistic dataset (Height + SAT Score)
# --------------------------------------------------
# One (student, height in cm, SAT score) tuple per row. The last row carries
# the deliberately extreme height (210) and extreme score (1550).
student_records = [
    ("Amit", 160, 900),
    ("Rahul", 170, 1100),
    ("Sneha", 165, 1000),
    ("Karan", 180, 1400),
    ("Pooja", 175, 1200),
    ("Anita", 172, 1150),
    ("Ravi", 168, 1050),
    ("Neha", 169, 1080),
    ("Arjun", 171, 1120),
    ("Meera", 210, 1550),
]

# Pivot the row tuples into the column-oriented mapping the frame is built from.
data = {
    column: list(values)
    for column, values in zip(("Student", "Height_cm", "SAT_Score"), zip(*student_records))
}

df = pd.DataFrame(data)

# Header line followed by the frame's text rendering on the next line.
print("Original Dataset:", df, sep="\n")

# --------------------------------------------------
# 2️⃣ Calculate Mean and Standard Deviation
# --------------------------------------------------
# Pull each numeric column once, then derive both statistics from it.
# Series.std() is called with its defaults here (pandas documents the default
# as ddof=1, i.e. the sample standard deviation).
height_col = df["Height_cm"]
sat_col = df["SAT_Score"]

height_mean, height_std = height_col.mean(), height_col.std()
sat_mean, sat_std = sat_col.mean(), sat_col.std()

# A leading "\n" in a caption starts a new visual group in the printed report.
for caption, statistic in (
    ("\nHeight Mean:", height_mean),
    ("Height Std Dev:", height_std),
    ("\nSAT Mean:", sat_mean),
    ("SAT Std Dev:", sat_std),
):
    print(caption, statistic)

# --------------------------------------------------
# 3️⃣ Calculate Z-scores
# Z = (x - mean) / std
# --------------------------------------------------
# Centre each column on its mean, then scale by its standard deviation.
# The results are stored as new columns on df itself.
df["Height_Z"] = df["Height_cm"].sub(height_mean).div(height_std)
df["SAT_Z"] = df["SAT_Score"].sub(sat_mean).div(sat_std)

# Blank line, header, then the frame including the two new Z-score columns.
print("\nDataset with Z-scores:", df, sep="\n")

# --------------------------------------------------
# 4️⃣ Detect Outliers: |Z| > 3  OR  |modified Z| > 3.5
# --------------------------------------------------
# Why the classic rule is not enough on its own:
# when Z is computed with the sample standard deviation, |Z| can never exceed
# (n - 1) / sqrt(n) (Samuelson's inequality). For the 10 rows in this dataset
# that bound is about 2.85, so "|Z| > 3" can never be true and the extreme
# row (210 cm / 1550) would go unreported. The extreme value also inflates the
# mean and std it is measured against, masking itself.
#
# The modified Z-score replaces mean/std with median/MAD, which a single
# extreme value barely moves. The 0.6745 scale factor and the 3.5 cutoff are
# the values recommended by Iglewicz & Hoaglin.
Z_THRESHOLD = 3
MODIFIED_Z_THRESHOLD = 3.5


def _modified_z(values):
    """Return the modified Z-score of every entry in a numeric Series.

    Computed as 0.6745 * (x - median) / MAD, where MAD is the median of the
    absolute deviations from the median.

    Note: if MAD is 0 (more than half of the values are identical) the
    division yields inf for values off the median and NaN for values on it.
    """
    center = values.median()
    mad = (values - center).abs().median()
    return 0.6745 * (values - center) / mad


# Classic rule, unchanged: anything it flagged before is still flagged.
classic_flag = (np.abs(df["Height_Z"]) > Z_THRESHOLD) | (np.abs(df["SAT_Z"]) > Z_THRESHOLD)

# Robust rule, computed from the raw measurements rather than the Z columns.
robust_flag = (
    (_modified_z(df["Height_cm"]).abs() > MODIFIED_Z_THRESHOLD)
    | (_modified_z(df["SAT_Score"]).abs() > MODIFIED_Z_THRESHOLD)
)

# A row is reported when either rule fires on either measurement.
outliers = df[classic_flag | robust_flag]

print("\nOutliers detected:")
print(outliers)


Original Dataset:
  Student  Height_cm  SAT_Score
0    Amit        160        900
1   Rahul        170       1100
2   Sneha        165       1000
3   Karan        180       1400
4   Pooja        175       1200
5   Anita        172       1150
6    Ravi        168       1050
7    Neha        169       1080
8   Arjun        171       1120
9   Meera        210       1550

Height Mean: 174.0
Height Std Dev: 13.743685418725535

SAT Mean: 1155.0
SAT Std Dev: 190.91883092036784

Dataset with Z-scores:
  Student  Height_cm  SAT_Score  Height_Z     SAT_Z
0    Amit        160        900 -1.018650 -1.335646
1   Rahul        170       1100 -0.291043 -0.288081
2   Sneha        165       1000 -0.654846 -0.811863
3   Karan        180       1400  0.436564  1.283268
4   Pooja        175       1200  0.072761  0.235702
5   Anita        172       1150 -0.145521 -0.026189
6    Ravi        168       1050 -0.436564 -0.549972
7    Neha        169       1080 -0.363803 -0.392837
8   Arjun        171       1120 -