In [None]:
# Question: Combining Multiple Outlier Detection Methods
# Description: Combine Z-score, IQR, and Boxplot methods to detect and compare outliers in a dataset.




In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.stats import zscore

# Sample dataset: 15 readings between 10 and 16 plus two extreme values (100 and 200).
data = pd.DataFrame({
    'Values': [10, 12, 12, 13, 12, 12, 14, 15, 16, 100, 12, 13, 14, 11, 15, 12, 200]
})

# Cut-offs for the two numeric methods, named so they can be tuned in one place.
Z_THRESHOLD = 3        # a row is flagged when |z| exceeds this value
IQR_MULTIPLIER = 1.5   # fence distance from the quartiles, in units of IQR

# 1. Z-score Method
# scipy's zscore standardises with the population standard deviation (ddof=0) by default.
data['Z_score'] = zscore(data['Values'])
# Select rows and column in one .loc call instead of chained data[mask]['Values'] indexing.
zscore_outliers = data.loc[data['Z_score'].abs() > Z_THRESHOLD, 'Values']
# Note: on this sample the two extreme values inflate the standard deviation, so 100
# only reaches |z| of about 1.5 and is not flagged; only 200 (|z| of about 3.6) is.

# 2. IQR Method
Q1 = data['Values'].quantile(0.25)
Q3 = data['Values'].quantile(0.75)
IQR = Q3 - Q1
# Fences: anything strictly outside [lower_bound, upper_bound] counts as an outlier.
lower_bound = Q1 - IQR_MULTIPLIER * IQR
upper_bound = Q3 + IQR_MULTIPLIER * IQR
outside_fences = (data['Values'] < lower_bound) | (data['Values'] > upper_bound)
iqr_outliers = data.loc[outside_fences, 'Values']

# 3. Boxplot Method (visual)
# Horizontal boxplot drawn through explicit Figure/Axes handles; points rendered
# beyond the whiskers are the ones the plot marks as outliers.
fig, ax = plt.subplots(figsize=(8, 5))
ax.boxplot(data['Values'], vert=False)
ax.set_title('Boxplot of Values')
plt.show()

# Report the values flagged by each numeric method
print("Outliers detected by Z-score method:", zscore_outliers.to_numpy())
print("Outliers detected by IQR method:", iqr_outliers.to_numpy())

# Per-row boolean flags, one column per method, using the same cut-offs as above:
# |z| greater than 3, or a value strictly outside the IQR fences.
data['Z_outlier'] = data['Z_score'].abs().gt(3)
data['IQR_outlier'] = data['Values'].lt(lower_bound) | data['Values'].gt(upper_bound)

# Side-by-side view of the raw values, their z-scores and both flags
print("\nData with Outlier Flags:")
flag_columns = ['Values', 'Z_score', 'Z_outlier', 'IQR_outlier']
print(data[flag_columns])
