# In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

# Example Data: synthetic sample (swap in your actual dataset here)
data = np.random.normal(loc=0, scale=1, size=1000)  # 1000 draws, mean=0, std=1

# 1. Z-score Method
# Flag every point whose absolute Z-score exceeds 3.
z_scores = np.abs(stats.zscore(data))
z_outliers = np.nonzero(z_scores > 3)  # 1-tuple wrapping the array of flagged indices

# 2. IQR Method
# Quartiles and the interquartile range (Q3 - Q1).
Q1, Q3 = np.percentile(data, [25, 75])
IQR = Q3 - Q1

# Fences sit 1.5 * IQR outside the quartiles.
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Anything strictly outside the fences counts as an outlier.
outside_fences = np.logical_or(data < lower_bound, data > upper_bound)
iqr_outliers = np.nonzero(outside_fences)  # 1-tuple wrapping the array of flagged indices

# 3. Boxplot Method
# Draw a box-and-whisker plot of the data on the current Axes and show it.
boxplot_axes = plt.gca()
boxplot_axes.boxplot(data)
boxplot_axes.set_title('Boxplot of the Dataset')
plt.show()

# Combined outlier detection from all methods

# Union of the indices flagged by the Z-score and IQR methods. The boxplot is
# not added separately: it uses the same bounds as the IQR method.
z_idx = z_outliers[0]
iqr_idx = iqr_outliers[0]
combined_outliers = set(z_idx) | set(iqr_idx)

# Report the indices found by each method and by their union
print(f"Z-score Method Outliers (Indices): {z_idx}")
print(f"IQR Method Outliers (Indices): {iqr_idx}")
print(f"Combined Outliers (Indices): {combined_outliers}")

# Visualize the outliers detected by all methods
# Line plot of the raw data with one scatter overlay per detection method

combined_idx = list(combined_outliers)  # a set cannot be used directly as an array index

# (indices, color, legend label, marker) for each overlay, in drawing order
overlays = (
    (z_outliers[0], 'red', "Z-score Outliers", 'x'),
    (iqr_outliers[0], 'blue', "IQR Outliers", 'o'),
    (combined_idx, 'green', "Combined Outliers", '*'),
)

plt.figure(figsize=(10, 6))
plt.plot(data, label="Data")
for idx, overlay_color, overlay_label, overlay_marker in overlays:
    plt.scatter(idx, data[idx], color=overlay_color, label=overlay_label, marker=overlay_marker)
plt.legend()
plt.title('Outliers Detected by Z-score, IQR, and Combined Methods')
plt.show()

# Summary of Outlier Detection
# Maps each method to the indices it flagged. The methods generally flag
# different numbers of points, so these collections have unequal lengths.
# The combined indices are sorted so the column order is deterministic
# (set iteration order is not).
outlier_summary = {
    "Z-score Outliers": z_outliers[0],
    "IQR Outliers": iqr_outliers[0],
    "Combined Outliers": sorted(combined_outliers),
}

# Display summary table
# Passing unequal-length arrays/lists straight to pd.DataFrame raises
# "ValueError: All arrays must be of the same length". Wrapping each column in
# a Series lets pandas align them by row position and pad the shorter ones;
# the nullable "Int64" dtype keeps the indices as integers (padding shows as
# <NA>) instead of upcasting them to floats with NaN.
outlier_summary_df = pd.DataFrame(
    {
        method: pd.Series(indices, dtype="Int64")
        for method, indices in outlier_summary.items()
    }
)
outlier_summary_df.head()