# Outlier Handling Guide with Examples
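This notebook demonstrates how to detect outliers (IQR rule, z-score, boxplot) and how to handle them (removal, capping/flooring, log transformation, flagging) on a small salary dataset.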

## Step 1: Create Sample Dataset

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt

# Toy salary data: nine values between 30,000 and 40,000 plus a single
# extreme entry (1,000,000) that the later steps are meant to catch.
df = pd.Series(
    [30000, 32000, 35000, 37000, 40000, 1000000, 33000, 36000, 34000, 39000],
    name='Salary',
).to_frame()
df

## Step 2: Detect Outliers using IQR

In [None]:
# Quartiles of the salary column and the interquartile range between them.
Q1, Q3 = (df['Salary'].quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1

# Fences sit 1.5 * IQR outside the quartiles; anything strictly beyond a
# fence is treated as an outlier.
lower, upper = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

outliers = df.loc[df['Salary'].lt(lower) | df['Salary'].gt(upper)]
outliers

## Step 3: Detect Outliers using Z-Score

In [None]:
# Classical z-scores: distance from the mean in units of the population
# standard deviation (scipy's default, ddof=0).
z_scores = np.abs(stats.zscore(df['Salary']))

# With the population standard deviation, |z| can never exceed sqrt(n - 1).
# For the 10 rows used here that bound is exactly 3 (and is only reached when
# all other values are identical), so `z_scores > 3` selects nothing: the
# extreme salary inflates the mean and standard deviation enough to hide
# itself. The modified z-score replaces them with the median and the median
# absolute deviation (MAD), which a single extreme value cannot distort.
# The 0.6745 scale factor and the 3.5 cut-off follow the usual
# Iglewicz-Hoaglin recommendation.
# NOTE: if more than half of the values are identical the MAD is 0 and the
# scores become inf/NaN; that does not happen with this sample.
salary_median = df['Salary'].median()
salary_mad = (df['Salary'] - salary_median).abs().median()
modified_z_scores = 0.6745 * (df['Salary'] - salary_median).abs() / salary_mad

df[modified_z_scores > 3.5]

## Step 4: Visualize Outliers using Boxplot

In [None]:
# Draw the salary boxplot and title the Axes that seaborn returns.
sns.boxplot(data=df, x='Salary').set_title('Boxplot of Salary')
plt.show()

## Step 5: Remove Outliers using IQR

In [None]:
# Keep only the rows whose salary lies within the IQR fences; both fences
# are inclusive.
df_no_outliers = df.loc[df['Salary'].between(lower, upper)]
df_no_outliers

## Step 6: Cap/Floor Outliers

In [None]:
# Work on a copy of df: salaries below the lower fence are raised to it,
# salaries above the upper fence are pulled down to it, everything else is
# left as is.
df_capped = df.assign(
    Salary=lambda frame: np.where(
        frame['Salary'] < lower,
        lower,
        np.where(frame['Salary'] > upper, upper, frame['Salary']),
    )
)
df_capped

## Step 7: Apply Log Transformation

In [None]:
# log1p (log(1 + x)) compresses the long right tail of the salary values.
df['Log_Salary'] = df['Salary'].pipe(np.log1p)

# Histogram with a KDE overlay of the transformed column.
sns.histplot(data=df, x='Log_Salary', kde=True).set_title('Log-Transformed Salary')
plt.show()

# Show the raw and transformed values side by side.
df.loc[:, ['Salary', 'Log_Salary']]

## Step 8: Add Outlier Flag

In [None]:
# Boolean marker column: True where the salary falls strictly outside the
# IQR fences, so rows can be kept but still identified downstream.
df['is_outlier'] = df['Salary'].lt(lower) | df['Salary'].gt(upper)
df