In [1]:
import pandas as pd

# Sample dataset
# Small demo frame with three integer columns; the last row (index 4) holds
# one extreme value per column so the IQR rule has something to flag.
data = pd.DataFrame(
    {
        'A': [10, 12, 14, 15, 100],
        'B': [20, 22, 25, 23, -10],
        'C': [30, 29, 31, 32, 250],
    }
)

# Header line followed by the frame's plain-text rendering.
print("Original Data:", data, sep="\n")

Original Data:
     A   B    C
0   10  20   30
1   12  22   29
2   14  25   31
3   15  23   32
4  100 -10  250


In [2]:
def find_outliers_iqr(data, factor=1.5):
    """Collect the IQR-rule outliers of every column of ``data``.

    For each column the quartiles Q1 and Q3 are computed, and any value
    strictly below ``Q1 - factor * IQR`` or strictly above
    ``Q3 + factor * IQR`` (with ``IQR = Q3 - Q1``) is reported.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to inspect. Every column is analysed -- non-numeric columns are
        not filtered out here, so pass
        ``data.select_dtypes(include=['number'])`` for mixed frames.
    factor : float, default 1.5
        Multiplier applied to the IQR when building the fences.

    Returns
    -------
    list of pandas.Series
        One Series per column, in ``data.columns`` order. Each Series holds
        the outlying values of that column and keeps their original index
        labels and the column name.
    """
    outliers = []
    for col in data.columns:
        values = data[col]
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - factor * iqr
        upper_bound = q3 + factor * iqr

        # Mask the column itself rather than filtering the whole frame and
        # then selecting the column (avoids a full-frame copy per column).
        col_outliers = values[(values < lower_bound) | (values > upper_bound)]
        outliers.append(col_outliers)
    return outliers

In [3]:
# Detect outliers for every column of the demo frame.
outliers = find_outliers_iqr(data)

# The result list is in data.columns order, so pair each column name with
# its Series of outlying values for the report.
for col_name, col_outliers in zip(data.columns, outliers):
    print(f"Outliers in column {col_name}: {col_outliers.values}")

Outliers in column A: [100]
Outliers in column B: [-10]
Outliers in column C: [250]


In [4]:
def remove_outliers_iqr(data, factor=1.5):
    """Return a copy of ``data`` without rows that contain an IQR outlier.

    For every numeric column, the fences ``Q1 - factor * IQR`` and
    ``Q3 + factor * IQR`` are computed on the *original* data. A row is kept
    only if its value lies within the fences (inclusive) in every numeric
    column.

    Computing all fences up front, instead of re-filtering the frame column
    by column, makes the result independent of column order: otherwise the
    quartiles of later columns would be taken from rows already trimmed by
    earlier columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; it is not modified. Non-numeric columns are ignored when
        deciding which rows to keep, but are carried through to the result.
    factor : float, default 1.5
        Multiplier applied to the IQR when building the fences.

    Returns
    -------
    pandas.DataFrame
        New frame with the surviving rows and their original index labels.
        Rows whose value in a numeric column is NaN fail the comparison and
        are therefore dropped as well.
    """
    # Start by keeping every row; each numeric column can only veto rows.
    keep = pd.Series(True, index=data.index, dtype=bool)
    for col in data.select_dtypes(include=['number']).columns:
        values = data[col]
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - factor * iqr
        upper_bound = q3 + factor * iqr

        # Keep only non-outliers (bounds are inclusive).
        keep = keep & (values >= lower_bound) & (values <= upper_bound)
    return data[keep].copy()

In [5]:
# Drop every row that holds an IQR outlier in any numeric column; the
# original `data` frame is left untouched.
data_clean = remove_outliers_iqr(data)

# Header line followed by the cleaned frame's plain-text rendering.
print("Data after removing outliers:", data_clean, sep="\n")

Data after removing outliers:
    A   B   C
0  10  20  30
1  12  22  29
2  14  25  31
3  15  23  32
