# IMPROVED IQR METHOD

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from collections import defaultdict

In [3]:
# Read the cleaned recording from disk into a DataFrame
file_path = "cleaned_dataset.csv"  # Replace with actual file path
data = pd.read_csv(file_path)

# Columns that will be screened for outliers, grouped by name prefix
# (presumably accelerometer and gyroscope axes -- confirm against the dataset docs)
sensor_columns = [
    "Acc X", "Acc Y", "Acc Z",
    "gyro_x", "gyro_y", "gyro_z",
]

# Define the function to detect outliers using skew-adjusted Tukey's IQR method
def detect_outliers_adjusted_tukey(data, column, multiplier=1.5):
    """Flag rows whose ``column`` value lies outside skew-adjusted Tukey fences.

    The classic Tukey fences ``Q1 - k*IQR`` / ``Q3 + k*IQR`` are made
    asymmetric by scaling ``k`` with the sample skewness of the column:
    the lower fence uses ``k * (1 - skew)`` and the upper fence uses
    ``k * (1 + skew)``, so the fence on the long-tail side is pushed outward.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing ``column``.
    column : str
        Name of the numeric column to screen.
    multiplier : float, default 1.5
        Base fence multiplier ``k`` (expected to be non-negative).

    Returns
    -------
    tuple
        ``(IQR, outliers, lower_bound, upper_bound)`` where ``outliers`` is the
        subset of rows of ``data`` whose ``column`` value is strictly below
        ``lower_bound`` or strictly above ``upper_bound``.

    Notes
    -----
    * Each per-side multiplier is clamped at zero. Without the clamp, a
      skewness of magnitude greater than 1 makes one multiplier negative,
      which moves that fence *inside* (or past) the interquartile box and
      flags ordinary values as outliers. With the clamp the fences always
      satisfy ``lower_bound <= Q1`` and ``upper_bound >= Q3``.
    * If the skewness is undefined (``Series.skew`` yields NaN, e.g. for
      fewer than three observations) it is treated as 0, which falls back to
      the symmetric Tukey fences instead of producing NaN bounds.
    """
    values = data[column]

    Q1 = values.quantile(0.25)
    Q3 = values.quantile(0.75)
    IQR = Q3 - Q1

    # Sample skewness; undefined skewness falls back to the symmetric rule.
    skew = values.skew()
    if pd.isna(skew):
        skew = 0.0

    # Per-side multipliers, clamped so a fence can never cross its quartile.
    lower_mult = max(0.0, multiplier * (1 - skew))
    upper_mult = max(0.0, multiplier * (1 + skew))

    # Skew-adjusted fences
    lower_bound = Q1 - lower_mult * IQR
    upper_bound = Q3 + upper_mult * IQR

    # Rows strictly outside the fences
    outliers = data[(values < lower_bound) | (values > upper_bound)]

    return IQR, outliers, lower_bound, upper_bound

# Apply the function to each sensor column and print the results
for col in sensor_columns:
    # Screen one sensor column with the default 1.5 base multiplier
    iqr, outliers, lower_bound, upper_bound = detect_outliers_adjusted_tukey(
        data, col, multiplier=1.5
    )

    # Emit the per-column report in one call; the trailing "\n" item plus the
    # default line ending reproduces the blank lines that separate columns.
    print(
        f"Column: {col}",
        f"IQR: {iqr}",
        f"Lower Bound: {lower_bound}, Upper Bound: {upper_bound}",
        f"Outliers Detected: {len(outliers)}",
        outliers,  # Display outlier rows for each column
        "\n",
        sep="\n",
    )

Column: Acc X
IQR: 0.3895092391999997
Lower Bound: -0.8505992423053192, Upper Bound: 0.7074377144946795
Outliers Detected: 2020
       Longitude   Latitude  Speed     Distance      Time     Acc X     Acc Y  \
96     73.822660  18.501627  0.000     0.025645  18-45-22  0.832253 -0.298408   
97     73.822660  18.501627  0.000     0.025645  18-45-22  0.799070 -0.243648   
98     73.822660  18.501627  0.000     0.025645  18-45-22  0.870786 -0.109471   
99     73.822660  18.501627  0.000     0.025645  18-45-22  0.742543 -0.062337   
164    73.822520  18.501616  0.732    14.906174  18-45-29 -1.082216  1.019267   
...          ...        ...    ...          ...       ...       ...       ...   
14191  73.822813  18.501587  0.000  2096.573200  17-27-29  1.520395  1.565442   
14192  73.822813  18.501587  0.000  2096.573200  17-27-29  1.229296  1.503951   
14193  73.822813  18.501587  0.000  2096.573200  17-27-29  1.619212  1.501497   
14201  73.822813  18.501587  0.000  2096.573200  17-27-30 -0.9