## 4. Handling Outliers in the Boston Housing Dataset
   - Task: Identify and handle outliers in the Boston Housing dataset using techniques like Z-score, IQR, and visualization methods.
   - Dataset: Boston Housing Dataset


## Loading Data


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import zscore

In [None]:
# Read the housing CSV from the Kaggle input directory into `df`,
# then show its first rows as the cell output.
housing_csv_path = '/kaggle/input/bostonhoustingmlnd/housing.csv'
df = pd.read_csv(filepath_or_buffer=housing_csv_path)
df.head(n=5)

## Plotting to See Outliers

In [None]:
# Draw every feature's box plot on one shared axis; points beyond the
# whiskers are the candidate outliers examined in the following cells.
fig, ax = plt.subplots(figsize=(10, 6))
df.boxplot(ax=ax)
ax.set_title('Box plot of all features')
# Tilt the feature names so neighbouring labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

## Outlier Detection and Removal Methods

In [None]:

# Z-score method: flag every value whose absolute Z-score (distance from the
# column mean, measured in standard deviations) exceeds Z_THRESHOLD.
Z_THRESHOLD = 3

# Z-scores are only defined for numeric data, so non-numeric columns are left
# out rather than letting zscore() raise on them. For an all-numeric frame
# this selection is a no-op.
numeric_df = df.select_dtypes(include='number')

# nan_policy='omit' ignores missing values when computing each column's mean
# and standard deviation; with the default ('propagate') a single NaN would
# turn the whole column's scores into NaN and silently hide its outliers.
z_scores = np.abs(zscore(numeric_df, nan_policy='omit'))

# np.where returns a (row_positions, column_positions) pair of index arrays,
# one entry per flagged value. Column positions refer to numeric_df.columns.
z_outliers = np.where(z_scores > Z_THRESHOLD)
print("Z-score outliers:", z_outliers)

# A row can be flagged in several columns, so count distinct row positions.
print("Rows with at least one Z-score outlier:", np.unique(z_outliers[0]).size)


In [None]:
# IQR method: a value is an outlier when it lies more than
# IQR_MULTIPLIER * IQR below the first quartile or above the third quartile.
IQR_MULTIPLIER = 1.5

# Quartiles and the comparisons below only make sense for numeric data, so
# non-numeric columns are excluded up front. For an all-numeric frame this
# selection is a no-op.
numeric_features = df.select_dtypes(include='number')

# Per-feature quartiles and interquartile range (each a Series indexed by column name).
Q1 = numeric_features.quantile(0.25)
Q3 = numeric_features.quantile(0.75)
IQR = Q3 - Q1

# Fences outside which a value counts as an outlier.
lower_fence = Q1 - IQR_MULTIPLIER * IQR
upper_fence = Q3 + IQR_MULTIPLIER * IQR

# Boolean frame shaped like numeric_features: True marks an outlying value.
iqr_outliers = (numeric_features < lower_fence) | (numeric_features > upper_fence)

# Summarise as a per-feature count instead of dumping the full boolean frame,
# which is mostly False and hard to read.
print("IQR outliers:\n", iqr_outliers.sum())


In [None]:
# Keep only the rows in which no feature was flagged by the IQR rule.
has_outlier = iqr_outliers.any(axis=1)
boston_data_cleaned = df.loc[~has_outlier]

# Report the dataset dimensions before and after the filter.
for label, frame in (
    ("Original dataset shape:", df),
    ("Cleaned dataset shape:", boston_data_cleaned),
):
    print(label, frame.shape)


## Plotting After Removing Outliers

In [None]:
# Redraw the per-feature box plots on the filtered data to check how the
# distributions look once the flagged rows have been dropped.
fig, ax = plt.subplots(figsize=(20, 15))
boston_data_cleaned.boxplot(ax=ax)
ax.set_title('Box plot of all features after removing outliers')
# Tilt the feature names so neighbouring labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()
