# Interquartile Range (IQR)

In [1]:
#The Interquartile Range (IQR) is a measure of statistical dispersion, which is the spread of the middle 50% of a dataset. The IQR is calculated as the difference between the third quartile (Q3) and the first quartile (Q1).

# Steps to identify outliers using the IQR:
# 1. Calculate Q1 (25th percentile) and Q3 (75th percentile).
# 2. Calculate the IQR: IQR = Q3 - Q1
# 3. Determine the lower and upper bounds:
#      Lower bound = Q1 - 1.5 * IQR
#      Upper bound = Q3 + 1.5 * IQR
# 4. Identify outliers: any data point below the lower bound or above the upper bound is considered an outlier.

import numpy as np
import pandas as pd

# Toy sample: nine moderate values plus one extreme value (100).
data = [10, 12, 14, 15, 18, 22, 24, 28, 30, 100]  # 100 is an outlier
df = pd.DataFrame(data, columns=['Value'])

# Both quartiles from a single quantile call (25th and 75th percentiles).
Q1, Q3 = df['Value'].quantile([0.25, 0.75])

# Spread of the middle 50% of the sample.
IQR = Q3 - Q1

# Fences sit 1.5 IQRs outside the quartiles.
whisker = 1.5 * IQR
lower_bound = Q1 - whisker
upper_bound = Q3 + whisker

# A row is an outlier when its value falls strictly outside either fence.
below_fence = df['Value'].lt(lower_bound)
above_fence = df['Value'].gt(upper_bound)
outliers = df[below_fence | above_fence]
print("Outliers using IQR method:\n", outliers)


Outliers using IQR method:
    Value
9    100


In [2]:
# The Z-score measures how many standard deviations a data point is from the mean. The Z-score of a data point is calculated as:
# 𝑍=(𝑋−𝜇)/𝜎
# Where:
# X is the data point,
# μ is the mean of the data,
# σ is the standard deviation.

# Steps to Identify Outliers Using Z-score:
# Calculate the mean and standard deviation of the dataset.
# Calculate the Z-score for each data point.
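# Identify outliers: a data point is considered an outlier if the absolute value of its Z-score exceeds a threshold, commonly 3 (i.e., it lies more than 3 standard deviations from the mean).
# Caveat: with n observations and the population standard deviation, no |Z| can exceed (n - 1) / sqrt(n), so a threshold of 3 can never be reached when n <= 10.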

from scipy import stats

# Example data
data = [10, 12, 14, 15, 18, 22, 24, 28, 30, 100]  # 100 is an outlier
df = pd.DataFrame(data, columns=['Value'])

# Classic Z-score: (x - mean) / standard deviation, using scipy's default
# population standard deviation (ddof=0).
df['Z-score'] = stats.zscore(df['Value'])

# Identify outliers with Z-score > 3 or < -3.
# NOTE: for a sample of n points the largest attainable |Z| is
# (n - 1) / sqrt(n). With n = 10 that is about 2.85, so this filter cannot
# flag anything here: the extreme value inflates the very mean and standard
# deviation it is measured against (its Z-score is only about 2.90).
outliers = df[(df['Z-score'] > 3) | (df['Z-score'] < -3)]
print("Outliers using Z-score method:\n", outliers)

# Robust alternative for small samples: the modified Z-score
#     M = 0.6745 * (x - median) / MAD
# where MAD is the median absolute deviation from the median. The median and
# MAD are barely moved by a single extreme value. 0.6745 rescales MAD so that
# M is comparable to a standard-deviation-based score for normal data, and
# 3.5 is the cutoff conventionally recommended for |M|.
# Caveat: if more than half the values are identical, MAD is 0 and the score
# is undefined (division by zero) - not the case for this sample.
MAD_SCALE = 0.6745
MODIFIED_Z_THRESHOLD = 3.5

median = df['Value'].median()
mad = (df['Value'] - median).abs().median()

# Build a new frame instead of mutating `df`, so the frame printed above
# still matches what `df` holds.
df_robust = df.assign(**{'Modified Z-score': MAD_SCALE * (df['Value'] - median) / mad})

robust_outliers = df_robust[df_robust['Modified Z-score'].abs() > MODIFIED_Z_THRESHOLD]
print("Outliers using modified Z-score method:\n", robust_outliers)


Outliers using Z-score method:
 Empty DataFrame
Columns: [Value, Z-score]
Index: []


In [3]:
# Understanding Correlation and Covariance
import numpy as np

# Paired sample measurements; every weight equals its height minus 100,
# so the two variables are perfectly linearly related.
height = [150, 160, 170, 180, 190]
weight = [50, 60, 70, 80, 90]

# Stack the two variables as rows of one 2 x 5 array (one row per variable)
# and compute the 2 x 2 sample covariance matrix.
paired_samples = np.array([height, weight])
cov_matrix = np.cov(paired_samples)
print("Covariance Matrix:\n", cov_matrix)


Covariance Matrix:
 [[250. 250.]
 [250. 250.]]


In [4]:
import numpy as np

# Same paired measurements as the covariance example, redefined so this
# cell runs on its own.
height = [150, 160, 170, 180, 190]
weight = [50, 60, 70, 80, 90]

# Pearson correlation coefficients between the two variables, returned as a
# 2 x 2 matrix (one row/column per variable).
height_arr = np.asarray(height)
weight_arr = np.asarray(weight)
corr_matrix = np.corrcoef(height_arr, weight_arr)
print("Correlation Matrix:\n", corr_matrix)


Correlation Matrix:
 [[1. 1.]
 [1. 1.]]
