In [None]:
Perform the following operations using Python by creating student
performance dataset.

1. Display Missing Values
2. Replace missing values using any 2 suitable
3. Identify outliers using IQR and ZScore
4. Handle outlier using any technique
5. Perform data normalization using Min Max

In [12]:
import pandas as pd
import numpy as np
from scipy import stats
from sklearn.preprocessing import MinMaxScaler

# Load the dataset
data = {
    'Name': ['John', 'Alice', 'Bob', 'Sarah', 'Mike'],
    'Age': [20, np.nan, 22, 23, 24],
    'Score': [85, 90, np.nan, 88, 92],
    'Attendance': [85, np.nan, 90, 92, 87]
}
df = pd.DataFrame(data)

# 1. Display Missing Values
print("Missing Values:")
print(df.isnull())

# 2. Replace missing values using mean and median
numeric_columns = df.select_dtypes(include=np.number).columns
df_mean = df.fillna(df[numeric_columns].mean())
df_median = df.fillna(df[numeric_columns].median())

# 3. Identify outliers using IQR and ZScore
def detect_outliers_iqr(data):
    q1 = np.percentile(data, 25)
    q3 = np.percentile(data, 75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    outliers = data[(data < lower_bound) | (data > upper_bound)]
    return outliers

def detect_outliers_zscore(data):
    z_scores = np.abs(stats.zscore(data))
    threshold = 3
    outliers = data[(z_scores > threshold)]
    return outliers

outliers_iqr = detect_outliers_iqr(df_mean.select_dtypes(include=np.number))
outliers_zscore = detect_outliers_zscore(df_mean.select_dtypes(include=np.number))

print("\nOutliers Detected using IQR:")
print(outliers_iqr)
print("\nOutliers Detected using ZScore:")
print(outliers_zscore)

# 4. Handle outlier using any technique
# Replace outliers with NaN in the original DataFrame
df_mean[outliers_iqr.index] = np.nan

# 5. Perform data normalization using Min Max
def min_max_scaling(data):
    min_val = data.min()
    max_val = data.max()
    return (data - min_val) / (max_val - min_val)

df_normalized = df_mean.select_dtypes(include=np.number).apply(min_max_scaling)

print("\nAfter Handling Outliers and Normalization:")
print(df_normalized)


Missing Values:
    Name    Age  Score  Attendance
0  False  False  False       False
1  False   True  False        True
2  False  False   True       False
3  False  False  False       False
4  False  False  False       False

Outliers Detected using IQR:
   Age  Score  Attendance
0  NaN    NaN         NaN
1  NaN    NaN         NaN
2  NaN    NaN         NaN
3  NaN    NaN         NaN
4  NaN    NaN         NaN

Outliers Detected using ZScore:
   Age  Score  Attendance
0  NaN    NaN         NaN
1  NaN    NaN         NaN
2  NaN    NaN         NaN
3  NaN    NaN         NaN
4  NaN    NaN         NaN

After Handling Outliers and Normalization:
      Age     Score  Attendance   0   1   2   3   4
0  0.0000  0.000000    0.000000 NaN NaN NaN NaN NaN
1  0.5625  0.714286    0.500000 NaN NaN NaN NaN NaN
2  0.5000  0.535714    0.714286 NaN NaN NaN NaN NaN
3  0.7500  0.428571    1.000000 NaN NaN NaN NaN NaN
4  1.0000  1.000000    0.285714 NaN NaN NaN NaN NaN
