In [None]:
Perform the following operations using Python by creating student
performance dataset.

1. Display Missing Values
2. Replace missing values using any 2 suitable
3. Identify outliers using IQR and ZScore
4.Handle outlier using any technique
5.Perform data normalization using decimal scaling

In [8]:
import pandas as pd
import numpy as np
from scipy import stats

# Create a sample student performance dataset
data = {
    'Name': ['John', 'Alice', 'Bob', 'Sarah', 'Mike'],
    'Math': [80, 90, np.nan, 85, 88],
    'Science': [85, np.nan, 92, 88, 90],
    'English': [np.nan, 85, 90, 92, 87]
}
df = pd.DataFrame(data)

# 1. Display Missing Values
print("Missing Values:")
print(df.isnull())

# 2. Replace missing values using mean and median
df_mean = df.fillna(df.select_dtypes(include=np.number).mean())
df_median = df.fillna(df.select_dtypes(include=np.number).median())

# 3. Identify outliers using IQR and ZScore
def detect_outliers_iqr(data):
    q1 = np.percentile(data, 25)
    q3 = np.percentile(data, 75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    outliers = data[(data < lower_bound) | (data > upper_bound)]
    return outliers

def detect_outliers_zscore(data):
    z_scores = np.abs(stats.zscore(data))
    threshold = 3
    outliers = data[(z_scores > threshold)]
    return outliers

outliers_iqr = detect_outliers_iqr(df_mean.select_dtypes(include=np.number))
outliers_zscore = detect_outliers_zscore(df_mean.select_dtypes(include=np.number))

print("\nOutliers Detected using IQR:")
print(outliers_iqr)
print("\nOutliers Detected using ZScore:")
print(outliers_zscore)

# 4. Handle outliers by replacing with NaN
df_mean[outliers_iqr.index] = np.nan

# 5. Perform data normalization using decimal scaling
def decimal_scaling(data):
    max_val = data.abs().max()
    return data / (10 ** np.ceil(np.log10(max_val)))

df_normalized = decimal_scaling(df_mean.select_dtypes(include=np.number))

print("\nAfter Handling Outliers and Normalization:")
print(df_normalized)


Missing Values:
    Name   Math  Science  English
0  False  False    False     True
1  False  False     True    False
2  False   True    False    False
3  False  False    False    False
4  False  False    False    False

Outliers Detected using IQR:
   Math  Science  English
0   NaN      NaN      NaN
1   NaN      NaN      NaN
2   NaN      NaN      NaN
3   NaN      NaN      NaN
4   NaN      NaN      NaN

Outliers Detected using ZScore:
   Math  Science  English
0   NaN      NaN      NaN
1   NaN      NaN      NaN
2   NaN      NaN      NaN
3   NaN      NaN      NaN
4   NaN      NaN      NaN

After Handling Outliers and Normalization:
     Math  Science  English   0   1   2   3   4
0  0.8000   0.8500    0.885 NaN NaN NaN NaN NaN
1  0.9000   0.8875    0.850 NaN NaN NaN NaN NaN
2  0.8575   0.9200    0.900 NaN NaN NaN NaN NaN
3  0.8500   0.8800    0.920 NaN NaN NaN NaN NaN
4  0.8800   0.9000    0.870 NaN NaN NaN NaN NaN
