In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Toy student records that deliberately contain dirty values so the later
# cleaning steps have something to work on: a missing math score (NaN), a
# non-numeric 'na' reading score, and an attendance value of 110.
data = dict(
    Student_ID=range(1, 11),
    Math_Score=[85, 92, np.nan, 45, 120, 88, 76, 95, 30, 82],
    Reading_Score=[78, 'na', 82, 91, 85, 77, 89, 10, 93, 80],
    Attendance=[95, 80, 85, 90, 92, 110, 88, 75, 82, 85],
)

# One row per student, one column per key of `data`.
df = pd.DataFrame(data)
print(
    "Initial Dataset:\n",
    df,
)

Initial Dataset:
    Student_ID  Math_Score Reading_Score  Attendance
0           1        85.0            78          95
1           2        92.0            na          80
2           3         NaN            82          85
3           4        45.0            91          90
4           5       120.0            85          92
5           6        88.0            77         110
6           7        76.0            89          88
7           8        95.0            10          75
8           9        30.0            93          82
9          10        82.0            80          85


In [2]:
# Force Reading_Score to a numeric dtype; entries that cannot be parsed
# (such as the string 'na') are coerced to NaN.
df['Reading_Score'] = pd.to_numeric(df['Reading_Score'], errors='coerce')

# Fill the missing scores in each score column with that column's median.
for score_col in ('Math_Score', 'Reading_Score'):
    df[score_col] = df[score_col].fillna(df[score_col].median())

# Attendance values above 100 are capped at 100.
df['Attendance'] = df['Attendance'].clip(upper=100)

print(
    "\nAfter Cleaning Inconsistencies:\n",
    df,
)



After Cleaning Inconsistencies:
    Student_ID  Math_Score  Reading_Score  Attendance
0           1        85.0           78.0          95
1           2        92.0           82.0          80
2           3        85.0           82.0          85
3           4        45.0           91.0          90
4           5       120.0           85.0          92
5           6        88.0           77.0         100
6           7        76.0           89.0          88
7           8        95.0           10.0          75
8           9        30.0           93.0          82
9          10        82.0           80.0          85


In [3]:
def handle_outliers(df, column, factor=1.5):
    """Return a copy of ``df`` with ``column`` clipped to its IQR fences.

    The fences are ``Q1 - factor * IQR`` and ``Q3 + factor * IQR``, where Q1
    and Q3 are the 25th and 75th percentiles of ``column`` and
    ``IQR = Q3 - Q1``. Values outside the fences are replaced by the nearest
    fence; values inside are left unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column``. It is not modified.
    column : str
        Name of the numeric column to clip.
    factor : float, default 1.5
        Multiplier applied to the IQR to position the fences.

    Returns
    -------
    pandas.DataFrame
        A new frame identical to ``df`` except for the clipped ``column``.
    """
    q1 = df[column].quantile(0.25)
    q3 = df[column].quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - factor * iqr
    upper_bound = q3 + factor * iqr

    # Work on a copy so the caller's frame is never altered as a side effect;
    # this keeps the function safe to re-run and safe to call on a slice.
    result = df.copy()
    result[column] = result[column].clip(lower=lower_bound, upper=upper_bound)
    return result

# Apply the IQR-based clipping to each score column in turn, rebinding `df`
# to the frame returned by handle_outliers after every pass.
score_columns = ['Math_Score', 'Reading_Score']
for col in score_columns:
    df = handle_outliers(df, col)

print(
    "\nAfter Outlier Treatment:\n",
    df,
)


After Outlier Treatment:
    Student_ID  Math_Score  Reading_Score  Attendance
0           1       85.00          78.00          95
1           2       92.00          82.00          80
2           3       85.00          82.00          85
3           4       57.25          91.00          90
4           5      111.25          85.00          92
5           6       88.00          77.00         100
6           7       76.00          89.00          88
7           8       95.00          64.25          75
8           9       57.25          93.00          82
9          10       82.00          80.00          85


In [4]:
from sklearn.preprocessing import MinMaxScaler

# Min-Max scale Math_Score and store the result in a new Math_Scaled column.
scaler = MinMaxScaler()

# Double brackets select a one-column DataFrame (2-D) rather than a Series,
# which is the input shape passed to fit_transform here.
math_scores = df[['Math_Score']]
df['Math_Scaled'] = scaler.fit_transform(math_scores)

print(
    "\nAfter Min-Max Transformation on Math_Score:\n",
    df[['Student_ID', 'Math_Score', 'Math_Scaled']],
)


After Min-Max Transformation on Math_Score:
    Student_ID  Math_Score  Math_Scaled
0           1       85.00     0.513889
1           2       92.00     0.643519
2           3       85.00     0.513889
3           4       57.25     0.000000
4           5      111.25     1.000000
5           6       88.00     0.569444
6           7       76.00     0.347222
7           8       95.00     0.699074
8           9       57.25     0.000000
9          10       82.00     0.458333
