In [1]:
import pandas as pd
import numpy as np

data = {
    'Age': [18, 22, 25, 120, np.nan, 30, 29, 100],
    'Marks': [80, 85, np.nan, 75, 95, 92, 88, 91],
    'Dept': ["CSE", "ECE", "ECE", "CSE", "IT", "CSE", np.nan, "ECE"]
}

df = pd.DataFrame(data)
print("Original Data:\n", df)

df['Marks'].fillna(df['Marks'].mean(), inplace=True)
df['Age'].fillna(df['Age'].median(), inplace=True)
df['Dept'].fillna(df['Dept'].mode()[0], inplace=True)

Q1 = df['Age'].quantile(0.25)
Q3 = df['Age'].quantile(0.75)
IQR = Q3 - Q1

df = df[(df['Age'] >= Q1 - 1.5 * IQR) & (df['Age'] <= Q3 + 1.5 * IQR)]

df_selected = df[['Age', 'Marks']]

df_selected['Marks_Category'] = pd.cut(
    df_selected['Marks'],
    bins=[0, 70, 85, 100],
    labels=["Low", "Medium", "High"]
)

print("\nAfter Preprocessing:\n", df_selected)


Original Data:
      Age  Marks Dept
0   18.0   80.0  CSE
1   22.0   85.0  ECE
2   25.0    NaN  ECE
3  120.0   75.0  CSE
4    NaN   95.0   IT
5   30.0   92.0  CSE
6   29.0   88.0  NaN
7  100.0   91.0  ECE

After Preprocessing:
     Age      Marks Marks_Category
0  18.0  80.000000         Medium
1  22.0  85.000000         Medium
2  25.0  86.571429           High
4  29.0  95.000000           High
5  30.0  92.000000           High
6  29.0  88.000000           High


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Marks'].fillna(df['Marks'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are sett