In [40]:
import pandas as pd
import numpy as np

# Replacing Missing Value with Mean - for Numericals
df = pd.DataFrame({'Age':[25,30,None,22,28]})
print(f'Data Before: \n{df}')
print(f'\nChecking Missing Data Before: \n{df.isnull().sum()}')

df['Age'].fillna(df['Age'].mean(), inplace=True)
print(f'\nChecking Missing Data After: \n{df.isnull().sum()}')

print(f'\nAfter Treating Missing Data: \n{df}')

Data Before: 
    Age
0  25.0
1  30.0
2   NaN
3  22.0
4  28.0

Checking Missing Data Before: 
Age    1
dtype: int64

Checking Missing Data After: 
Age    0
dtype: int64

After Treating Missing Data: 
     Age
0  25.00
1  30.00
2  26.25
3  22.00
4  28.00


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].mean(), inplace=True)


In [41]:
# Replacing with the Median
df1 = pd.DataFrame({'Salary': [50000,60000,None,55000,52000]})
df1.info()
df1.isnull().sum()
df1['Salary'].fillna(df1['Salary'].median(), inplace = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Salary  4 non-null      float64
dtypes: float64(1)
memory usage: 172.0 bytes


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df1['Salary'].fillna(df1['Salary'].median(), inplace = True)


In [21]:
# Replacing with Modal Value
import pandas as pd
df = pd.DataFrame({'Color':['Red', 'Blue', None, 'Blue', 'Red']})
df.info()
df.isnull().sum()

mode_value = df['Color'].mode()[0]
df['Color'].fillna(mode_value, inplace = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Color   4 non-null      object
dtypes: object(1)
memory usage: 172.0+ bytes


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Color'].fillna(mode_value, inplace = True)


In [12]:
df.isnull()
df

Unnamed: 0,Color
0,Red
1,Blue
2,Blue
3,Blue
4,Red
