In [1]:

# Title: Data Cleaning using Pandas
# Description: Check for missing values and handle them by imputing the median.



import pandas as pd
import numpy as np

# Sample dataset with missing values
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Age': [25, np.nan, 30, np.nan, 22],
    'Salary': [50000, 60000, np.nan, 58000, 52000]
}

df = pd.DataFrame(data)

# Check for missing values
print("Missing values before imputation:")
print(df.isnull().sum())

# Impute missing values with median
df['Age'].fillna(df['Age'].median(), inplace=True)
df['Salary'].fillna(df['Salary'].median(), inplace=True)

# Check again for missing values
print("\nMissing values after imputation:")
print(df.isnull().sum())

# Display cleaned DataFrame
print("\nCleaned DataFrame:")
print(df)


Missing values before imputation:
Name      0
Age       2
Salary    1
dtype: int64

Missing values after imputation:
Name      0
Age       0
Salary    0
dtype: int64

Cleaned DataFrame:
      Name   Age   Salary
0    Alice  25.0  50000.0
1      Bob  25.0  60000.0
2  Charlie  30.0  55000.0
3    David  25.0  58000.0
4      Eve  22.0  52000.0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Salary'].fillna(df['Salary'].median(), inplace=True)
