In [3]:
import pandas as pd
import numpy as np

# Step 1: Create a DataFrame with some invalid data
data = {
    "Name": ["Liam", "Sophia", "Invalid Data", "Olivia", "Ethan"],
    "Age": [28, np.nan, 34, 21, np.nan],
    "Height": [172, 158, np.nan, 165, 180],
    "Weight": [70, 55, 68, np.nan, 80],
    "Grade": ["B", "A", np.nan, "C", "B"],
    "Gender": ["Male", "Female", "Unknown", "Female", "Male"],
}

df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# Step 2: Remove rows with invalid Name entries
df_cleaned = df[df["Name"] != "Invalid Data"]

# Step 3: Handle missing values in the cleaned DataFrame
df_cleaned["Age"].fillna(df_cleaned["Age"].mean(), inplace=True)
df_cleaned["Height"].fillna(df_cleaned["Height"].median(), inplace=True)
df_cleaned["Weight"].fillna(df_cleaned["Weight"].mean(), inplace=True)
df_cleaned["Grade"].fillna(df_cleaned["Grade"].mode()[0], inplace=True)

print("\nCleaned DataFrame:")
print(df_cleaned)

Original DataFrame:
           Name   Age  Height  Weight Grade   Gender
0          Liam  28.0   172.0    70.0     B     Male
1        Sophia   NaN   158.0    55.0     A   Female
2  Invalid Data  34.0     NaN    68.0   NaN  Unknown
3        Olivia  21.0   165.0     NaN     C   Female
4         Ethan   NaN   180.0    80.0     B     Male

Cleaned DataFrame:
     Name   Age  Height     Weight Grade  Gender
0    Liam  28.0   172.0  70.000000     B    Male
1  Sophia  24.5   158.0  55.000000     A  Female
3  Olivia  21.0   165.0  68.333333     C  Female
4   Ethan  24.5   180.0  80.000000     B    Male


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_cleaned["Age"].fillna(df_cleaned["Age"].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned["Age"].fillna(df_cleaned["Age"].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)'