In [1]:
import pandas as pd
import numpy as np

# Creating a sample dataset with missing values
data = {
    "Name": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Age": [25, np.nan, 30, np.nan, 40],
    "City": ["New York", "Los Angeles", np.nan, "Chicago", "Houston"]
}

df = pd.DataFrame(data)
print("Original DataFrame:\n", df)

# Detect missing values
print("\nMissing Values:\n", df.isnull().sum())

# Fill missing values
df["Age"].fillna(df["Age"].mean(), inplace=True)  # Fill with mean age
df["City"].fillna("Unknown", inplace=True)        # Fill missing city with 'Unknown'

print("\nCleaned DataFrame:\n", df)

# Save cleaned data to CSV
df.to_csv("cleaned_data.csv", index=False)
print("\nCleaned data saved as 'cleaned_data.csv'")

Original DataFrame:
       Name   Age         City
0    Alice  25.0     New York
1      Bob   NaN  Los Angeles
2  Charlie  30.0          NaN
3    David   NaN      Chicago
4      Eve  40.0      Houston

Missing Values:
 Name    0
Age     2
City    1
dtype: int64

Cleaned DataFrame:
       Name        Age         City
0    Alice  25.000000     New York
1      Bob  31.666667  Los Angeles
2  Charlie  30.000000      Unknown
3    David  31.666667      Chicago
4      Eve  40.000000      Houston

Cleaned data saved as 'cleaned_data.csv'


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Age"].fillna(df["Age"].mean(), inplace=True)  # Fill with mean age
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["City"].fillna("Unknown", inplace=True)        # Fill missing city with 'Unknown'
