# Handling missing values

In [1]:
import pandas as pd

df = pd.read_csv("iris.csv")

# Drop the 'flower' column so only the measurement columns remain
df = df.drop(columns=['flower'])

# Convert to numeric (stray text values become NaN instead of raising)
numeric_cols = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Rows with all NaN.
# This has to run BEFORE any filling: once the NaNs are replaced below, an
# entirely-missing row turns into (mean, median, 0, 0) and can no longer be
# told apart from a real observation, so the check would always come back empty.
print(df[df.isnull().all(axis=1)])

# Now safely fill NaNs, one strategy per column
df['sepal_length'] = df['sepal_length'].fillna(df['sepal_length'].mean())   # mean
df['sepal_width']  = df['sepal_width'].fillna(df['sepal_width'].median())  # median
df['petal_length'] = df['petal_length'].fillna(0)                          # 0
df['petal_width']  = df['petal_width'].fillna(0)                           # 0

# Alternative: fill all numeric NaNs with each column's median in one call.
# With the per-column fills above already applied there is normally nothing
# left for this to fill; it is kept to show the one-line approach.
df = df.fillna(df.median(numeric_only=True))

# Suspicious rows with all zeros.
# Checked after filling, so petal values that were filled with 0 above
# count as zeros here as well.
print(df[(df == 0).all(axis=1)])


Empty DataFrame
Columns: [sepal_length, sepal_width, petal_length, petal_width]
Index: []
Empty DataFrame
Columns: [sepal_length, sepal_width, petal_length, petal_width]
Index: []
