
This notebook demonstrates how to handle missing values in a dataset using **pandas** in Google Colab.
We'll:
1. Upload a CSV file
2. Inspect missing values
3. Apply three methods: dropping rows (`dropna`), mean fill, and forward fill
4. Compare the results

In [None]:
import pandas as pd
import numpy as np
from google.colab import files

In [None]:
# Ask the user for a dataset through the Colab upload widget.
# `files.upload()` returns a dict keyed by the name of each uploaded file.
print(" Please upload your dataset file (e.g., ideal_weight_dataset_with_missing.csv):")
uploaded = files.upload()

# An empty dict means nothing was uploaded; fail with a clear message here
# rather than with an opaque IndexError on the lookup below.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and select a CSV file.")

# Only the first uploaded file is used; any additional uploads are ignored.
file_name = next(iter(uploaded))
print(f"\n File uploaded successfully: {file_name}")

In [None]:
# Parse the CSV found at `file_name` into the raw DataFrame that every
# later cell reads from.
df = pd.read_csv(file_name)

# Preview of the first rows of the raw data.
print("\n First 5 rows of the dataset:", df.head(), sep="\n")

# Per-column count of missing entries before any handling is applied.
print("\n Number of missing values before handling:", df.isna().sum(), sep="\n")

In [None]:
# Strategy 1 - drop: keep only the rows that have no missing value in any
# column. The result is a new frame; `df` itself is left untouched.
df_drop = df.dropna(axis=0, how="any")
print("\n After using dropna():", df_drop.isna().sum(), sep="\n")

In [None]:
# Strategy 2 - mean fill: replace each missing numeric entry with the mean
# of its own column. Only numeric columns contribute a mean
# (numeric_only=True), so gaps in non-numeric columns are not filled.
column_means = df.mean(numeric_only=True)
df_mean = df.fillna(value=column_means)
print("\n After using mean fill:", df_mean.isna().sum(), sep="\n")

In [None]:
# Strategy 3 - forward fill: each missing entry takes the last valid value
# that appears above it in the same column. A gap with no valid value above
# it (e.g. in the first row) has nothing to copy and stays missing.
df_ffill = df.ffill()
print("\n After using forward fill:", df_ffill.isna().sum(), sep="\n")

In [None]:
# Pair each report column label with the frame it summarises; the dict's
# insertion order fixes the column order of the comparison table.
frames_by_label = {
    'Original Missing': df,
    'After Dropna': df_drop,
    'After Mean Fill': df_mean,
    'After Forward Fill': df_ffill,
}

# One row per dataset column, one column per strategy, each cell holding the
# number of missing values that remain.
comparison = pd.DataFrame(
    {label: frame.isna().sum() for label, frame in frames_by_label.items()}
)

print("\n Comparison of All Three Methods:", comparison, sep="\n")