In [1]:
import pandas as pd
import numpy as np

# Create a sample dataset with missing values
data = {
    'Age': [25, np.nan, 30, 35, np.nan, 40],
    'Gender': ['M', 'F', 'M', np.nan, 'F', 'M'],
    'Salary': [50000, 60000, np.nan, 70000, 80000, np.nan],
}

df = pd.DataFrame(data)
print("Original Dataset:")
print(df)


Original Dataset:
    Age Gender   Salary
0  25.0      M  50000.0
1   NaN      F  60000.0
2  30.0      M      NaN
3  35.0    NaN  70000.0
4   NaN      F  80000.0
5  40.0      M      NaN


In [3]:
# Drop rows with missing values
df_drop_rows = df.dropna()
print("\nAfter Dropping Rows with Missing Values:")
print(df_drop_rows)

# Drop columns with missing values
df_drop_columns = df.dropna(axis=1)
print("\nAfter Dropping Columns with Missing Values:")
print(df_drop_columns)



After Dropping Rows with Missing Values:
    Age Gender   Salary
0  25.0      M  50000.0

After Dropping Columns with Missing Values:
Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4, 5]


In [5]:
# Impute missing values in 'Age' column with the mean
df['Age'] = df['Age'].fillna(df['Age'].mean())
print("\nAfter Imputing Missing Values in 'Age' with Mean:")
print(df)

# Impute missing values in 'Gender' column with the mode (most frequent value)
df['Gender'] = df['Gender'].fillna(df['Gender'].mode()[0])
print("\nAfter Imputing Missing Values in 'Gender' with Mode:")
print(df)

# Impute missing values in 'Salary' column with the median
df['Salary'] = df['Salary'].fillna(df['Salary'].median())
print("\nAfter Imputing Missing Values in 'Salary' with Median:")
print(df)



After Imputing Missing Values in 'Age' with Mean:
    Age Gender   Salary
0  25.0      M  50000.0
1  32.5      F  60000.0
2  30.0      M      NaN
3  35.0    NaN  70000.0
4  32.5      F  80000.0
5  40.0      M      NaN

After Imputing Missing Values in 'Gender' with Mode:
    Age Gender   Salary
0  25.0      M  50000.0
1  32.5      F  60000.0
2  30.0      M      NaN
3  35.0      M  70000.0
4  32.5      F  80000.0
5  40.0      M      NaN

After Imputing Missing Values in 'Salary' with Median:
    Age Gender   Salary
0  25.0      M  50000.0
1  32.5      F  60000.0
2  30.0      M  65000.0
3  35.0      M  70000.0
4  32.5      F  80000.0
5  40.0      M  65000.0


In [7]:
# Forward fill the missing values (use the previous value to fill)
df_ffill = df.fillna(method='ffill')
print("\nAfter Forward Filling Missing Values:")
print(df_ffill)

# Backward fill the missing values (use the next value to fill)
df_bfill = df.fillna(method='bfill')
print("\nAfter Backward Filling Missing Values:")
print(df_bfill)



After Forward Filling Missing Values:
    Age Gender   Salary
0  25.0      M  50000.0
1  32.5      F  60000.0
2  30.0      M  65000.0
3  35.0      M  70000.0
4  32.5      F  80000.0
5  40.0      M  65000.0

After Backward Filling Missing Values:
    Age Gender   Salary
0  25.0      M  50000.0
1  32.5      F  60000.0
2  30.0      M  65000.0
3  35.0      M  70000.0
4  32.5      F  80000.0
5  40.0      M  65000.0


  df_ffill = df.fillna(method='ffill')
  df_bfill = df.fillna(method='bfill')


In [9]:
# Interpolate missing values (linear interpolation by default)
df_interpolated = df.interpolate()
print("\nAfter Interpolating Missing Values:")
print(df_interpolated)



After Interpolating Missing Values:
    Age Gender   Salary
0  25.0      M  50000.0
1  32.5      F  60000.0
2  30.0      M  65000.0
3  35.0      M  70000.0
4  32.5      F  80000.0
5  40.0      M  65000.0


  df_interpolated = df.interpolate()
