In [2]:
# prompt: handling missing data

import pandas as pd

# Walkthrough of the common pandas strategies for missing data.  Every step
# below returns a NEW DataFrame; the original `df` is never modified, so each
# strategy is demonstrated against the same input.

# Sample DataFrame with missing values.  `None` in a numeric column is stored
# as NaN, which is why all three columns come out as float64.
data = {'A': [1, 2, None, 4, 5],
        'B': [6, None, 8, 9, 10],
        'C': [11, 12, 13, None, 15]}
df = pd.DataFrame(data)

# 1. Check for missing values (count of NaN per column)
print(df.isnull().sum())

# 2. Drop rows with any missing values
df_dropped = df.dropna()
print("\nDataFrame after dropping rows with any missing values:\n", df_dropped)

# 3. Drop columns with any missing values.
# NOTE: every column in the sample data contains one NaN, so the result here
# is an empty DataFrame (no columns, original index kept).
df_dropped_cols = df.dropna(axis=1)
print("\nDataFrame after dropping columns with any missing values:\n", df_dropped_cols)

# 4. Fill missing values with a specific value (e.g., 0)
df_filled_zero = df.fillna(0)
print("\nDataFrame after filling missing values with 0:\n", df_filled_zero)

# 5. Fill missing values with the mean of the column.
# df.mean() returns a Series indexed by column name; fillna aligns on those
# labels, so each column is filled with its own mean.
df_filled_mean = df.fillna(df.mean())
print("\nDataFrame after filling missing values with column means:\n", df_filled_mean)


# 6. Fill missing values with the median of the column
df_filled_median = df.fillna(df.median())
print("\nDataFrame after filling missing values with column medians:\n", df_filled_median)

# 7. Forward fill: propagate the last valid value downwards.
# Uses DataFrame.ffill() rather than fillna(method='ffill'), whose `method`
# keyword is deprecated and emits a FutureWarning.
df_ffill = df.ffill()
print("\nDataFrame after forward fill:\n", df_ffill)

# 8. Backward fill: propagate the next valid value upwards.
# Uses DataFrame.bfill() for the same reason as the forward fill above.
df_bfill = df.bfill()
print("\nDataFrame after backward fill:\n", df_bfill)


A    1
B    1
C    1
dtype: int64

DataFrame after dropping rows with any missing values:
      A     B     C
0  1.0   6.0  11.0
4  5.0  10.0  15.0

DataFrame after dropping columns with any missing values:
 Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4]

DataFrame after filling missing values with 0:
      A     B     C
0  1.0   6.0  11.0
1  2.0   0.0  12.0
2  0.0   8.0  13.0
3  4.0   9.0   0.0
4  5.0  10.0  15.0

DataFrame after filling missing values with column means:
      A      B      C
0  1.0   6.00  11.00
1  2.0   8.25  12.00
2  3.0   8.00  13.00
3  4.0   9.00  12.75
4  5.0  10.00  15.00

DataFrame after filling missing values with column medians:
      A     B     C
0  1.0   6.0  11.0
1  2.0   8.5  12.0
2  3.0   8.0  13.0
3  4.0   9.0  12.5
4  5.0  10.0  15.0

DataFrame after forward fill:
      A     B     C
0  1.0   6.0  11.0
1  2.0   6.0  12.0
2  2.0   8.0  13.0
3  4.0   9.0  13.0
4  5.0  10.0  15.0

DataFrame after backward fill:
      A     B     C
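0  1.0   6.0  11.0
1  2.0   8.0  12.0
2  4.0   8.0  13.0
3  4.0   9.0  15.0
4  5.0  10.0  15.0

FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.
  df_ffill = df.fillna(method='ffill')
FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.
  df_bfill = df.fillna(method='bfill')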
