# Drop missing rows

In [5]:
import pandas as pd
# Sample dataset with missing values
data = {
    'Feature1': [1, 2, 3, None, 5, 6],
    'Feature2': [7, None, 9, 10, 11, 12],
    'Feature3': ['A', 'B', None, 'C', 'D', 'E'],
}
df = pd.DataFrame(data)
print("Initial dataset:")
print(df)
# Method 1: Removal of Rows with Missing Values
# This method removes rows containing any missing values from the dataset.
df_removed = df.dropna()
print("Dataset after removing rows with missing values:")
print(df_removed)
# Method 2: Listwise Deletion (Removal of Rows with Any Missing Values in Specific Columns)
# In this method, we remove rows with missing values in specific columns.
# Specify the columns where missing values should be considered for removal
columns_to_check = ['Feature1', 'Feature2']
df_listwise_deleted = df.dropna(subset=columns_to_check)
print("\nDataset after listwise deletion based on specific columns:")
print(df_listwise_deleted)


Initial dataset:
   Feature1  Feature2 Feature3
0       1.0       7.0        A
1       2.0       NaN        B
2       3.0       9.0     None
3       NaN      10.0        C
4       5.0      11.0        D
5       6.0      12.0        E
Dataset after removing rows with missing values:
   Feature1  Feature2 Feature3
0       1.0       7.0        A
4       5.0      11.0        D
5       6.0      12.0        E

Dataset after listwise deletion based on specific columns:
   Feature1  Feature2 Feature3
0       1.0       7.0        A
2       3.0       9.0     None
4       5.0      11.0        D
5       6.0      12.0        E


# Fill with mean

In [8]:
import pandas as pd
import numpy as np

# Create a sample dataset with missing values
data = {
    'Feature1': [1, 2, None, 4, 5, 6],
    'Feature2': [7, None, 9, 10, None, 12],
    'Feature3': ['A', 'B', 'C', None, 'D', 'E'],
}

df = pd.DataFrame(data)
print(df)
# Method 1: Mean Imputation
mean_imputed_df = df.copy()
mean_imputed_df['Feature1'].fillna(df['Feature1'].mean(), inplace=True)
mean_imputed_df['Feature2'].fillna(df['Feature2'].mean(), inplace=True)

print("Dataset after mean imputation:")
print(mean_imputed_df)

# Method 2: Median Imputation
median_imputed_df = df.copy()
median_imputed_df['Feature1'].fillna(df['Feature1'].median(), inplace=True)
median_imputed_df['Feature2'].fillna(df['Feature2'].median(), inplace=True)

print("\nDataset after median imputation:")
print(median_imputed_df)

# Method 3: Mode Imputation (for categorical data)
mode_imputed_df = df.copy()
mode_imputed_df['Feature3'].fillna(df['Feature3'].mode(), inplace=True)

print("\nDataset after mode imputation:")
print(mode_imputed_df)


   Feature1  Feature2 Feature3
0       1.0       7.0        A
1       2.0       NaN        B
2       NaN       9.0        C
3       4.0      10.0     None
4       5.0       NaN        D
5       6.0      12.0        E
Dataset after mean imputation:
   Feature1  Feature2 Feature3
0       1.0       7.0        A
1       2.0       9.5        B
2       3.6       9.0        C
3       4.0      10.0     None
4       5.0       9.5        D
5       6.0      12.0        E

Dataset after median imputation:
   Feature1  Feature2 Feature3
0       1.0       7.0        A
1       2.0       9.5        B
2       4.0       9.0        C
3       4.0      10.0     None
4       5.0       9.5        D
5       6.0      12.0        E

Dataset after mode imputation:
   Feature1  Feature2 Feature3
0       1.0       7.0        A
1       2.0       NaN        B
2       NaN       9.0        C
3       4.0      10.0        D
4       5.0       NaN        D
5       6.0      12.0        E
