In [6]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
# Preprocessing walkthrough on a 5-row toy frame:
#   1. mean-impute the numeric columns,
#   2. fill + label-encode + one-hot encode the categorical column,
#   3. standard-scale the numeric columns.
# Every stage prints its result so the stages can be compared by eye.

# Shared by the imputation and scaling stages.
numeric_cols = ['A', 'C']
# Visual divider printed under every section heading.
section_rule = "....................................................................\n"

# Each column has exactly one missing entry (None).
data = dict(
    A=[1, 2, None, 4, 5],
    B=['X', None, 'Y', 'Z', 'X'],
    C=[7, 8, 9, None, 11],
)
df = pd.DataFrame(data)
print("DataSet:\n", df)

# ---- Stage 1: missing numeric values ----
print("\nHandling missing Values\n")
print(section_rule)
imputer = SimpleImputer(strategy='mean')
imputed_values = imputer.fit_transform(df[numeric_cols])
# Written back into df, so every later stage sees the imputed numbers.
df[numeric_cols] = imputed_values
print("DataSet after handling Missing Values of A and C Columns:\n", df[numeric_cols])

# ---- Stage 2: categorical column B ----
print("\nEncoding\n")
print(section_rule)
# The missing category becomes its own explicit level before encoding.
df['B'] = df['B'].fillna('Unknown')
print("\nDataSet after handling Missing Values of B Before Label encoding:\n", df['B'])

label_encoder = LabelEncoder()
b_codes = label_encoder.fit_transform(df['B'])
df['B_encoded'] = b_codes
print("\nDataSet after handling Missing Values of B After Label encoding:\n", df['B_encoded'])

# One-hot encode the integer codes; the encoder returns a sparse matrix,
# which is densified so it can be wrapped in a DataFrame.
one_hot_encoder = OneHotEncoder()
one_hot_sparse = one_hot_encoder.fit_transform(df[['B_encoded']])
encoded_data = one_hot_sparse.toarray()
n_indicator_cols = encoded_data.shape[1]
indicator_names = [f'B_{i}' for i in range(n_indicator_cols)]
encoded_df = pd.DataFrame(encoded_data, columns=indicator_names)
# Side-by-side join; both frames carry the same default 0..4 index.
df1 = pd.concat([df, encoded_df], axis="columns")
print("DataSet after handling Missing Values of B After one_hot_encoder:\n", df1)

# ---- Stage 3: feature scaling ----
print("\nFeature scaling\n")
print(section_rule)
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[numeric_cols])
scaled_df = pd.DataFrame(scaled_data, columns=['A_scaled', 'C_scaled'])
# df already contains B_encoded at this point (added in stage 2), but not
# the one-hot columns, which only live in df1.
df2 = pd.concat([df, scaled_df], axis="columns")
print("Feature Scaling using Standard scaler\n", df2)

DataSet:
      A     B     C
0  1.0     X   7.0
1  2.0  None   8.0
2  NaN     Y   9.0
3  4.0     Z   NaN
4  5.0     X  11.0

Handling missing Values

....................................................................

DataSet after handling Missing Values of A and C Columns:
      A      C
0  1.0   7.00
1  2.0   8.00
2  3.0   9.00
3  4.0   8.75
4  5.0  11.00

Encoding

....................................................................


DataSet after handling Missing Values of B Before Label encoding:
 0          X
1    Unknown
2          Y
3          Z
4          X
Name: B, dtype: object

DataSet after handling Missing Values of B After Label encoding:
 0    1
1    0
2    2
3    3
4    1
Name: B_encoded, dtype: int64
DataSet after handling Missing Values of B After one_hot_encoder:
      A        B      C  B_encoded  B_0  B_1  B_2  B_3
0  1.0        X   7.00          1  0.0  1.0  0.0  0.0
1  2.0  Unknown   8.00          0  1.0  0.0  0.0  0.0
2  3.0        Y   9.00          2  0.0 