In [17]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer

In [12]:
df = pd.read_csv('covid_toy.csv')

In [13]:
df.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


# Using Column Transformer

In [18]:
preprocessor = ColumnTransformer(
transformers=[
    ('imputer',SimpleImputer(strategy='mean'),['fever']),
    ("scalling",StandardScaler(),['age','fever']),
    ('OheEncodinng',OneHotEncoder(drop='first',sparse=False),['gender','city']),
    ('ordinalEncoding',OrdinalEncoder(categories=[['Mild','Strong']]),['cough']),
],
remainder='passthrough'
)

In [19]:
preprocessor

In [20]:
df.head(1)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No


In [21]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,:-1],df.iloc[:,-1],test_size=0.2, random_state=42)

In [22]:
new_X_train=preprocessor.fit_transform(X_train)
new_X_test=preprocessor.fit_transform(X_test)



In [23]:
new_X_train

array([[ 1.01000000e+02,  1.56614097e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         1.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+02, -1.55894504e+00, -4.91423034e-01,
         0.00000000e+00,  0.00000000e+00,  1.00000000e+00,
         0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+02, -9.83271305e-01, -4.91423034e-01,
         0.00000000e+00,  0.00000000e+00,  1.00000000e+00,
         0.00000000e+00,  0.00000000e+00],
       [ 1.00000000e+02, -6.54314883e-01, -4.91423034e-01,
         1.00000000e+00,  1.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00],
       [ 1.03000000e+02,  1.23718454e+00,  9.82846069e-01,
         0.00000000e+00,  1.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00],
       [ 1.03000000e+02,  1.11382589e+00,  9.82846069e-01,
         1.00000000e+00,  0.00000000e+00,  1.00000000e+00,
         0.00000000e+00,  1.00000000e+00],
       [ 1.02000000e+02,  2.503152