In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder

In [3]:
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

In [5]:
df = pd.read_csv('covid_toy.csv')

In [7]:
df

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No
...,...,...,...,...,...,...
95,12,Female,104.0,Mild,Bangalore,No
96,51,Female,101.0,Strong,Kolkata,Yes
97,20,Female,101.0,Mild,Bangalore,No
98,5,Female,98.0,Strong,Mumbai,No


In [19]:
transformer = ColumnTransformer(transformers = [
    ('tnf1',SimpleImputer(),['fever']),
    ('tnf2', OrdinalEncoder(categories = [['Mild','Strong']]),['cough']),
    ('tnf3',OneHotEncoder(drop = 'first', sparse_output = False),['gender','city']),
], remainder = 'passthrough')

In [21]:
X = df.iloc[:,0:5].values

In [23]:
Y = df.iloc[:,-1].values

In [25]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 2)

In [45]:
X_train = pd.DataFrame(X_train, columns = df.columns[0:5])
X_test = pd.DataFrame(X_test, columns = df.columns[0:5])

In [47]:
X_train = transformer.fit_transform(X_train)
X_test = transformer.transform(X_test)

In [49]:
X_train

array([[102.0, 1.0, 0.0, 0.0, 0.0, 0.0, 82],
       [98.0, 0.0, 0.0, 0.0, 0.0, 1.0, 65],
       [98.0, 1.0, 0.0, 0.0, 0.0, 1.0, 69],
       [102.0, 1.0, 1.0, 1.0, 0.0, 0.0, 20],
       [98.0, 1.0, 1.0, 0.0, 1.0, 0.0, 34],
       [100.79166666666667, 0.0, 0.0, 0.0, 0.0, 0.0, 84],
       [100.79166666666667, 0.0, 1.0, 0.0, 0.0, 1.0, 23],
       [101.0, 0.0, 0.0, 0.0, 1.0, 0.0, 8],
       [104.0, 1.0, 0.0, 1.0, 0.0, 0.0, 34],
       [104.0, 1.0, 0.0, 0.0, 0.0, 0.0, 56],
       [102.0, 0.0, 0.0, 0.0, 0.0, 0.0, 69],
       [100.0, 0.0, 1.0, 1.0, 0.0, 0.0, 27],
       [98.0, 0.0, 0.0, 0.0, 0.0, 0.0, 64],
       [99.0, 1.0, 1.0, 0.0, 0.0, 0.0, 66],
       [101.0, 0.0, 0.0, 0.0, 0.0, 0.0, 38],
       [100.0, 0.0, 1.0, 0.0, 0.0, 0.0, 11],
       [101.0, 1.0, 1.0, 0.0, 0.0, 0.0, 14],
       [103.0, 1.0, 1.0, 0.0, 0.0, 0.0, 46],
       [100.79166666666667, 0.0, 1.0, 1.0, 0.0, 0.0, 38],
       [100.79166666666667, 0.0, 0.0, 1.0, 0.0, 0.0, 75],
       [99.0, 1.0, 0.0, 0.0, 1.0, 0.0, 25],
       [98

In [51]:
X_test

array([[104.0, 0.0, 0.0, 0.0, 1.0, 0.0, 17],
       [101.0, 0.0, 1.0, 1.0, 0.0, 0.0, 15],
       [100.79166666666667, 1.0, 1.0, 0.0, 1.0, 0.0, 71],
       [100.0, 1.0, 0.0, 0.0, 1.0, 0.0, 13],
       [103.0, 0.0, 0.0, 0.0, 1.0, 0.0, 69],
       [98.0, 0.0, 0.0, 1.0, 0.0, 0.0, 80],
       [101.0, 0.0, 1.0, 1.0, 0.0, 0.0, 42],
       [102.0, 1.0, 0.0, 1.0, 0.0, 0.0, 33],
       [104.0, 0.0, 1.0, 0.0, 1.0, 0.0, 16],
       [102.0, 0.0, 1.0, 0.0, 0.0, 0.0, 64],
       [98.0, 1.0, 0.0, 0.0, 1.0, 0.0, 10],
       [102.0, 1.0, 0.0, 0.0, 1.0, 0.0, 82],
       [100.0, 0.0, 1.0, 0.0, 0.0, 0.0, 80],
       [104.0, 0.0, 1.0, 0.0, 0.0, 0.0, 51],
       [103.0, 0.0, 1.0, 0.0, 1.0, 0.0, 60],
       [98.0, 0.0, 1.0, 0.0, 0.0, 0.0, 73],
       [98.0, 0.0, 0.0, 0.0, 1.0, 0.0, 31],
       [100.79166666666667, 1.0, 0.0, 0.0, 0.0, 1.0, 34],
       [98.0, 1.0, 0.0, 0.0, 0.0, 1.0, 81],
       [103.0, 0.0, 0.0, 0.0, 1.0, 0.0, 48]], dtype=object)