In [1]:
import pandas as pd
import numpy as np


In [3]:
data = pd.read_csv('covid_toy.csv')

In [5]:
data.sample(5)

Unnamed: 0,age,gender,fever,cough,city,has_covid
84,69,Female,98.0,Strong,Mumbai,No
0,60,Male,103.0,Mild,Kolkata,No
11,65,Female,98.0,Mild,Mumbai,Yes
63,10,Male,100.0,Mild,Bangalore,No
14,51,Male,104.0,Mild,Bangalore,No


In [7]:
data.isnull().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [29]:
data.shape

(100, 6)

In [9]:
# Age Column    = Numerical Column (No Need for preprocessing)
# Gender Column = Nominal Categorical Data (One Hot Encoding)
# Fever Column  = Numerical Column but it is missing some values (Simple Imputer)
# Cough Column  = Ordinal Data (Ordinal Encoding)
# City Column   = Nominal Categorical Data (One Hot Encoding)

In [15]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer

In [37]:
ct = ColumnTransformer(transformers=[
    ('ct1 ', SimpleImputer(),['fever']),
    ('ct2', OrdinalEncoder(categories = [['Mild','Strong']]), ['cough']),
    ('ct3', OneHotEncoder(drop='first', sparse_output=False), ['gender','city'])
] , remainder = 'passthrough')

In [19]:
from sklearn.model_selection import train_test_split

In [21]:
X_train, X_test, y_train, y_test = train_test_split(data.drop(['has_covid'], axis=1), data['has_covid'] , test_size = 0.3)

In [23]:
X_train.head()

Unnamed: 0,age,gender,fever,cough,city
20,12,Male,98.0,Strong,Bangalore
8,19,Female,100.0,Strong,Bangalore
12,25,Female,99.0,Strong,Kolkata
83,17,Female,104.0,Mild,Kolkata
15,70,Male,103.0,Strong,Kolkata


In [33]:
y_train.shape

(70,)

In [31]:
X_train.shape

(70, 5)

In [39]:
X_train_transformed = ct.fit_transform(X_train)
X_test_transformed = ct.transform(X_test)

In [45]:
X_train_transformed

array([[ 98.        ,   1.        ,   1.        ,   0.        ,
          0.        ,   0.        ,  12.        ],
       [100.        ,   1.        ,   0.        ,   0.        ,
          0.        ,   0.        ,  19.        ],
       [ 99.        ,   1.        ,   0.        ,   0.        ,
          1.        ,   0.        ,  25.        ],
       [104.        ,   0.        ,   0.        ,   0.        ,
          1.        ,   0.        ,  17.        ],
       [103.        ,   1.        ,   1.        ,   0.        ,
          1.        ,   0.        ,  70.        ],
       [101.06557377,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,  84.        ],
       [101.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   1.        ,  81.        ],
       [101.06557377,   0.        ,   1.        ,   0.        ,
          1.        ,   0.        ,  82.        ],
       [101.        ,   1.        ,   0.        ,   1.        ,
          0.    

In [47]:
X_train_transformed.shape

(70, 7)