#### Importing the Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import warnings
warnings.filterwarnings(action = 'ignore')

#### Importing the Dataset

In [2]:
# Load the toy COVID dataset from the working directory and preview the first five rows.
covid = pd.read_csv(filepath_or_buffer='Covid_Toy.csv')
covid.head(n=5)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


#### Splitting the data into training and test sets

In [3]:
from sklearn.model_selection import train_test_split
# Separate the 'has_covid' target from the feature columns and hold out 20% of
# the rows for testing; random_state is fixed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    covid.drop(columns=['has_covid']),
    covid['has_covid'],
    test_size=0.2,
    random_state=2,
)

#### WITHOUT USING COLUMN TRANSFORMER

#### 1. Simple Imputer

In [4]:
# Count the missing values in each column.
covid.isna().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [5]:
from sklearn.impute import SimpleImputer
# Fill missing 'fever' values (SimpleImputer's default strategy is the column mean).
Imputer = SimpleImputer()

# Learn the fill value from the training split only ...
x_train_fever = Imputer.fit_transform(x_train[['fever']])
# ... and reuse that fitted value on the test split. Calling fit_transform here
# would re-fit on the test data, leaking test-set statistics into preprocessing
# and imputing the two splits with different values.
x_test_fever = Imputer.transform(x_test[['fever']])

x_train_fever.shape

(80, 1)

#### 2. Ordinal Encoding 

In [6]:
from sklearn.preprocessing import OrdinalEncoder
# Encode 'cough' as an ordered category: 'Mild' -> 0, 'Strong' -> 1.
encoder = OrdinalEncoder(categories=[['Mild','Strong']])

# Fit on the training split only, then apply the already-fitted encoder to the
# test split. The test data must never be used to (re-)fit a preprocessing step.
x_train_cough = encoder.fit_transform(x_train[['cough']])
x_test_cough = encoder.transform(x_test[['cough']])

x_train_cough.shape

(80, 1)

#### 3. OneHot Encoding

In [7]:
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the nominal columns, dropping the first category of each to
# avoid redundant (perfectly collinear) indicator columns.
# NOTE(review): newer scikit-learn releases rename `sparse` to `sparse_output`;
# confirm the installed version before upgrading.
encoder2 = OneHotEncoder(drop='first',sparse=False)

# Learn the category set from the training split only ...
x_train_gender_city = encoder2.fit_transform(x_train[['gender','city']])
# ... and reuse it for the test split. Re-fitting on the test data could learn a
# different set of categories, so the test columns would no longer line up with
# the training columns (different count and/or meaning).
x_test_gender_city = encoder2.transform(x_test[['gender','city']])

x_train_gender_city.shape

(80, 4)

In [8]:
# Extract whatever is left after removing the columns handled above as a NumPy
# array (per the shape shown below, this is the single 'age' column).
x_train_age = x_train.drop(['gender', 'fever', 'cough', 'city'], axis='columns').values
x_test_age = x_test.drop(['gender', 'fever', 'cough', 'city'], axis='columns').values

x_train_age.shape

(80, 1)

In [9]:
# Stack the separately processed pieces side by side (column-wise), in the
# order: age, fever, gender/city indicators, cough.
x_train_transformed = np.hstack([
    x_train_age,
    x_train_fever,
    x_train_gender_city,
    x_train_cough,
])
x_test_transformed = np.hstack([
    x_test_age,
    x_test_fever,
    x_test_gender_city,
    x_test_cough,
])

x_train_transformed.shape

(80, 7)

#### USING COLUMN TRANSFORMER

In [10]:
from sklearn.compose import ColumnTransformer

In [11]:
# Bundle the three per-column preprocessing steps into a single transformer:
#   - impute missing 'fever' values,
#   - ordinal-encode 'cough' ('Mild' < 'Strong'),
#   - one-hot encode 'gender' and 'city' (first category dropped).
# Columns not named in any step are passed through unchanged.
transformer = ColumnTransformer(
    transformers=[
        ('transformer1', SimpleImputer(), ['fever']),
        ('transformer2', OrdinalEncoder(categories=[['Mild', 'Strong']]), ['cough']),
        ('transformer3', OneHotEncoder(sparse=False, drop='first'), ['gender', 'city']),
    ],
    remainder='passthrough',
)

In [12]:
# Fit the combined transformer on the training features and show the shape of the transformed result.
transformer.fit_transform(x_train).shape

(80, 7)