In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("covid_toy.csv")

In [3]:
df.sample(10)

Unnamed: 0,age,gender,fever,cough,city,has_covid
38,49,Female,101.0,Mild,Delhi,Yes
6,14,Male,101.0,Strong,Bangalore,No
92,82,Female,102.0,Strong,Kolkata,No
66,51,Male,104.0,Mild,Kolkata,No
23,80,Female,98.0,Mild,Delhi,Yes
17,40,Female,98.0,Strong,Delhi,No
64,42,Male,104.0,Mild,Mumbai,No
54,60,Female,99.0,Mild,Mumbai,Yes
40,49,Female,102.0,Mild,Delhi,No
46,19,Female,101.0,Mild,Mumbai,No


In [4]:
df.shape

(100, 6)

In [5]:
df.isnull().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [6]:
from sklearn.model_selection import train_test_split 
X_train , X_test , y_train, y_train = train_test_split(df.drop(columns=["has_covid"]),df['has_covid'],test_size=.2)

In [10]:
X_train 

Unnamed: 0,age,gender,fever,cough,city
48,66,Male,99.0,Strong,Bangalore
50,19,Male,101.0,Mild,Delhi
89,46,Male,103.0,Strong,Bangalore
3,31,Female,98.0,Mild,Kolkata
90,59,Female,99.0,Strong,Delhi
...,...,...,...,...,...
84,69,Female,98.0,Strong,Mumbai
78,11,Male,100.0,Mild,Bangalore
64,42,Male,104.0,Mild,Mumbai
59,6,Female,104.0,Mild,Kolkata


In [11]:
y_train

47     No
91    Yes
13    Yes
25     No
20     No
5     Yes
15    Yes
2      No
45     No
38    Yes
36     No
24     No
52    Yes
99    Yes
65     No
4      No
77     No
34    Yes
7     Yes
49     No
Name: has_covid, dtype: object

# Aam Zindagi      (Without column transformation)

In [15]:
from sklearn.impute import SimpleImputer # this is use for the missing value in the data

si = SimpleImputer()
X_train_fever = si.fit_transform(X_train[["fever"]])   #missing value is replaced by the mean of the column 



X_test_fever = si.fit_transform(X_test[["fever"]])    #missing value is replaced by the mean of the column 
                                                     

X_train_fever.shape

(80, 1)

In [19]:

from sklearn.preprocessing import OrdinalEncoder
oe = OrdinalEncoder(categories=[['Mild','Strong']])


X_train_cough = oe.fit_transform(X_train[["cough"]])


X_test_cough = oe.fit_transform(X_test[["cough"]])


X_train_cough.shape

(80, 1)

In [23]:
from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder(drop='first',sparse_output=False)
X_train_gender = ohe.fit_transform(X_train[["gender","city"]])


X_test_gender = ohe.fit_transform(X_test[["gender","city"]])

X_train_gender.shape

(80, 4)

In [26]:
# Extracting age

X_train_age = X_train.drop(columns=["gender","cough","fever","city"]).values


X_test_age = X_test.drop(columns=["gender","cough","fever","city"]).values

X_train_age.shape

(80, 1)

In [35]:
X_train_transform = np.concatenate((X_train_age,X_train_fever,X_train_gender,X_train_cough),axis=1)


X_test_transform = np.concatenate((X_test_age,X_test_fever,X_test_gender,X_test_cough),axis=1)


X_train_transform.shape
X_test_transform.shape

(20, 7)

# Mentos Zindagi   (With Column transformation)

In [29]:
from sklearn.compose import ColumnTransformer


In [32]:
transformer = ColumnTransformer(transformers=[
    ('tnf1',SimpleImputer(),['fever']),
    ('tnf2',OrdinalEncoder(categories=[['Mild','Strong']]),['cough']),
    ('tnf3',OneHotEncoder(sparse_output=False,drop='first'),['gender','city'])
],remainder='passthrough')          # in remender there is two option 1st. drop all the column where you not use the column transformation. 2nd passthrough means the column remain same 

In [36]:
transformer.fit_transform(X_train).shape

array([[ 99.       ,   1.       ,   1.       ,   0.       ,   0.       ,
          0.       ,  66.       ],
       [101.       ,   0.       ,   1.       ,   1.       ,   0.       ,
          0.       ,  19.       ],
       [103.       ,   1.       ,   1.       ,   0.       ,   0.       ,
          0.       ,  46.       ],
       [ 98.       ,   0.       ,   0.       ,   0.       ,   1.       ,
          0.       ,  31.       ],
       [ 99.       ,   1.       ,   0.       ,   1.       ,   0.       ,
          0.       ,  59.       ],
       [ 98.       ,   0.       ,   0.       ,   1.       ,   0.       ,
          0.       ,  80.       ],
       [101.       ,   1.       ,   1.       ,   0.       ,   0.       ,
          0.       ,  14.       ],
       [100.       ,   0.       ,   1.       ,   0.       ,   0.       ,
          0.       ,  80.       ],
       [ 99.       ,   0.       ,   1.       ,   1.       ,   0.       ,
          0.       ,  65.       ],
       [101.       ,   0.   

In [34]:
transformer.fit_transform(X_test).shape

(20, 7)