In [1]:
import pandas as pd
import numpy as np

# Load the toy COVID survey data from the local dataset/ directory
# (path uses a single separator).
df = pd.read_csv('dataset/covid_toy.csv')

In [2]:
# Preview the first three rows of the freshly loaded frame.
df.head(n=3)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No


In [3]:
from sklearn.impute import SimpleImputer
# Imputer with its default mean strategy written out explicitly.
si = SimpleImputer(strategy='mean')

In [4]:
# Learn the fill value from the 'fever' column, then apply it to that same column.
# NOTE(review): the fit sees every row of df, before the train/test split done in
# a later cell, so held-out rows influence the imputed value. Consider fitting the
# imputer on the training split only.
df_fever = si.fit(df[['fever']]).transform(df[['fever']])

In [5]:
# Remove the original 'fever' column in place; this cell raises KeyError if it is
# run a second time because the column is already gone.
df.drop(columns=['fever'], inplace=True)

In [6]:
# Re-attach the imputed values (the array produced by si.fit_transform) under the
# original column name; the column is appended, so it now sits last in the frame.
df['fever'] = df_fever

In [7]:
# Preview the frame again after the 'fever' column was replaced.
df.head(n=3)

Unnamed: 0,age,gender,cough,city,has_covid,fever
0,60,Male,Mild,Kolkata,No,103.0
1,27,Male,Mild,Delhi,Yes,100.0
2,42,Male,Mild,Delhi,No,101.0


In [8]:
from sklearn.model_selection import train_test_split

In [9]:
# Hold out 20% of the rows for evaluation; 'has_covid' is the target and every
# other column is a feature. The seed is fixed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='has_covid'),
    df['has_covid'],
    test_size=0.2,
    random_state=0,
)

In [10]:
# Preview the first three training rows.
X_train.head(n=3)

Unnamed: 0,age,gender,cough,city,fever
43,22,Female,Mild,Bangalore,99.0
62,56,Female,Strong,Bangalore,104.0
3,31,Female,Mild,Kolkata,98.0


In [11]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

In [12]:
# Count missing values per feature column in the training split.
X_train.isna().sum()

age       0
gender    0
cough     0
city      0
fever     0
dtype: int64

In [13]:
from sklearn.compose import ColumnTransformer

In [14]:
# Per-column preprocessing. Output columns follow the order of the transformers
# listed here; any column not named below is passed through unchanged.
ct = ColumnTransformer(
    transformers=[
        # cough: explicit category order, so Mild -> 0 and Strong -> 1
        ('tf2', OrdinalEncoder(categories=[['Mild', 'Strong']]), ['cough']),
        # gender, city: dense one-hot columns with the first level of each dropped
        ('tf3', OneHotEncoder(drop='first', sparse_output=False, dtype=np.int32), ['gender', 'city']),
        # age, fever: standardised with StandardScaler
        ('tf4', StandardScaler(), ['age', 'fever']),
    ],
    remainder='passthrough',
)

In [15]:
# Fit the column transformer on the training features and transform them in one step.
X_train_transformed = ct.fit_transform(X_train)

In [16]:
# Apply the already-fitted transformer to the test features (transform only, no refit).
X_test_transformed = ct.transform(X_test)

In [17]:
# Count missing values per column of the transformed training matrix.
pd.DataFrame(X_train_transformed).isna().sum()

0    0
1    0
2    0
3    0
4    0
5    0
6    0
dtype: int64

In [18]:
# Count missing values per column of the transformed test matrix.
pd.DataFrame(X_test_transformed).isna().sum()

0    0
1    0
2    0
3    0
4    0
5    0
6    0
dtype: int64

In [19]:
from sklearn.preprocessing import LabelEncoder

In [20]:
# Encoder for the target labels. Note that `lr` here is a LabelEncoder, not a
# regression model; the estimators in this notebook are model, model_LR and model_L.
lr = LabelEncoder()

In [21]:
# Learn the label classes from the training target, then overwrite y_train with
# its integer-encoded form (wrapped in a fresh Series).
lr.fit(y_train)
y_train = pd.Series(lr.transform(y_train))

In [22]:
# Encode the test target with the classes learned from y_train (transform only),
# overwriting y_test with the encoded Series.
y_test = pd.Series(lr.transform(y_test))

In [23]:
from sklearn.tree import DecisionTreeClassifier

In [24]:
# Decision-tree classifier with a fixed seed so that the fitted tree, and the
# accuracy computed from it, are reproducible across kernel restarts. The seed
# matches the one used for train_test_split.
model = DecisionTreeClassifier(random_state=0)

In [25]:
# Train the decision tree on the transformed training features and encoded target.
model.fit(X=X_train_transformed, y=y_train)

In [26]:
# Decision-tree predictions for the held-out rows.
y_pred = model.predict(X=X_test_transformed)

In [27]:
from sklearn.metrics import accuracy_score

In [28]:
# Fraction of held-out rows the decision tree labelled correctly.
score = accuracy_score(y_true=y_test, y_pred=y_pred)

In [29]:
# Display the decision-tree test accuracy.
score

0.4

In [30]:
from sklearn.linear_model import LogisticRegression

In [31]:
# Logistic-regression classifier with library-default settings.
model_LR = LogisticRegression()

In [32]:
# Train the logistic regression on the same transformed features and encoded target.
model_LR.fit(X=X_train_transformed, y=y_train)

In [36]:
# Logistic-regression predictions for the held-out rows.
y_pred_LR = model_LR.predict(X=X_test_transformed)

In [37]:
# Fraction of held-out rows the logistic regression labelled correctly.
score_LR = accuracy_score(y_true=y_test, y_pred=y_pred_LR)

In [39]:
# Display the logistic-regression test accuracy.
score_LR

0.35

In [42]:
from sklearn.linear_model import LinearRegression
# Ordinary least-squares regressor; it is fitted below on the 0/1 encoded target.
# NOTE(review): this is a regressor applied to a binary label, so its predictions
# are continuous scores rather than class labels. Confirm this is intended.
model_L = LinearRegression()

In [43]:
# Fit the linear regression on the transformed features against the encoded target.
model_L.fit(X=X_train_transformed, y=y_train)

In [44]:
# Continuous regression outputs for the held-out rows.
y_pred_L = model_L.predict(X=X_test_transformed)

In [48]:
# Turn the continuous regression scores into 0/1 labels by thresholding at 0.5.
# A plain integer cast truncates toward zero, which maps every score strictly
# between -1 and 1 to 0 no matter how close it is to 1.
(y_pred_L >= 0.5).astype(np.int32)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])