# Covid Prediction Model

### Importing Packages

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### Data Visualization

In [2]:
complete_data = pd.read_csv("new_dataset.csv")
data = complete_data[[ 'Age (Yrs)', 'Heart Rate (bpm)', 'Body Temperature (F)', 'Systolic Blood Pressure (mm Hg)', ' Diastolic Blood Pressure (mm Hg)',
           'SpO2 (%)', 'Respiration Rate (breaths/min)', 'Sugar (mg/dL)','Covid']]
data

Unnamed: 0,Age (Yrs),Heart Rate (bpm),Body Temperature (F),Systolic Blood Pressure (mm Hg),Diastolic Blood Pressure (mm Hg),SpO2 (%),Respiration Rate (breaths/min),Sugar (mg/dL),Covid
0,15,170,98.2,177,107,65,33,251,Yes
1,10,158,96.2,164,111,90,32,259,No
2,30,145,102.7,179,104,81,28,236,Yes
3,46,78,98.5,138,83,97,13,108,No
4,16,120,98.5,186,117,78,30,279,Yes
...,...,...,...,...,...,...,...,...,...
9995,27,78,99.3,108,66,95,17,102,No
9996,20,65,100.3,131,64,100,15,94,No
9997,30,90,100.5,90,64,95,16,120,No
9998,23,120,102.1,172,118,75,32,187,Yes


In [3]:
data.to_csv('dataset.csv',index= False)

In [4]:
X = data.drop('Covid', axis=1)
Y = data['Covid']
Y.value_counts()

No     6439
Yes    3561
Name: Covid, dtype: int64

In [5]:
Y = Y.to_frame()

In [1]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.2)

NameError: name 'X' is not defined

# Machine Learning Algorithms

### Decision Tree 

In [7]:
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree

dt_clf = DecisionTreeClassifier()
dt_clf.fit(X_train,Y_train)
Y_pred_dt_1 = dt_clf.predict(X_test)

In [8]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(Y_test, Y_pred_dt_1))
print(classification_report(Y_test, Y_pred_dt_1))

[[1269   20]
 [  25  686]]
              precision    recall  f1-score   support

          No       0.98      0.98      0.98      1289
         Yes       0.97      0.96      0.97       711

    accuracy                           0.98      2000
   macro avg       0.98      0.97      0.98      2000
weighted avg       0.98      0.98      0.98      2000



In [9]:
from sklearn.metrics import accuracy_score
print('Decision Tree Accuracy: ', accuracy_score(Y_test,Y_pred_dt_1)*100)

Decision Tree Accuracy:  97.75


### Random Forest Classifier

In [10]:
from sklearn.ensemble import RandomForestClassifier

In [11]:
from sklearn.ensemble import RandomForestClassifier
rf_clf = RandomForestClassifier()#random_state=max_i
rf_clf = rf_clf.fit(X_train,Y_train)
estimator_limited = rf_clf.estimators_[5]
y_pred_rf_1 = rf_clf.predict(X_test)
print('Random Forest Accuracy: ', accuracy_score(Y_test,y_pred_rf_1)*100)

Random Forest Accuracy:  98.95


In [12]:
print(confusion_matrix(Y_test, y_pred_rf_1))
print(classification_report(Y_test, y_pred_rf_1))

[[1268   21]
 [   0  711]]
              precision    recall  f1-score   support

          No       1.00      0.98      0.99      1289
         Yes       0.97      1.00      0.99       711

    accuracy                           0.99      2000
   macro avg       0.99      0.99      0.99      2000
weighted avg       0.99      0.99      0.99      2000



### Logistic Regression

In [13]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

lr_model = LogisticRegression(random_state=0).fit(X_train, Y_train)
lr_y_pred_1 = lr_model.predict(X_test)
print('Logistic Regression Accuracy: ', accuracy_score(Y_test,lr_y_pred_1)*100)

Logistic Regression Accuracy:  91.8


In [14]:
print(confusion_matrix(Y_test, lr_y_pred_1))
print(classification_report(Y_test, lr_y_pred_1))

[[1190   99]
 [  65  646]]
              precision    recall  f1-score   support

          No       0.95      0.92      0.94      1289
         Yes       0.87      0.91      0.89       711

    accuracy                           0.92      2000
   macro avg       0.91      0.92      0.91      2000
weighted avg       0.92      0.92      0.92      2000



### Naive Bayes

In [15]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train,Y_train)
gnb_y_pred_1 = gnb.predict(X_test)
print('Naive Bayes Accuracy: ', accuracy_score(Y_test,gnb_y_pred_1)*100)

Naive Bayes Accuracy:  84.6


In [16]:
print(confusion_matrix(Y_test, gnb_y_pred_1))
print(classification_report(Y_test, gnb_y_pred_1))

[[982 307]
 [  1 710]]
              precision    recall  f1-score   support

          No       1.00      0.76      0.86      1289
         Yes       0.70      1.00      0.82       711

    accuracy                           0.85      2000
   macro avg       0.85      0.88      0.84      2000
weighted avg       0.89      0.85      0.85      2000



### K-Nearest Neighbors

In [17]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(X_train,Y_train)
knn_y_pred_1 = knn.predict(X_test)
print('K-Nearest Neighbors Accuracy: ', accuracy_score(Y_test,knn_y_pred_1)*100)

K-Nearest Neighbors Accuracy:  90.8


In [18]:
print(confusion_matrix(Y_test, knn_y_pred_1))
print(classification_report(Y_test, knn_y_pred_1))

[[1162  127]
 [  57  654]]
              precision    recall  f1-score   support

          No       0.95      0.90      0.93      1289
         Yes       0.84      0.92      0.88       711

    accuracy                           0.91      2000
   macro avg       0.90      0.91      0.90      2000
weighted avg       0.91      0.91      0.91      2000



### Stochastic Gradient Descent

In [19]:
from sklearn.linear_model import SGDClassifier
sgd = SGDClassifier(loss = 'modified_huber',shuffle = True, random_state = 101)
sgd.fit(X_train,Y_train)
sgd_y_pred_1 = sgd.predict(X_test)
print('Stochastic Gradient Descent Accuracy: ', accuracy_score(Y_test,sgd_y_pred_1)*100)

Stochastic Gradient Descent Accuracy:  89.75


In [20]:
print(confusion_matrix(Y_test, sgd_y_pred_1))
print(classification_report(Y_test, sgd_y_pred_1))

[[1087  202]
 [   3  708]]
              precision    recall  f1-score   support

          No       1.00      0.84      0.91      1289
         Yes       0.78      1.00      0.87       711

    accuracy                           0.90      2000
   macro avg       0.89      0.92      0.89      2000
weighted avg       0.92      0.90      0.90      2000



### Support Vector Machines

In [21]:
from sklearn import svm
svm = svm.SVC()
svm.fit(X_train,Y_train)
svm_y_pred_1 = svm.predict(X_test)
print('Support Vector Machines Accuracy: ', accuracy_score(Y_test,svm_y_pred_1)*100)

Support Vector Machines Accuracy:  93.2


In [22]:
print(confusion_matrix(Y_test, svm_y_pred_1))
print(classification_report(Y_test, svm_y_pred_1))

[[1204   85]
 [  51  660]]
              precision    recall  f1-score   support

          No       0.96      0.93      0.95      1289
         Yes       0.89      0.93      0.91       711

    accuracy                           0.93      2000
   macro avg       0.92      0.93      0.93      2000
weighted avg       0.93      0.93      0.93      2000



## 1 Lakh Dataset

### Data Visualization

In [23]:
complete_data = pd.read_csv("new_dataset_100000.csv")
data = complete_data[[ 'Age (Yrs)', 'Heart Rate (bpm)', 'Body Temperature (F)', 'Systolic Blood Pressure (mm Hg)', ' Diastolic Blood Pressure (mm Hg)',
           'SpO2 (%)', 'Respiration Rate (breaths/min)', 'Sugar (mg/dL)','Covid']]
data

Unnamed: 0,Age (Yrs),Heart Rate (bpm),Body Temperature (F),Systolic Blood Pressure (mm Hg),Diastolic Blood Pressure (mm Hg),SpO2 (%),Respiration Rate (breaths/min),Sugar (mg/dL),Covid
0,40,135,102.1,158,96,69,25,270,Yes
1,52,92,99.6,125,60,92,14,111,No
2,52,71,98.5,121,90,97,16,77,No
3,50,145,100.9,199,105,82,26,258,Yes
4,41,92,98.6,98,90,99,13,105,No
...,...,...,...,...,...,...,...,...,...
99995,22,76,99.9,103,83,97,14,122,No
99996,29,122,103.5,193,125,76,34,216,Yes
99997,46,88,98.2,98,68,97,14,106,No
99998,21,148,101.4,163,126,85,35,201,No


In [24]:
data.to_csv('dataset_100000.csv',index= False)

In [25]:
complete_data[(complete_data['vaccination_status'] == 'Yes' ) & (complete_data['dose'] == 2) & (complete_data['anomaly'] == 1)].count()

vaccination_status                   1420
vaccine                              1420
dose                                 1420
prob                                 1420
anomaly                              1420
Age (Yrs)                            1420
Heart Rate (bpm)                     1420
Body Temperature (F)                 1420
Systolic Blood Pressure (mm Hg)      1420
 Diastolic Blood Pressure (mm Hg)    1420
SpO2 (%)                             1420
Respiration Rate (breaths/min)       1420
Sugar (mg/dL)                        1420
Covid                                1420
dtype: int64

In [26]:
y_no = data[data['Covid'] == 'Yes']
print(y_no.shape)
X = data.drop('Covid', axis=1)
Y = data['Covid']
Y.value_counts()

(35691, 9)


No     64309
Yes    35691
Name: Covid, dtype: int64

In [27]:
Y = Y.to_frame()

In [28]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.2)

# Machine Learning Algorithms

### Decision Tree 

In [29]:
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree

dt_clf = DecisionTreeClassifier()
dt_clf.fit(X_train,Y_train)
Y_pred_dt_2 = dt_clf.predict(X_test)

In [30]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(Y_test, Y_pred_dt_2))
print(classification_report(Y_test, Y_pred_dt_2))

[[12658   189]
 [  284  6869]]
              precision    recall  f1-score   support

          No       0.98      0.99      0.98     12847
         Yes       0.97      0.96      0.97      7153

    accuracy                           0.98     20000
   macro avg       0.98      0.97      0.97     20000
weighted avg       0.98      0.98      0.98     20000



In [31]:
from sklearn.metrics import accuracy_score
print('Decision Tree Accuracy: ', accuracy_score(Y_test,Y_pred_dt_2)*100)

Decision Tree Accuracy:  97.635


### Random Forest Classifier

In [32]:
from sklearn.ensemble import RandomForestClassifier

In [33]:
from sklearn.ensemble import RandomForestClassifier
rf_clf = RandomForestClassifier()
rf_clf = rf_clf.fit(X_train,Y_train)
estimator_limited = rf_clf.estimators_[5]
y_pred_rf_2 = rf_clf.predict(X_test)
print('Random Forest Accuracy: ', accuracy_score(Y_test,y_pred_rf_2)*100)

Random Forest Accuracy:  99.035


In [34]:
print(confusion_matrix(Y_test, y_pred_rf_2))
print(classification_report(Y_test, y_pred_rf_2))

[[12654   193]
 [    0  7153]]
              precision    recall  f1-score   support

          No       1.00      0.98      0.99     12847
         Yes       0.97      1.00      0.99      7153

    accuracy                           0.99     20000
   macro avg       0.99      0.99      0.99     20000
weighted avg       0.99      0.99      0.99     20000



### Logistic Regression

In [35]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

lr_model = LogisticRegression(random_state=0).fit(X_train, Y_train)
lr_y_pred_2 = lr_model.predict(X_test)
print('Logistic Regression Accuracy: ', accuracy_score(Y_test,lr_y_pred_2)*100)

Logistic Regression Accuracy:  90.98


In [36]:
print(confusion_matrix(Y_test, lr_y_pred_2))
print(classification_report(Y_test, lr_y_pred_2))

[[11823  1024]
 [  780  6373]]
              precision    recall  f1-score   support

          No       0.94      0.92      0.93     12847
         Yes       0.86      0.89      0.88      7153

    accuracy                           0.91     20000
   macro avg       0.90      0.91      0.90     20000
weighted avg       0.91      0.91      0.91     20000



### Naive Bayes

In [37]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train,Y_train)
gnb_y_pred_2 = gnb.predict(X_test)
print('Naive Bayes Accuracy: ', accuracy_score(Y_test,gnb_y_pred_2)*100)

Naive Bayes Accuracy:  82.85


In [38]:
print(confusion_matrix(Y_test, gnb_y_pred_2))
print(classification_report(Y_test, gnb_y_pred_2))

[[9422 3425]
 [   5 7148]]
              precision    recall  f1-score   support

          No       1.00      0.73      0.85     12847
         Yes       0.68      1.00      0.81      7153

    accuracy                           0.83     20000
   macro avg       0.84      0.87      0.83     20000
weighted avg       0.88      0.83      0.83     20000



### K-Nearest Neighbors

In [39]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(X_train,Y_train)
knn_y_pred_2 = knn.predict(X_test)
print('K-Nearest Neighbors Accuracy: ', accuracy_score(Y_test,knn_y_pred_2)*100)

K-Nearest Neighbors Accuracy:  92.745


In [40]:
print(confusion_matrix(Y_test, knn_y_pred_2))
print(classification_report(Y_test, knn_y_pred_2))

[[11849   998]
 [  453  6700]]
              precision    recall  f1-score   support

          No       0.96      0.92      0.94     12847
         Yes       0.87      0.94      0.90      7153

    accuracy                           0.93     20000
   macro avg       0.92      0.93      0.92     20000
weighted avg       0.93      0.93      0.93     20000



### Stochastic Gradient Descent

In [41]:
from sklearn.linear_model import SGDClassifier
sgd = SGDClassifier(loss = 'modified_huber',shuffle = True, random_state = 101)
sgd.fit(X_train,Y_train)
sgd_y_pred_2 = sgd.predict(X_test)
print('Stochastic Gradient Descent Accuracy: ', accuracy_score(Y_test,sgd_y_pred_2)*100)

Stochastic Gradient Descent Accuracy:  90.97


In [42]:
print(confusion_matrix(Y_test, sgd_y_pred_2))
print(classification_report(Y_test, sgd_y_pred_2))

[[11291  1556]
 [  250  6903]]
              precision    recall  f1-score   support

          No       0.98      0.88      0.93     12847
         Yes       0.82      0.97      0.88      7153

    accuracy                           0.91     20000
   macro avg       0.90      0.92      0.91     20000
weighted avg       0.92      0.91      0.91     20000



### Support Vector Machines

In [43]:
from sklearn import svm
svm = svm.SVC()
svm.fit(X_train,Y_train)
svm_y_pred_2 = svm.predict(X_test)
print('Support Vector Machines Accuracy: ', accuracy_score(Y_test,svm_y_pred_2)*100)

Support Vector Machines Accuracy:  93.50500000000001


In [44]:
print(confusion_matrix(Y_test, svm_y_pred_2))
print(classification_report(Y_test, svm_y_pred_2))

[[12059   788]
 [  511  6642]]
              precision    recall  f1-score   support

          No       0.96      0.94      0.95     12847
         Yes       0.89      0.93      0.91      7153

    accuracy                           0.94     20000
   macro avg       0.93      0.93      0.93     20000
weighted avg       0.94      0.94      0.94     20000

