In [1]:
import pandas as pd
import numpy as np

### Brief Look at Data

In [2]:
data = pd.read_csv('covid_data.csv')
data.head()

# 1-> Above 50%, 0-> Below 50% (Infection Probability)
# -1-> Little, 0-> Mediocre, 1-> Severe

Unnamed: 0,age,throat_pain,breathing_problem,body_temp,headache,bodyache,infection_prob
0,80,1,0,64,0,-1,0
1,23,0,0,86,1,0,1
2,59,1,0,71,1,1,1
3,57,1,1,66,0,-1,0
4,57,1,-1,83,1,-1,1


### Data for Model

In [3]:
features = data.iloc[:,0:-1]
labels = data['infection_prob']
features.tail()

Unnamed: 0,age,throat_pain,breathing_problem,body_temp,headache,bodyache
2910,69,1,1,88,0,-1
2911,27,0,1,40,1,1
2912,58,1,-1,89,0,0
2913,81,1,-1,88,1,-1
2914,69,0,-1,38,1,-1


In [4]:
from sklearn.preprocessing import MinMaxScaler

# Scaling age
scaler = MinMaxScaler().fit(features[features.columns])
features[features.columns] = scaler.transform(features[features.columns])
features.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features[features.columns] = scaler.transform(features[features.columns])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())


Unnamed: 0,age,throat_pain,breathing_problem,body_temp,headache,bodyache
0,0.789474,1.0,0.5,0.397059,0.0,0.0
1,0.189474,0.0,0.5,0.720588,1.0,0.5
2,0.568421,1.0,0.5,0.5,1.0,1.0
3,0.547368,1.0,1.0,0.426471,0.0,0.0
4,0.547368,1.0,0.0,0.676471,1.0,0.0


### Model Imports and Training

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [6]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

### Model Training and Prediction

In [7]:
model = LogisticRegression()
model.fit(X_train, y_train)

LogisticRegression()

In [8]:
model.score(X_test, y_test) # Accuracy is 50% ;(

0.5008576329331046

In [9]:
infection_prob = model.predict_proba(X_test)
infection_prob

array([[0.47472777, 0.52527223],
       [0.46149056, 0.53850944],
       [0.48425398, 0.51574602],
       ...,
       [0.55353151, 0.44646849],
       [0.48835632, 0.51164368],
       [0.48171073, 0.51828927]])

In [10]:
from sklearn.ensemble import RandomForestClassifier

In [11]:
rf = RandomForestClassifier(n_estimators=100) # Iske hyperparameters tune krlena jaese meine n_estimators ki value set ki hai to aur bhi hyperparams hote hai

In [12]:
rf.fit(X_train, y_train)

RandomForestClassifier()

In [13]:
rf.feature_importances_ # Importance of each feature (left to right column wise)

array([0.42716342, 0.02569175, 0.05276086, 0.41284096, 0.02946666,
       0.05207634])

In [14]:
rf_preds = rf.predict(X_test)
rf.score(X_test, y_test)

0.5077186963979416

### Tensorflow Simple DL Model (Basic architecture of a DL model used for classification problems) (Not a CNN)

In [15]:
import tensorflow as tf

In [16]:
model1 = tf.keras.Sequential([
        tf.keras.layers.Dense(8, activation="relu"), # Dense layer Fully Connected layers hai, 
        tf.keras.layers.Dense(4, activation="relu"),
        tf.keras.layers.Dense(1, activation="sigmoid") # Layers add krke dekho, no of neurons change krke dekho ye sab bhi hyper param tuning hai DL mein
])

model1.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer=tf.keras.optimizers.SGD(), # Adam bhi choose kr skte hai, optimizer ke bhi hyper param hote hai learning_rate, beta1, epsilon ye sab inki values change krke dekhna result change ho jayega
              metrics=["accuracy"])

history1 = model1.fit(X_train, y_train, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [17]:
model1.evaluate(X_test, y_test)



[0.6936559677124023, 0.4939965605735779]

In [18]:
"""[0.691055178642273, 0.536878228187561]
69% Yes, 51%No
"""

'[0.691055178642273, 0.536878228187561]\n69% Yes, 51%No\n'

In [19]:
"Avg Loss: ", float(tf.reduce_mean(history1.history["loss"]))

('Avg Loss: ', 0.6931141018867493)

In [20]:
"Avg Accuracy: ", float(tf.reduce_mean(history1.history["accuracy"]))

('Avg Accuracy: ', 0.5097483992576599)

In [36]:
pred = model1.predict([[5, 1, 1, 38, 0, 0]])
float(pred)

0.4910893142223358

### Saving Tensorflow Model

In [21]:
model1.save("final_model")

INFO:tensorflow:Assets written to: final_model/assets


### Saving the Model using Pickle. (Best model aese save krlo taaki baar baar train na krna pade)

> DL mein keras mein already ek function hai save_model krke. Lekin wo sirf tensorflow ke sath he kaam krege. TF models are not picklable. SKlearn model ko neeche jaese save krke use kr skte hai TF wale nhi kr skte.

In [16]:
import pickle

model_name = 'covid_predict.sav'
file = open(model_name,'wb')
pickle.dump(model, file)
file.close()