In [1]:
import pandas as pd

# Reading Data

In [2]:
# Load the raw records; the relative path is resolved against the kernel's working directory.
csv_path = "data.csv"
df = pd.read_csv(csv_path)

In [3]:
df.head()

Unnamed: 0,fever,bodyPain,age,runnyNose,diffBreath,infectionProb
0,100.4968,0,91,0,1,1
1,100.128159,1,64,0,-1,1
2,101.625088,1,14,1,1,1
3,98.570032,0,67,0,1,0
4,101.714623,1,87,1,-1,0


In [4]:
df.tail()

Unnamed: 0,fever,bodyPain,age,runnyNose,diffBreath,infectionProb
2570,98.676436,0,27,0,-1,0
2571,101.876336,1,66,1,-1,1
2572,101.538983,0,68,1,-1,0
2573,101.587521,1,78,0,-1,0
2574,101.733046,0,95,0,1,0


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2575 entries, 0 to 2574
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   fever          2575 non-null   float64
 1   bodyPain       2575 non-null   int64  
 2   age            2575 non-null   int64  
 3   runnyNose      2575 non-null   int64  
 4   diffBreath     2575 non-null   int64  
 5   infectionProb  2575 non-null   int64  
dtypes: float64(1), int64(5)
memory usage: 120.8 KB


In [6]:
df["diffBreath"].value_counts()

 1    884
 0    859
-1    832
Name: diffBreath, dtype: int64

In [7]:
df.describe()

Unnamed: 0,fever,bodyPain,age,runnyNose,diffBreath,infectionProb
count,2575.0,2575.0,2575.0,2575.0,2575.0,2575.0
mean,99.997065,0.481553,50.184466,0.506408,0.020194,0.495146
std,1.141608,0.499757,28.79755,0.500056,0.816247,0.500074
min,98.000054,0.0,1.0,0.0,-1.0,0.0
25%,99.003773,0.0,25.0,0.0,-1.0,0.0
50%,99.99093,0.0,50.0,1.0,0.0,0.0
75%,100.974188,1.0,75.0,1.0,1.0,1.0
max,101.999337,1.0,100.0,1.0,1.0,1.0


# Train Test Splitting

In [8]:
import numpy as np

In [9]:
def data_split(data, ratio, seed=42):
    """Shuffle the rows of ``data`` and split them into train and test parts.

    Parameters
    ----------
    data : object supporting ``len()`` and ``.iloc`` (e.g. a pandas DataFrame)
        Rows to split; rows are selected positionally.
    ratio : float
        Fraction of rows, between 0 and 1 inclusive, assigned to the test
        part. The test size is ``int(len(data) * ratio)``, i.e. rounded down.
    seed : int, optional
        Seed for the shuffle. The default of 42 yields the same permutation
        as seeding NumPy's global generator with 42.

    Returns
    -------
    tuple
        ``(train, test)`` selected from ``data`` with ``.iloc``.

    Raises
    ------
    ValueError
        If ``ratio`` is not within ``[0, 1]``.
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be between 0 and 1, got {ratio!r}")

    # A private RandomState produces the same stream as np.random.seed(seed)
    # would, but leaves the process-wide NumPy generator untouched.
    rng = np.random.RandomState(seed)
    shuffled = rng.permutation(len(data))  # e.g. array([1, 4, 5, 6, 2, 0, 3])

    test_set_size = int(len(data) * ratio)
    test_indices = shuffled[:test_set_size]
    train_indices = shuffled[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

In [10]:
# Hold out 20% of the rows for testing; the remainder is used for training.
test_ratio = 0.2
train, test = data_split(df, ratio=test_ratio)

In [11]:
train

Unnamed: 0,fever,bodyPain,age,runnyNose,diffBreath,infectionProb
1703,100.651618,1,89,1,0,0
1675,99.817290,0,78,0,0,0
1518,98.990062,0,22,1,-1,1
361,101.663457,1,58,1,0,1
1544,99.004261,1,46,0,1,0
...,...,...,...,...,...,...
1638,101.221909,1,37,0,0,0
1095,99.051317,1,45,0,0,0
1130,101.811665,1,87,0,1,1
1294,99.853814,1,28,0,0,1


In [12]:
test

Unnamed: 0,fever,bodyPain,age,runnyNose,diffBreath,infectionProb
1644,98.047800,1,14,0,-1,1
1189,101.716690,1,1,0,0,0
495,100.308860,1,26,0,0,1
1656,101.864852,0,1,0,1,1
651,101.579651,1,91,1,-1,0
...,...,...,...,...,...,...
1627,99.647300,0,31,0,-1,1
471,101.758483,0,61,0,1,0
700,99.133574,0,85,0,-1,0
25,100.292827,1,64,0,-1,1


In [13]:
# Keep only the five predictor columns (everything except the infectionProb target).
x_train = train.loc[:, ["fever", "bodyPain", "age", "runnyNose", "diffBreath"]]

In [14]:
x_train

Unnamed: 0,fever,bodyPain,age,runnyNose,diffBreath
1703,100.651618,1,89,1,0
1675,99.817290,0,78,0,0
1518,98.990062,0,22,1,-1
361,101.663457,1,58,1,0
1544,99.004261,1,46,0,1
...,...,...,...,...,...
1638,101.221909,1,37,0,0
1095,99.051317,1,45,0,0
1130,101.811665,1,87,0,1
1294,99.853814,1,28,0,0


In [15]:
x_train.to_numpy()

array([[100.6516175 ,   1.        ,  89.        ,   1.        ,
          0.        ],
       [ 99.8172905 ,   0.        ,  78.        ,   0.        ,
          0.        ],
       [ 98.99006186,   0.        ,  22.        ,   1.        ,
         -1.        ],
       ...,
       [101.8116647 ,   1.        ,  87.        ,   0.        ,
          1.        ],
       [ 99.85381436,   1.        ,  28.        ,   0.        ,
          0.        ],
       [101.8385375 ,   1.        ,  19.        ,   1.        ,
          1.        ]])

In [16]:
# Same predictor columns as a plain NumPy matrix; this is what the model is fitted on.
feature_columns = ["fever", "bodyPain", "age", "runnyNose", "diffBreath"]
x_train = train[feature_columns].to_numpy()

In [17]:
x_train

array([[100.6516175 ,   1.        ,  89.        ,   1.        ,
          0.        ],
       [ 99.8172905 ,   0.        ,  78.        ,   0.        ,
          0.        ],
       [ 98.99006186,   0.        ,  22.        ,   1.        ,
         -1.        ],
       ...,
       [101.8116647 ,   1.        ,  87.        ,   0.        ,
          1.        ],
       [ 99.85381436,   1.        ,  28.        ,   0.        ,
          0.        ],
       [101.8385375 ,   1.        ,  19.        ,   1.        ,
          1.        ]])

In [18]:
# Test-set predictors as a NumPy matrix, in the same column order as x_train.
x_test = (
    test
    .loc[:, ["fever", "bodyPain", "age", "runnyNose", "diffBreath"]]
    .to_numpy()
)

In [19]:
x_test

array([[ 98.04779973,   1.        ,  14.        ,   0.        ,
         -1.        ],
       [101.7166895 ,   1.        ,   1.        ,   0.        ,
          0.        ],
       [100.3088597 ,   1.        ,  26.        ,   0.        ,
          0.        ],
       ...,
       [ 99.13357377,   0.        ,  85.        ,   0.        ,
         -1.        ],
       [100.2928271 ,   1.        ,  64.        ,   0.        ,
         -1.        ],
       [100.8284327 ,   0.        ,   1.        ,   1.        ,
         -1.        ]])

In [20]:
# Target vectors as 1-D arrays. Selecting the column as a Series (single
# brackets) already yields shape (n_rows,), so no reshape with a hard-coded
# row count is needed and the cell keeps working if the data or split changes.
y_train = train["infectionProb"].to_numpy()
y_test = test["infectionProb"].to_numpy()

In [21]:
y_train

array([0, 0, 1, ..., 1, 1, 0], dtype=int64)

In [22]:
y_test

array([1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1,
       0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1,
       1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0,
       1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1,
       1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1,
       0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1,

In [23]:
from sklearn.linear_model import LogisticRegression

In [24]:
# Fit a logistic-regression classifier with library defaults on the training split.
# fit() is left as the last expression so the fitted estimator is echoed by the cell.
clf = LogisticRegression()
clf.fit(X=x_train, y=y_train)

LogisticRegression()

In [25]:
# Predict the class for one hand-written sample. Values follow the training
# column order: fever, bodyPain, age, runnyNose, diffBreath.
# NOTE(review): the 4th value (runnyNose) is -1, but df.describe() above shows
# runnyNose only takes values between 0 and 1; only diffBreath uses -1.
# Possibly the last two values are swapped — confirm the intended sample.
clf.predict([[100,1,22,-1,1]])

array([0], dtype=int64)

In [26]:
# Probability estimate for one hand-written sample (fever, bodyPain, age,
# runnyNose, diffBreath). [0] picks the single row; [1] picks the second
# probability column, i.e. clf.classes_[1] — presumably label 1, given the
# 0/1 targets in y_train.
# NOTE(review): runnyNose is passed as -1 here, outside the 0..1 range shown by
# df.describe(); confirm the intended sample.
infProb = clf.predict_proba([[102,1,22,-1,1]])[0][1]

In [27]:
infProb

0.45935687724795327