In [1]:
import pandas as pd

## Reading Data

In [2]:
# Load the dataset; 'data.csv' is a relative path, so it is resolved against
# the notebook's current working directory.
df = pd.read_csv('data.csv')

In [3]:
# Preview the first rows to check that the columns were parsed as expected.
df.head()

Unnamed: 0,fever,DryCough,Fatigue,ChestPain,DiffBreath,LossofTS,InfProb
0,103.0,0,1,0,-1,1,0
1,97.0,0,1,0,-1,0,0
2,97.0,0,0,0,-1,0,0
3,98.0,1,1,1,1,1,1
4,102.0,0,1,0,-1,1,1


In [4]:
# Preview the last rows to confirm the file was read through to the end.
df.tail()

Unnamed: 0,fever,DryCough,Fatigue,ChestPain,DiffBreath,LossofTS,InfProb
117,98.6,0,0,0,-1,0,0
118,100.7,1,1,1,1,1,1
119,96.4,1,1,1,1,1,1
120,99.8,1,1,0,-1,1,1
121,97.7,1,1,1,1,1,1


In [5]:
# Check column dtypes and non-null counts (i.e. whether any values are missing).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122 entries, 0 to 121
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   fever       122 non-null    float64
 1   DryCough    122 non-null    int64  
 2   Fatigue     122 non-null    int64  
 3   ChestPain   122 non-null    int64  
 4   DiffBreath  122 non-null    int64  
 5   LossofTS    122 non-null    int64  
 6   InfProb     122 non-null    int64  
dtypes: float64(1), int64(6)
memory usage: 6.8 KB


In [6]:
# Frequency of each distinct fever reading, most common first.
df['fever'].value_counts()

100.0    8
98.6     8
101.0    7
102.0    6
98.0     5
98.5     5
97.0     5
99.2     5
99.0     4
98.1     4
100.4    4
97.7     3
99.8     3
99.6     3
102.2    3
101.2    3
96.9     3
101.4    2
97.3     2
102.4    2
100.7    2
101.3    2
103.0    2
96.8     2
97.5     2
96.4     2
98.3     1
100.8    1
100.6    1
108.0    1
97.4     1
102.5    1
98.8     1
96.5     1
98.4     1
99.7     1
100.5    1
96.6     1
96.3     1
96.1     1
101.1    1
96.7     1
97.8     1
101.9    1
99.4     1
98.7     1
99.1     1
97.9     1
102.3    1
101.6    1
98.2     1
Name: fever, dtype: int64

In [7]:
# DiffBreath is encoded with three levels (-1, 0, 1) rather than a 0/1 flag,
# so inspect how the rows are distributed across them.
df['DiffBreath'].value_counts()

-1    73
 0    26
 1    23
Name: DiffBreath, dtype: int64

In [8]:
# Summary statistics per column; useful for spotting out-of-range values
# such as an implausibly high fever reading.
df.describe()

Unnamed: 0,fever,DryCough,Fatigue,ChestPain,DiffBreath,LossofTS,InfProb
count,122.0,122.0,122.0,122.0,122.0,122.0,122.0
mean,99.445082,0.622951,0.754098,0.311475,-0.409836,0.762295,0.770492
std,1.949618,0.486646,0.432396,0.465006,0.789959,0.427433,0.422251
min,96.1,0.0,0.0,0.0,-1.0,0.0,0.0
25%,98.0,0.0,1.0,0.0,-1.0,1.0,1.0
50%,99.2,1.0,1.0,0.0,-1.0,1.0,1.0
75%,101.0,1.0,1.0,1.0,0.0,1.0,1.0
max,108.0,1.0,1.0,1.0,1.0,1.0,1.0


## Train/test splitting

In [9]:
import numpy as np

In [10]:
def data_split(data, ratio, seed=42):
    """Shuffle the rows of ``data`` and split them into train and test subsets.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Object to split. Rows are selected positionally via ``.iloc``.
    ratio : float
        Fraction of rows, between 0 and 1 inclusive, assigned to the test
        subset. The test size is ``int(len(data) * ratio)`` (rounded down).
    seed : int, optional
        Seed for the shuffle. The default of 42 reproduces the split this
        function has always produced.

    Returns
    -------
    tuple
        ``(train, test)``: two disjoint row subsets of ``data`` that together
        contain every row exactly once, in shuffled order.

    Raises
    ------
    ValueError
        If ``ratio`` is not within ``[0, 1]``.
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be between 0 and 1, got {ratio!r}")

    # A private RandomState produces the same permutation as calling
    # np.random.seed(seed) followed by np.random.permutation, but it does not
    # reseed the global NumPy RNG that other cells may rely on.
    rng = np.random.RandomState(seed)
    shuffled = rng.permutation(len(data))

    test_set_size = int(len(data) * ratio)
    test_indices = shuffled[:test_set_size]
    train_indices = shuffled[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

In [11]:
# Hold out 20% of the rows as the test set; the remainder is used for training.
train, test = data_split(df, ratio=0.2)

In [12]:
# Inspect the training subset (rows appear in shuffled order).
train

Unnamed: 0,fever,DryCough,Fatigue,ChestPain,DiffBreath,LossofTS,InfProb
24,98.4,0,0,0,-1,0,0
42,100.0,0,0,0,-1,1,1
105,101.0,1,1,1,0,1,1
93,97.7,1,1,0,-1,1,1
36,97.5,0,0,0,-1,0,0
...,...,...,...,...,...,...,...
106,102.0,1,1,1,1,1,1
14,98.6,0,0,0,-1,0,0
92,99.0,1,1,1,1,1,1
51,102.4,0,1,0,-1,1,1


In [13]:
# Inspect the held-out test subset.
test

Unnamed: 0,fever,DryCough,Fatigue,ChestPain,DiffBreath,LossofTS,InfProb
18,100.4,1,1,1,1,1,1
45,99.2,0,0,0,-1,0,0
47,102.4,1,1,0,-1,1,1
89,99.2,1,1,0,-1,1,1
4,102.0,0,1,0,-1,1,1
40,101.4,1,1,0,-1,1,1
62,98.5,1,1,0,-1,1,1
107,102.2,1,1,0,-1,1,1
31,97.0,0,0,0,-1,0,0
55,100.0,0,0,0,1,1,1


In [14]:
# Model inputs: every column except the InfProb label. The list is defined once
# so the train and test feature matrices always share the same column order.
feature_columns = ['fever', 'DryCough', 'Fatigue', 'ChestPain', 'DiffBreath', 'LossofTS']
X_train = train[feature_columns]
X_test = test[feature_columns]

In [15]:
# Select the label as a Series (single brackets) so .to_numpy() is already 1-D.
# This avoids hard-coding the split sizes in reshape(), which would raise a
# ValueError as soon as the dataset size or the split ratio changes.
Y_train = train['InfProb'].to_numpy()
Y_test = test['InfProb'].to_numpy()

In [16]:
# Confirm the training labels form a flat 1-D array of 0/1 values.
Y_train

array([0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 1], dtype=int64)

In [17]:
from sklearn.linear_model import LogisticRegression

In [18]:
# Fit a logistic-regression classifier with scikit-learn's default settings.
clf = LogisticRegression()
# fit() is left as the cell's last expression, so the fitted estimator is
# displayed as the cell output.
clf.fit(X_train, Y_train)

LogisticRegression()

In [21]:
# One sample, in the same column order as X_train:
# fever, DryCough, Fatigue, ChestPain, DiffBreath, LossofTS.
inputFeatures = [98, 0, 0, 0, 0, 0]

# predict_proba expects a 2-D input, hence the wrapping list; take the single
# returned row of per-class probabilities.
class_probabilities = clf.predict_proba([inputFeatures])[0]

# Position 1 is the second class; with 0/1 labels this is presumably the
# probability of InfProb == 1 (confirm against clf.classes_).
InfProb = class_probabilities[1]

In [22]:
# Show the predicted probability for the sample input above.
InfProb

0.21608385739885003