In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler

In [3]:
data = pd.read_csv("BP.csv")
data.head()

Unnamed: 0,Date,SysPressure,DiaPressure,Time
0,18/3,141,92,20:30
1,19/3,130,83,09
2,19/3,120,80,13:30
3,19/3,120,83,16
4,19/3,122,79,18


In [4]:
data = data.drop('Date',axis=1) # Date is not important to us

In [5]:
A = np.array(data['Time'])
print(A.shape)

(20,)


In [6]:
Time_Cat = np.array([]) # Column for time periods
Time_Cat_num = np.empty(0,int) # Column for catergorical values of periods
for i in range(0,A.shape[0]):
    str = A[i]
    if(str[0:2] <= '12'):
        Time_Cat = np.append(Time_Cat,"Morning")
        Time_Cat_num = np.append(Time_Cat_num,0)
    elif(str[0:2] >'12' and str[0:2] <='18'):
        Time_Cat = np.append(Time_Cat,"Afternoon")
        Time_Cat_num = np.append(Time_Cat_num,1)
    elif(str[0:2] >'18' and str[0:2] <='24'):
        Time_Cat = np.append(Time_Cat,"Evening")
        Time_Cat_num = np.append(Time_Cat_num,2)
    else:
        pass

In [7]:
data['TimePeriod'] = Time_Cat
data['Categorical Values'] = Time_Cat_num
data = data[['Time','TimePeriod','Categorical Values','SysPressure','DiaPressure']]
data.head()

Unnamed: 0,Time,TimePeriod,Categorical Values,SysPressure,DiaPressure
0,20:30,Evening,2,141,92
1,09,Morning,0,130,83
2,13:30,Afternoon,1,120,80
3,16,Afternoon,1,120,83
4,18,Afternoon,1,122,79


### One Hot Encoding Categorical Values

In [8]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

columnTransformer = ColumnTransformer([('encoder',OneHotEncoder(),
                                        [2])],
                                      remainder='passthrough')

arr = np.array(columnTransformer.fit_transform(data), dtype = np.str)
arr2 = np.delete(arr,[3,4],axis=1)
FinalData = pd.DataFrame(arr2,columns=["Morning","Afternoon","Evening","SysPressure","DiaPressure"])


### Final DataFrame

In [9]:
FinalData

Unnamed: 0,Morning,Afternoon,Evening,SysPressure,DiaPressure
0,0.0,0.0,1.0,141,92
1,1.0,0.0,0.0,130,83
2,0.0,1.0,0.0,120,80
3,0.0,1.0,0.0,120,83
4,0.0,1.0,0.0,122,79
5,0.0,0.0,1.0,125,86
6,0.0,0.0,1.0,130,90
7,1.0,0.0,0.0,126,89
8,0.0,1.0,0.0,130,86
9,0.0,1.0,0.0,120,86


In [10]:
X = FinalData.iloc[:,0:3].values
Y_1 = FinalData.iloc[:,3:4].values
Y_2 = FinalData.iloc[:,4:5].values

### Model 1 ( Time - SysPressure)

In [11]:
Xtrain,Xtest,ytrain,ytest = train_test_split(X,Y_1,test_size=0.3,random_state =2)
print(Xtrain.shape,Xtest.shape,ytrain.shape,ytest.shape)

reg1 = LinearRegression()
model1 = reg1.fit(Xtrain,ytrain)
preds = model1.predict(Xtest)

(14, 3) (6, 3) (14, 1) (6, 1)


### Model 2 ( Time - DiaPressure)

In [28]:
Xtrain2,Xtest2,ytrain2,ytest2 = train_test_split(X,Y_2,test_size=0.3,random_state =2)
print(Xtrain2.shape,Xtest2.shape,ytrain2.shape,ytest2.shape)

reg2 = LinearRegression()
model2 = reg2.fit(Xtrain2,ytrain2)
preds2 = model2.predict(Xtest2)

(14, 3) (6, 3) (14, 1) (6, 1)


### Measuring the 'Goodness' of our regression Fit

In [29]:
print("MODEL 1")
print('R^2 value: ',r2_score(ytest,preds))
print('RMSE value: ',mean_squared_error(ytest,preds))
print(" ")
print("MODEL 2")
print('R^2 value: ',r2_score(ytest2,preds2))
print('RMSE value: ',mean_squared_error(ytest2,preds2))

MODEL 1
R^2 value:  0.4586167341430496
RMSE value:  29.71592592592594
 
MODEL 2
R^2 value:  0.18039062499999847
RMSE value:  11.65666666666669


In [17]:
df2 = pd.DataFrame({'Actual': ytest.flatten(), 'Predicted': preds.flatten()})
df3 = pd.DataFrame({'Actual': ytest2.flatten(), 'Predicted': preds2.flatten()})
print("Systolic Pressure")
display(df2)
print(" ")
print("Diastolic Pressure")
display(df3)

Systolic Pressure


Unnamed: 0,Actual,Predicted
0,130,130.0
1,122,124.166667
2,120,123.2
3,141,130.0
4,120,124.166667
5,125,130.0


 
Diastolic Pressure


Unnamed: 0,Actual,Predicted
0,86,90.0
1,79,84.5
2,85,83.8
3,92,90.0
4,86,84.5
5,86,90.0


In [20]:
str = input("Enter period of day")
if str.lower() == "morning":
    testVal = np.array([[1,0,0]])
elif str.lower() == "afternoon":
    testVal = np.array([[0,1,0]])
elif str.lower() == "evening":
    testVal = np.array([[0,0,1]])
else:
    print("Wrong input")

print("Predicted SysPressure: ",reg1.predict(testVal))
print("Predicted DiaPressure: ",reg2.predict(testVal))

Enter period of day morning


Predicted SysPressure:  [[123.2]]
Predicted DiaPressure:  [[84.75]]


### Model metrics at different sample levels

In [30]:
A = {'Rsquared model 1':[0.458,0,0,0],'RMSE model 1':[29.71,0,0,0],'Rsquared model 2':[0.180,0,0,0],'RMSE model 2':[11.65,0,0,0]}
Table = pd.DataFrame(A,index=['20 samples','50 samples','100 samples','150 samples'])
Table

Unnamed: 0,Rsquared model 1,RMSE model 1,Rsquared model 2,RMSE model 2
20 samples,0.458,29.71,0.18,11.65
50 samples,0.0,0.0,0.0,0.0
100 samples,0.0,0.0,0.0,0.0
150 samples,0.0,0.0,0.0,0.0
