##Importing the Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

##Importing the Dataset

In [2]:
df = pd.read_csv('/content/final_data_BP.csv')
df.drop('ID', axis=1 , inplace=True)

In [3]:
df.head()

Unnamed: 0,Gender,Age,Alchohol,Smoking,Non-Veg,Exercise,Medication,BMI,BP_best
0,Male,37,No,No,No,No,No,24.1,1
1,Male,47,No,No,No,Yes,Yes(Anxiety/B.P.),22.8,6
2,Female,47,No,No,No,No,Yes(Thyroid),28.0,3
3,Female,35,No,No,No,No,No,27.4,4
4,Female,33,No,No,No,No,No,22.6,5


In [4]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(sampling_strategy="not majority")
X_res, y_res = ros.fit_resample(X, y)

In [5]:
y_res

0     1
1     6
2     3
3     4
4     5
     ..
91    6
92    6
93    6
94    6
95    6
Name: BP_best, Length: 96, dtype: int64

In [6]:
y_res.value_counts()

1    16
6    16
3    16
4    16
5    16
2    16
Name: BP_best, dtype: int64

In [7]:
X = X_res.values
y = y_res.values

##LabelEncoding

In [8]:
for i in range(X.shape[0]):
  for j in range(0, 6):
    if X[i, j] == 'No' or X[i,j] == "Male" or X[i, j] == 'No ':
      X[i, j] = 0
    elif X[i, j] == 'Yes' or X[i,j] == "Female":
      X[i, j] = 1

for i in range(X.shape[0]):
  for j in range(6,7):
    if X[i,j] == 'No':
      X[i,j] = 0
    else:
      X[i,j] = 1

##Splitting the Dataset

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [10]:
X_train.shape

(76, 8)

##Feature Scaling

In [11]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

X_train_scaled = sc.fit_transform(X_train)
X_test_scaled = sc.fit_transform(X_test)

#MODELS

In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, BaggingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [13]:
# Function to print evaluation metrics
def print_metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred) * 100

    precision_macro = precision_score(y_true, y_pred, average='macro')
    recall_macro = recall_score(y_true, y_pred, average='macro')
    f1_macro = f1_score(y_true, y_pred, average='macro')

    print(f"Accuracy: {accuracy:.10f} %")
    print(f"Precision: {precision_macro:.10f}")
    print(f"Recall: {recall_macro:.10f}")
    print(f"F1 Score: {f1_macro:.10f}")

In [14]:
import warnings
warnings.filterwarnings("ignore")

###Logistic Regression

In [15]:
lr = LogisticRegression()
lr_params = {
    'penalty': ['l1', 'l2'],
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    'max_iter': [50, 100, 200, 500, 1000],
    'class_weight': [None, 'balanced'],
    'random_state': [42]
}
grid_lr = GridSearchCV(lr, lr_params, cv=5)
grid_lr.fit(X_train_scaled, y_train)
y_pred_lr = grid_lr.predict(X_test_scaled)

# Print details of the Logistic Regression model
print("Logistic Regression:")
print("Best Parameters:", grid_lr.best_params_)
print("Best Score:", grid_lr.best_score_* 100 )
print_metrics(y_test, y_pred_lr)
print("\n")

Logistic Regression:
Best Parameters: {'C': 1, 'class_weight': None, 'max_iter': 50, 'penalty': 'l2', 'random_state': 42, 'solver': 'newton-cg'}
Best Score: 41.833333333333336
Accuracy: 45.0000000000 %
Precision: 0.4000000000
Recall: 0.5277777778
F1 Score: 0.4264550265




In [16]:
import pickle

with open('BP_basic_logistic.pkl', 'wb') as f:
    pickle.dump(grid_lr, f)

##KNeighbors

In [17]:
knn = KNeighborsClassifier()
knn_params = {
    'n_neighbors': [3, 5, 7, 9, 11, 15, 20],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'leaf_size': [30, 40, 50, 60],
    'p': [1, 2]
}
grid_knn = GridSearchCV(knn, knn_params, cv=5)
grid_knn.fit(X_train_scaled, y_train)
y_pred_knn = grid_knn.predict(X_test_scaled)

# Print details of the k-Nearest Neighbors model
print("k-Nearest Neighbors:")
print("Best Parameters:", grid_knn.best_params_)
print("Best Score:", grid_knn.best_score_)
print_metrics(y_test, y_pred_knn)
print("\n")

k-Nearest Neighbors:
Best Parameters: {'algorithm': 'auto', 'leaf_size': 30, 'n_neighbors': 9, 'p': 1, 'weights': 'distance'}
Best Score: 0.59
Accuracy: 55.0000000000 %
Precision: 0.5000000000
Recall: 0.6250000000
F1 Score: 0.5269841270




In [18]:
with open('BP_basic_KNN.pkl', 'wb') as f:
    pickle.dump(grid_knn, f)

##SVM

In [19]:
svm = SVC()
svm_params = {
    'C': [0.1, 1, 10, 100, 1000],
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'gamma': ['scale', 'auto', 0.1, 1, 10, 100],
    'degree': [2, 3, 4],
    'coef0': [0.0, 0.5, 1.0]
}

grid_svm = GridSearchCV(svm, svm_params, cv=5)
grid_svm.fit(X_train_scaled, y_train)
y_pred_svm = grid_svm.predict(X_test_scaled)

# Print details of the Support Vector Machines model
print("Support Vector Machines:")
print("Best Parameters:", grid_svm.best_params_)
print("Best Score:", grid_svm.best_score_)
print_metrics(y_test, y_pred_svm)
print("\n")


Support Vector Machines:
Best Parameters: {'C': 10, 'coef0': 0.0, 'degree': 2, 'gamma': 1, 'kernel': 'rbf'}
Best Score: 0.6699999999999999
Accuracy: 60.0000000000 %
Precision: 0.5892857143
Recall: 0.6666666667
F1 Score: 0.5919312169




In [20]:
with open('BP_basic_SVM.pkl', 'wb') as f:
    pickle.dump(grid_svm, f)

##Decision Trees

In [21]:
dt = DecisionTreeClassifier()
dt_params = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 5, 10, 20, 30, 50, 100],
    'min_samples_split': [2, 5, 10, 20, 30, 50],
    'min_samples_leaf': [1, 2, 4, 8, 10, 20]
}

grid_dt = GridSearchCV(dt, dt_params, cv=5)
grid_dt.fit(X_train, y_train)
y_pred_dt = grid_dt.predict(X_test)

# Print details of the Decision Trees model
print("Decision Trees:")
print("Best Parameters:", grid_dt.best_params_)
print("Best Score:", grid_dt.best_score_)
print_metrics(y_test, y_pred_dt)
print("\n")


Decision Trees:
Best Parameters: {'criterion': 'entropy', 'max_depth': 100, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'random'}
Best Score: 0.6575
Accuracy: 60.0000000000 %
Precision: 0.5222222222
Recall: 0.6250000000
F1 Score: 0.5560846561




In [22]:
with open('BP_basic_DecTree.pkl', 'wb') as f:
    pickle.dump(grid_dt, f)

##Random Forest

In [23]:
rf = RandomForestClassifier()
rf_params = {
    'n_estimators': [50, 100, 200, 400, 800],
    'criterion': ['gini'],
    'max_depth': [None, 5, 10, 20,50],
    'min_samples_leaf': [1, 2, 4, 8]
}
grid_rf = GridSearchCV(rf, rf_params, cv=5)
grid_rf.fit(X_train, y_train)
y_pred_rf = grid_rf.predict(X_test)

# Print details of the Random Forest model
print("Random Forest:")
print("Best Parameters:", grid_rf.best_params_)
print("Best Score:", grid_rf.best_score_)
print_metrics(y_test, y_pred_rf)
print("\n")

Random Forest:
Best Parameters: {'criterion': 'gini', 'max_depth': 20, 'min_samples_leaf': 1, 'n_estimators': 100}
Best Score: 0.5766666666666667
Accuracy: 65.0000000000 %
Precision: 0.5722222222
Recall: 0.7083333333
F1 Score: 0.6123015873




In [24]:
with open('BP_basic_RanFor.pkl', 'wb') as f:
    pickle.dump(grid_rf, f)

##NaiveBayes

In [25]:
gnb = GaussianNB()
gnb_params = {
    'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5]  # Varying the smoothing parameter
}
grid_gnb = GridSearchCV(gnb, gnb_params, cv=5)
grid_gnb.fit(X_train, y_train)

y_pred_gnb = grid_gnb.predict(X_test)

print("Gaussian Naive Bayes:")
print("Best Parameters:", grid_gnb.best_params_)
print("Best Score:", grid_gnb.best_score_)
print_metrics(y_test, y_pred_gnb)
print("\n")


Gaussian Naive Bayes:
Best Parameters: {'var_smoothing': 1e-09}
Best Score: 0.2891666666666667
Accuracy: 35.0000000000 %
Precision: 0.1916666667
Recall: 0.3083333333
F1 Score: 0.2333333333




In [26]:
with open('BP_basic_NaiveBayes.pkl', 'wb') as f:
    pickle.dump(grid_gnb, f)