In [None]:
!pip install wfdb

In [None]:
from google.colab import drive
import numpy as np
import wfdb
import pandas as pd
from scipy.signal import find_peaks
# Mount Google Drive at /content/drive; every dataset path used below lives
# under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Per-subject metadata and the heart-rate / quality annotations for the PPG records.
subject_info = pd.read_csv('/content/drive/MyDrive/mini_project_dataset/subject-info.csv')
hr_info = pd.read_csv('/content/drive/MyDrive/mini_project_dataset/quality-hr-ann.csv')

# Paths to the 19 AF recordings. The subject number is zero-padded to three
# digits ({:03d}) so that subjects 10-19 become "..._af_010_..." instead of the
# "..._af_0010_..." that a literal "00{}" prefix produced.
# NOTE(review): assumes the files on Drive use three-digit numbering (001-019) - confirm.
af_files = [
    '/content/drive/MyDrive/MIMIC perform AF non-AF database/mimic_perform_af_csv/mimic_perform_af_{:03d}_data.csv'.format(i)
    for i in range(1, 20)
]

In [None]:
# Record ids follow the pattern 100000 + 1000 * i + j for i in 0..11 and
# j in 1..4, giving 48 ids in total. Each record is stored in a folder named
# after its id, and the PPG record inside it is called "<id>_PPG".
# NOTE: `id` shadows the Python builtin; the name is kept because it is a
# notebook-level variable.
id = [str(100000 + group * 1000 + index) for group in range(12) for index in range(1, 5)]
ppg_files = [
    f'/content/drive/MyDrive/mini_project_dataset/{record_id}/{record_id}_PPG'
    for record_id in id
]

In [None]:
def load_ppg_data(ppg_files):
    """Read each record in ``ppg_files`` and collect its signal array.

    Parameters
    ----------
    ppg_files : iterable of str
        Record paths. A trailing ``.dat`` extension, if present, is removed
        before the path is handed to ``wfdb.rdrecord``.

    Returns
    -------
    list
        The ``p_signal`` attribute of every record, in the same order as
        ``ppg_files``. An empty input yields an empty list.
    """
    ppg_data = []

    for file_path in ppg_files:
        # Strip only a trailing '.dat'. A plain str.replace would also remove
        # '.dat' from the middle of the path (e.g. a folder called 'my.data').
        if file_path.endswith('.dat'):
            file_path = file_path[:-len('.dat')]
        record = wfdb.rdrecord(file_path)
        ppg_data.append(record.p_signal)

    return ppg_data

In [None]:
# Load every PPG record once; later cells reuse `ppg_data`.
ppg_data = load_ppg_data(ppg_files)

# Two per-record features:
#   amp - peak-to-peak range of the flattened signal
#   pi  - largest absolute deviation from the mean divided by the mean
#         absolute deviation (the notebook calls this the "perfusion index")
amp = []
pi = []
for record_signal in ppg_data:
    samples = record_signal.flatten()
    abs_deviation = np.abs(samples - np.mean(samples))
    amp.append(np.max(samples) - np.min(samples))
    pi.append(np.max(abs_deviation) / np.mean(abs_deviation))

# Attach the annotated heart rate and the new features to the subject table.
# NOTE(review): this relies on the rows of subject_info / hr_info being in the
# same order as ppg_files - confirm against the CSVs.
subject_info['HR'] = hr_info['HR']
subject_info['Amplitude'] = amp
subject_info['PI'] = pi

# Target: the annotated quality label, kept as a one-column DataFrame.
list_of_tuples = [(quality,) for quality in hr_info['Quality']]
y = pd.DataFrame(list_of_tuples, columns=['Quality'])

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
# Turn the Gender column into integer codes so it can be used as a feature.
label_encoder = LabelEncoder()
subject_info['Gender'] = label_encoder.fit_transform(subject_info['Gender'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(subject_info, y, test_size=0.2, random_state=42)

# Tune C and max_iter with 5-fold cross-validation on the training split.
classifier = LogisticRegression()
parameter = {'C': [1, 2, 3, 4, 5, 6, 10, 15, 20, 25, 30], 'max_iter': [100, 200, 300]}
classifier_regressor = GridSearchCV(classifier, param_grid=parameter, scoring='accuracy', cv=5)
# y_train is a one-column DataFrame; flatten it so the estimator receives a
# 1-D target and does not emit a DataConversionWarning on every fit.
classifier_regressor.fit(X_train, np.ravel(y_train))
print("*")
print(classifier_regressor.best_score_)

In [None]:
from sklearn.metrics import accuracy_score,classification_report
# Score the tuned logistic-regression model on the held-out split.
y_pred = classifier_regressor.predict(X_test)
# accuracy_score is declared as (y_true, y_pred); pass the arguments in that
# order, matching the kernel-comparison cell further down.
score = accuracy_score(y_test, y_pred)
print(score)

0.7


In [None]:
# Second feature set, computed per record from the flattened PPG signal.
num_diastolic_peaks_list = []
zero_crossing_rates = []
snr_values = []
for signal in ppg_data:
    ppg_signal = signal.flatten()
    # Peaks of the negated signal are local minima of the original signal;
    # height=-0.5 keeps only minima whose original value is <= 0.5.
    diastolic_peaks = find_peaks(-ppg_signal, height=-0.5)[0]
    num_diastolic_peaks = len(diastolic_peaks)
    # Number of consecutive-sample pairs whose sign differs (a count, not a
    # rate normalised by length).
    zero_crossings = np.count_nonzero(np.diff(np.sign(ppg_signal)))
    # Crude signal-to-noise proxy: maximum value over standard deviation.
    snr = np.max(ppg_signal) / np.std(ppg_signal)
    num_diastolic_peaks_list.append(num_diastolic_peaks)
    zero_crossing_rates.append(zero_crossings)
    snr_values.append(snr)

# Replace the first feature set with the new one. errors="ignore" makes the
# cell safe to re-run: without it the second run raises KeyError because the
# columns were already dropped.
subject_info.drop(columns=["Amplitude", "PI"], inplace=True, errors="ignore")
subject_info["num_diastolic_peaks"] = num_diastolic_peaks_list
subject_info["zero_crossing_rate"] = zero_crossing_rates
subject_info["snr_value"] = snr_values

In [None]:
# Re-encode Gender. If the earlier encoding cell has already run, the column
# already holds integer codes and this maps them onto themselves.
label_encoder = LabelEncoder()
subject_info['Gender'] = label_encoder.fit_transform(subject_info['Gender'])

# Same 80/20 split and seed as before, now over the updated feature columns.
X_train, X_test, y_train, y_test = train_test_split(subject_info, y, test_size=0.2, random_state=42)

# Tune C and max_iter with 5-fold cross-validation on the training split.
classifier = LogisticRegression()
parameter = {'C': [1, 2, 3, 4, 5, 6, 10, 15, 20, 25, 30], 'max_iter': [100, 200, 300]}
classifier_regressor = GridSearchCV(classifier, param_grid=parameter, scoring='accuracy', cv=5)
# y_train is a one-column DataFrame; flatten it so the estimator receives a
# 1-D target and does not emit a DataConversionWarning on every fit.
classifier_regressor.fit(X_train, np.ravel(y_train))
print("*")
print(classifier_regressor.best_score_)

In [None]:
from sklearn.metrics import accuracy_score,classification_report
# Score the re-tuned logistic-regression model on the held-out split.
y_pred = classifier_regressor.predict(X_test)
# accuracy_score is declared as (y_true, y_pred); pass the arguments in that order.
score = accuracy_score(y_test, y_pred)
print(score)

0.8


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Random forest tuned over tree count and depth with 5-fold cross-validation.
# A fixed random_state (same seed as the train/test split) makes the forest,
# and therefore the reported scores, reproducible between runs.
classifier_rf = RandomForestClassifier(random_state=42)
parameters_rf = {'n_estimators': [50, 100, 150, 200], 'max_depth': [None, 10, 20, 30]}
classifier_regressor_rf = GridSearchCV(classifier_rf, param_grid=parameters_rf, scoring='accuracy', cv=5)
# Flatten the one-column target so the estimator receives a 1-D array and does
# not emit a DataConversionWarning on every fit.
classifier_regressor_rf.fit(X_train, np.ravel(y_train))
print("\nRandom Forest:")
print("Best Score:", classifier_regressor_rf.best_score_)

In [None]:
from sklearn.metrics import accuracy_score,classification_report
# Score the tuned random forest on the held-out split.
y_pred = classifier_regressor_rf.predict(X_test)
# accuracy_score is declared as (y_true, y_pred); pass the arguments in that order.
score = accuracy_score(y_test, y_pred)
print(score)

0.9


In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
# Support Vector Machine (SVM): tune C and the kernel with 5-fold
# cross-validation on the training split.
classifier_svm = SVC()
parameters_svm = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
classifier_regressor_svm = GridSearchCV(classifier_svm, param_grid=parameters_svm, scoring='accuracy', cv=5)
# Flatten the one-column target so the estimator receives a 1-D array and does
# not emit a DataConversionWarning on every fit.
classifier_regressor_svm.fit(X_train, np.ravel(y_train))

print("\nSupport Vector Machine:")
print("Best Score:", classifier_regressor_svm.best_score_)

In [None]:
from sklearn.metrics import accuracy_score,classification_report
# Score the tuned SVM on the held-out split.
y_pred = classifier_regressor_svm.predict(X_test)
# accuracy_score is declared as (y_true, y_pred); pass the arguments in that order.
score = accuracy_score(y_test, y_pred)
print(score)

0.9


In [None]:
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix
# Fit one SVC per kernel with default hyper-parameters and keep the one with
# the highest accuracy on the held-out split.
# NOTE(review): the kernel is chosen using the test split, so "Best accuracy"
# is an optimistic estimate; choosing by cross-validation on the training
# split would avoid that.
kernels = ["linear", "rbf", "poly", "sigmoid"]
best_accuracy = 0
best_kernel = None
best_model = None

# Flatten the one-column target once so each fit receives a 1-D array and does
# not emit a DataConversionWarning.
y_train_1d = np.ravel(y_train)

for kernel in kernels:
    # Create and train the SVM model
    model = SVC(kernel=kernel)
    model.fit(X_train, y_train_1d)

    # Make predictions on the testing set
    y_pred = model.predict(X_test)

    # Evaluate model performance
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy with kernel '{kernel}':", accuracy)

    # Always accept the first model so best_model is never None, even if every
    # kernel scores 0; afterwards only a strictly better accuracy replaces it.
    if best_model is None or accuracy > best_accuracy:
        best_accuracy = accuracy
        best_kernel = kernel
        best_model = model

print("\nBest kernel:", best_kernel)
print("Best accuracy:", best_accuracy)
cm = confusion_matrix(y_test, best_model.predict(X_test))
print("Confusion Matrix:\n", cm)
# # Visualize decision boundary (using the best model)
# plt.scatter(X_test["HR"], y_test,  cmap="viridis")
# plt.plot(X_test["HR"], best_model.predict(X_test), color="red", linewidth=2, label="Predicted")
# plt.xlabel("Heart Rate")
# plt.ylabel("Quality")
# plt.title("SVM Decision Boundary (Best Kernel)")
# plt.legend()
# plt.show()

Accuracy with kernel 'linear': 0.9
Accuracy with kernel 'rbf': 0.6
Accuracy with kernel 'poly': 0.6
Accuracy with kernel 'sigmoid': 0.6

Best kernel: linear
Best accuracy: 0.9
Confusion Matrix:
 [[3 1]
 [0 6]]


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
