# Import libraries

# In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import joblib

import tensorflow as tf

from sklearn.metrics import classification_report, precision_recall_curve

# Load data

# In [None]:
# Locations of the tracing array (.npy) and the accompanying metadata table (.h5).
base_path = '/content/drive/MyDrive/RedoneDataOct/'
files = ["tracings_part_14to17_filtered.npy", "data_part_14to17_filtered.h5"]
tracings_file, data_file = (os.path.join(base_path, name) for name in files)

# Read the metadata table, then the tracings, and attach one tracing per table row.
# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load trusted files.
data = pd.read_hdf(data_file)
tracings = np.load(tracings_file, allow_pickle=True)
data['tracings'] = list(tracings)

# Label columns; their order fixes the column order of y.
arrhythmia_columns = ['1dAVb', 'RBBB', 'LBBB', 'SB', 'ST', 'AF', 'normal_ecg']

# X: array assembled from the per-row tracings; y: label matrix taken from the table.
X = np.array(data['tracings'].tolist())
y = data[arrhythmia_columns].values

# Scale data

# In [None]:
# Restore the previously fitted scaler from disk.
# NOTE(review): joblib.load unpickles the file -- only load scalers from a trusted source.
scaler_file = '/content/drive/MyDrive/RedoneDataOct/Processed/Generalscaler.pkl'
scaler = joblib.load(scaler_file)

# Run each tracing through the scaler on its own, then re-assemble the results into one array.
X = np.array(list(map(scaler.transform, X)))

# Load model and make predictions

# In [None]:
# Restore the saved Keras model and score every tracing in X.
model = tf.keras.models.load_model(
    '/content/drive/MyDrive/RedoneDataOct/Multi/PUB_MODEL7.keras'
)
y_pred = model.predict(X)

# Evaluate model performance

# In [None]:
# Per-class precision / recall / threshold arrays, keyed by label name.
precision = {}
recall = {}
thresholds = {}

# Convert labels to 0/1 integers (covers string 'TRUE'/'FALSE' cells as well as
# boolean columns). astype(int) prevents the matrix from staying object-typed
# after replace, which the sklearn metrics below cannot interpret.
y = data[arrhythmia_columns].replace({'TRUE': 1, 'FALSE': 0}).astype(int).values

# Compute precision-recall curve for each class
for i, label in enumerate(arrhythmia_columns):
    precision[label], recall[label], thresholds[label] = precision_recall_curve(y[:, i], y_pred[:, i])

# Plot precision-recall curve for each class
plt.figure(figsize=(10, 7))
for label in arrhythmia_columns:
    plt.plot(recall[label], precision[label], label=f'Precision-Recall curve for {label}')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve for Each Arrhythmia')
plt.legend(loc="best")
plt.show()

# Find the threshold that maximizes F1 score for each arrhythmia class.
# NOTE(review): the thresholds are tuned on the same X/y that is scored in the
# report below, so the reported metrics are optimistic; confirm whether a
# separate validation split should be used for threshold selection.
optimal_thresholds = {}
for label in arrhythmia_columns:
    # precision/recall have one more entry than thresholds (a final point with
    # no associated threshold); drop it so every index maps to a real threshold.
    class_precision = precision[label][:-1]
    class_recall = recall[label][:-1]
    f1_scores = 2 * (class_precision * class_recall) / (class_precision + class_recall + 1e-6)  # Avoid division by zero
    optimal_idx = np.argmax(f1_scores)
    optimal_thresholds[label] = thresholds[label][optimal_idx]
    print(f'Optimal threshold for {label}: {optimal_thresholds[label]:.2f} (F1 score: {f1_scores[optimal_idx]:.2f})')

# Apply the optimal thresholds for making predictions.
# precision_recall_curve evaluates each threshold as "score >= threshold", so
# the same inclusive comparison is used here; a strict ">" would drop the
# samples sitting exactly on the threshold and no longer match the F1 above.
y_pred_optimal_1 = np.zeros_like(y_pred)
for i, label in enumerate(arrhythmia_columns):
    y_pred_optimal_1[:, i] = (y_pred[:, i] >= optimal_thresholds[label]).astype(int)

# Evaluate model performance with the new thresholds.
# NOTE(review): zero_division=1 reports undefined metrics as 1.0, which can
# flatter classes with no predicted or no true samples.
print("Classification Report with Optimal Thresholds:")
print(classification_report(y, y_pred_optimal_1, target_names=arrhythmia_columns, zero_division=1))