In [None]:
# Step 1: Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, recall_score
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
# Step 2: Load the dataset and preview its first and last rows
df = pd.read_csv("D://ECGdata.csv")
# Each preview is printed under its own heading: head first, then tail.
for heading, preview in (("First few rows:", df.head()), ("\nLast few rows:", df.tail())):
    print(heading)
    print(preview)

In [None]:
# Step 3: Preprocessing and Visualization
# Count missing values per column and actually show them (a bare expression
# in the middle of a cell produces no output).
missing_values = df.isnull().sum()
print("Missing values per column:")
print(missing_values)
# Summary statistics of the dataset
print(df.describe(include='all'))
# One-hot encode the 'ECG_signal' column
df_encoded = pd.get_dummies(df, columns=['ECG_signal'], drop_first=True)
# Fill missing values with the median of the respective columns.
# numeric_only=True keeps this working on pandas >= 2.0, where median() raises
# if any non-numeric column is present; such columns are simply left unfilled.
df_filled = df_encoded.fillna(df_encoded.median(numeric_only=True))
# Create visualizations
# Plot the values held in column positions 1..55 of one example row.
# NOTE(review): assumes those positions hold the per-record ECG measurements
# in df_filled (column order changes after get_dummies) - confirm.
row_number = 77  # Example row number, change as needed
ecg_data = df_filled.iloc[row_number, 1:56].values
plt.figure(figsize=(12, 6))
plt.plot(ecg_data)
plt.title(f'ECG SIGNAL OF PERSON {row_number}')
plt.xlabel('Sample')
plt.ylabel('Amplitude')
plt.show()
# Class balance of the target column
plt.figure(figsize=(10, 6))
sns.countplot(x='ECG_signal', data=df, palette='pastel', hue='ECG_signal', dodge=False, legend=False)
plt.title('Distribution of ECG Signal Types')
plt.show()
# Correlation heatmap; numeric_only=True for the same pandas >= 2.0 reason as above.
corr = df_encoded.corr(numeric_only=True)
# Hide the upper triangle (including the diagonal): the matrix is symmetric.
mask = np.triu(np.ones_like(corr, dtype=bool))
f, ax = plt.subplots(figsize=(11, 9))
cmap = sns.diverging_palette(230, 20, as_cmap=True)
sns.heatmap(corr, mask=mask, cmap=cmap, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5})
plt.title('Correlation Heatmap of ECG Features')
plt.show()
#step 4: Feature visualization
# Re-attach the original (un-encoded) class label so the plots can group by it.
df_filled['ECG_signal'] = df['ECG_signal']
features = ['hbpermin', 'QRSarea', 'QRSperi']
# One box plot per listed feature, grouped by ECG signal type.
for feature in features:
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=df_filled, x='ECG_signal', y=feature)
    plt.title(f'Distribution of {feature} for Each ECG Signal Type')
    plt.show()
# Violin plot of heart rate per ECG signal type.
plt.figure(figsize=(10, 6))
sns.violinplot(data=df_filled, x='ECG_signal', y='hbpermin')
plt.title('Distribution of Heartbeats per Minute for Each ECG Signal Type')
plt.show()
# Scatter plot of heart rate against the class label.
x, y = df_filled['hbpermin'], df_filled['ECG_signal']
plt.scatter(x, y)
plt.xlabel('Heart Beat per minute')
plt.ylabel('Cardiac Ailments')
plt.show()


In [None]:
# MACHINE LEARNING - CLASSIFICATION (SVM,RANDOM FOREST,NAIVE BAYES)
# Step 5: Load the dataset
df = pd.read_csv(r"D:\ECGdata.csv")
# Step 6: Names of the columns used as model inputs
selected_features = [
    'hbpermin', 'Pseg', 'PQseg', 'QRSseg', 'QRseg', 'QTseg', 'RSseg', 'STseg', 'Tseg',
    'PTseg', 'ECGseg', 'QRtoQSdur', 'RStoQSdur', 'RRmean', 'PPmean', 'PQdis', 'PonQdis',
    'PRdis', 'PonRdis', 'PSdis', 'PonSdis', 'PTdis', 'PonTdis', 'PToffdis', 'QRdis',
    'QSdis', 'QTdis', 'QToffdis', 'RSdis', 'RTdis', 'RToffdis', 'STdis', 'SToffdis',
    'PonToffdis', 'PonPQang', 'PQRang', 'QRSang', 'RSTang', 'STToffang', 'RRTot',
    'NNTot', 'SDRR', 'IBIM', 'IBISD', 'SDSD', 'RMSSD', 'QRSarea', 'QRSperi', 'PQslope',
    'QRslope', 'RSslope', 'STslope', 'NN50', 'pNN50',
]
# Step 7: Separate the feature matrix (X) from the target labels (y)
X = df[selected_features]
y = df['ECG_signal']
# Step 8: Hold out 25% of the rows for testing; the seed fixes the split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)
# Step 9: Median imputation. The medians are learned from the training rows
# only and then applied unchanged to the test rows.
imputer = SimpleImputer(strategy='median')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)
# Step 10: Create the three classifiers, then fit each on the imputed training data
svm_classifier = SVC(kernel='linear')
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
nb_classifier = GaussianNB()
for classifier in (svm_classifier, rf_classifier, nb_classifier):
    classifier.fit(X_train_imputed, y_train)
# Step 11: Predict the test-set labels with every classifier
svm_pred, rf_pred, nb_pred = [
    classifier.predict(X_test_imputed)
    for classifier in (svm_classifier, rf_classifier, nb_classifier)
]
# Step 12: Accuracy of each classifier on the held-out test rows
svm_accuracy, rf_accuracy, nb_accuracy = [
    accuracy_score(y_test, predictions)
    for predictions in (svm_pred, rf_pred, nb_pred)
]
# Report the accuracies
for accuracy_heading, accuracy_value in (
    ("Validation Accuracy of SVM:", svm_accuracy),
    ("Validation Accuracy of Random Forest:", rf_accuracy),
    ("Validation Accuracy of Naive Bayes:", nb_accuracy),
):
    print(accuracy_heading, accuracy_value)
# Step 13: AUROC score
# The SVM only provides decision scores, so they are turned into per-class
# pseudo-probabilities with a softmax (each row sums to 1, as roc_auc_score
# requires for multi-class input).
svm_pred_scores = svm_classifier.decision_function(X_test_imputed)
# Subtract each row's maximum before exponentiating. This leaves the softmax
# value unchanged but prevents np.exp from overflowing to inf (and the
# resulting inf/inf = NaN) when the raw scores are large.
svm_shifted_scores = svm_pred_scores - np.max(svm_pred_scores, axis=1, keepdims=True)
svm_exp_scores = np.exp(svm_shifted_scores)
svm_pred_probs = svm_exp_scores / np.sum(svm_exp_scores, axis=1, keepdims=True)
rf_pred_probs = rf_classifier.predict_proba(X_test_imputed)
nb_pred_probs = nb_classifier.predict_proba(X_test_imputed)
# One-vs-rest AUROC, averaged over classes weighted by class frequency
svm_auroc = roc_auc_score(y_test, svm_pred_probs, multi_class='ovr', average='weighted')
rf_auroc = roc_auc_score(y_test, rf_pred_probs, multi_class='ovr', average='weighted')
nb_auroc = roc_auc_score(y_test, nb_pred_probs, multi_class='ovr', average='weighted')
# Step 14: F1 score (weighted by class frequency)
svm_f1 = f1_score(y_test, svm_pred, average='weighted')
rf_f1 = f1_score(y_test, rf_pred, average='weighted')
nb_f1 = f1_score(y_test, nb_pred, average='weighted')
# Step 15: Recall score (weighted by class frequency)
svm_recall = recall_score(y_test, svm_pred, average='weighted')
rf_recall = recall_score(y_test, rf_pred, average='weighted')
nb_recall = recall_score(y_test, nb_pred, average='weighted')
# Step 16: Display scores
print("AUROC Score (SVM):", svm_auroc)
print("AUROC Score (Random Forest):", rf_auroc)
print("AUROC Score (Naive Bayes):", nb_auroc)
print("F1 Score (SVM):", svm_f1)
print("F1 Score (Random Forest):", rf_f1)
print("F1 Score (Naive Bayes):", nb_f1)
print("Recall Score (SVM):", svm_recall)
print("Recall Score (Random Forest):", rf_recall)
print("Recall Score (Naive Bayes):", nb_recall)
# Step 17: Visualize results
# Bar graph: actual share of each abnormal class next to the total abnormal
# share predicted by each model.
plt.figure(figsize=(8, 5))
actual_counts = y_test.value_counts(normalize=True)
predicted_counts_svm = pd.Series(svm_pred).value_counts(normalize=True)
predicted_counts_rf = pd.Series(rf_pred).value_counts(normalize=True)
predicted_counts_nb = pd.Series(nb_pred).value_counts(normalize=True)
# Only display abnormal cases
abnormal_cases = ['ARR', 'AFF', 'CHF']
# reindex(..., fill_value=0) instead of plain indexing: a class that a model
# never predicted is absent from value_counts() and would raise KeyError.
predicted_counts_svm_abnormal = predicted_counts_svm.reindex(abnormal_cases, fill_value=0).sum()
predicted_counts_rf_abnormal = predicted_counts_rf.reindex(abnormal_cases, fill_value=0).sum()
predicted_counts_nb_abnormal = predicted_counts_nb.reindex(abnormal_cases, fill_value=0).sum()
# Drop the NSR label for plotting; errors='ignore' tolerates a test split without NSR.
actual_counts = actual_counts.drop('NSR', errors='ignore')
# Every bar gets its own category name. Mixing a pandas bar plot (numeric
# positions 0, 1, 2) with string-category bars puts the model bars on top of
# the actual bars and overwrites their tick labels.
actual_bar_names = ['Actual ' + str(class_label) for class_label in actual_counts.index]
plt.bar(actual_bar_names, actual_counts.values, color='blue', alpha=0.5, label='Actual')
plt.bar('SVM', predicted_counts_svm_abnormal, color='orange', alpha=0.7, label='Predicted (SVM)')
plt.bar('Random Forest', predicted_counts_rf_abnormal, color='green', alpha=0.7, label='Predicted (Random Forest)')
plt.bar('Naive Bayes', predicted_counts_nb_abnormal, color='red', alpha=0.7, label='Predicted (Naive Bayes)')
plt.title('Actual vs. Predicted (Abnormal Cases)')
plt.xlabel('Labels')
plt.ylabel('Percentage')
plt.xticks(rotation=45, ha='right')
plt.legend()
plt.show()
# Bar graph: Validation Accuracy of 3 Models
# Named model_names rather than `models`, so the notebook-level
# `tensorflow.keras.models` import is not shadowed by a list of strings.
model_names = ['SVM', 'Random Forest', 'Naive Bayes']
accuracies = [svm_accuracy, rf_accuracy, nb_accuracy]
plt.figure(figsize=(8, 5))
plt.bar(model_names, accuracies, color=['blue', 'green', 'red'])
plt.title('Validation Accuracy of 3 Models')
plt.xlabel('Models')
plt.ylabel('Accuracy')
plt.ylim(0, 1)  # accuracy is a fraction in [0, 1]
plt.show()
# User input and analysis
max_row_number = len(X_test) - 1
row_number = int(input("Enter a row number from the test dataset (0 to {}): ".format(max_row_number)))
# Reject out-of-range values explicitly: a negative number would otherwise be
# accepted silently and select a row counted from the end of the test set.
if not 0 <= row_number <= max_row_number:
    raise IndexError(
        "Row number {} is outside the valid range 0 to {}.".format(row_number, max_row_number)
    )
# Get the features of the selected row as a (1, n_features) array, the shape predict() expects
selected_row_features = X_test_imputed[row_number, :].reshape(1, -1)
# Predict the label using all classifiers
svm_pred_row = svm_classifier.predict(selected_row_features)
rf_pred_row = rf_classifier.predict(selected_row_features)
nb_pred_row = nb_classifier.predict(selected_row_features)
# Define a function to plot pie charts
def plot_pie_chart(pred_svm, pred_rf, pred_nb):
    """Draw one Normal/Abnormal pie chart per classifier, side by side.

    A prediction equal to 'NSR' counts as Normal; every other label counts
    as Abnormal.

    Parameters:
        pred_svm: predicted labels from the SVM (array, list or Series).
        pred_rf: predicted labels from the Random Forest.
        pred_nb: predicted labels from Naive Bayes.

    Returns:
        None. The figure is shown with plt.show().
    """
    labels = ['Normal', 'Abnormal']
    named_predictions = (
        ('SVM', pred_svm),
        ('Random Forest', pred_rf),
        ('Naive Bayes', pred_nb),
    )
    fig, axs = plt.subplots(1, 3, figsize=(15, 5))
    for ax, (model_name, predictions) in zip(axs, named_predictions):
        # Convert to an array first: comparing a plain list with 'NSR' gives a
        # single False instead of an element-wise result, which would silently
        # count every prediction as Abnormal.
        predictions = np.atleast_1d(np.asarray(predictions))
        normal_count = int(np.sum(predictions == 'NSR'))
        counts = [normal_count, len(predictions) - normal_count]
        ax.pie(counts, labels=labels, autopct='%1.1f%%', startangle=140)
        ax.set_title(f'Predicted Classification ({model_name})')
    plt.show()
# Display the three classifiers' predictions for the selected test row as
# Normal/Abnormal pie charts (one chart per classifier).
plot_pie_chart(svm_pred_row, rf_pred_row, nb_pred_row)

In [None]:
#DEEP LEARNING - CLASSIFICATION AND DETECTION (CNN)
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from matplotlib.animation import FuncAnimation  # Import FuncAnimation
# Step 18: Load the dataset
df = pd.read_csv(r"D:\ECGdatacopy.csv")
# Step 19: Define selected features and labels
selected_features = ['hbpermin', 'Pseg', 'PQseg', 'QRSseg', 'QRseg', 'QTseg', 'RSseg', 'STseg', 'Tseg', 'PTseg', 'ECGseg',
                     'QRtoQSdur', 'RStoQSdur', 'RRmean', 'PPmean', 'PQdis', 'PonQdis', 'PRdis', 'PonRdis', 'PSdis',
                     'PonSdis', 'PTdis', 'PonTdis', 'PToffdis', 'QRdis', 'QSdis', 'QTdis', 'QToffdis', 'RSdis', 'RTdis',
                     'RToffdis', 'STdis', 'SToffdis', 'PonToffdis', 'PonPQang', 'PQRang', 'QRSang', 'RSTang', 'STToffang',
                     'RRTot', 'NNTot', 'SDRR', 'IBIM', 'IBISD', 'SDSD', 'RMSSD', 'QRSarea', 'QRSperi', 'PQslope', 'QRslope',
                     'RSslope', 'STslope', 'NN50', 'pNN50']
X = df[selected_features]  # Features
# Map labels to integers: 0 = normal sinus rhythm, 1 = any of the three abnormal classes
label_map = {'NSR': 0, 'ARR': 1, 'AFF': 1, 'CHF': 1}
y = df['ECG_signal'].map(label_map)  # Labels
# Series.map yields NaN for any value missing from label_map (including a
# missing label). Fail here with a clear message rather than passing NaN
# targets on to training.
unmapped_rows = y.isna()
if unmapped_rows.any():
    unexpected_labels = df.loc[unmapped_rows, 'ECG_signal'].unique().tolist()
    raise ValueError(
        "ECG_signal contains labels that are not in label_map: {}".format(unexpected_labels)
    )
# Step 20: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Step 21: Handle missing values (if any); medians are learned from the training rows only
imputer = SimpleImputer(strategy='median')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)
# Step 22: Reshape data for CNN input: (samples, n_features, 1 channel)
input_shape = (X_train_imputed.shape[1], 1)
X_train_reshaped = X_train_imputed.reshape(-1, X_train_imputed.shape[1], 1)
X_test_reshaped = X_test_imputed.reshape(-1, X_test_imputed.shape[1], 1)
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout and
# batch shuffling (and therefore the reported metrics) are repeatable across
# runs. 42 matches the random_state used for the data split.
tf.keras.utils.set_random_seed(42)
# Step 23: Define CNN architecture with Dropout
model = models.Sequential([
    layers.Conv1D(32, 3, activation='relu', input_shape=input_shape),
    layers.MaxPooling1D(2),
    layers.Conv1D(64, 3, activation='relu'),
    layers.MaxPooling1D(2),
    layers.Conv1D(64, 3, activation='relu'),
    layers.Flatten(),
    layers.Dropout(0.5),  # Add Dropout layer with dropout rate of 0.5
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # single sigmoid unit: output is the abnormal-class score
])
# Step 24: Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Step 25: Train the model with Early Stopping
# Stop once val_loss has not improved for 3 consecutive epochs and restore
# the weights from the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
# validation_split=0.2 holds out part of the training data for monitoring val_loss
history = model.fit(X_train_reshaped, y_train, epochs=20, validation_split=0.2, callbacks=[early_stopping])
# Step 26: Evaluate the model on the held-out test set
test_loss, test_accuracy = model.evaluate(X_test_reshaped, y_test)
print('CNN Model Test Accuracy:', test_accuracy)
# Predicted scores, then hard 0/1 labels using a 0.5 cut-off
y_pred_prob_test = model.predict(X_test_reshaped)
y_pred_test = (y_pred_prob_test > 0.5).astype(int)
# AUROC uses the raw scores; F1 and recall use the thresholded labels
auroc_score = roc_auc_score(y_test, y_pred_prob_test)
print('AUROC Score:', auroc_score)
f1_score_cnn = f1_score(y_test, y_pred_test)
recall_score_cnn = recall_score(y_test, y_pred_test)
for metric_heading, metric_value in (
    ('F1 Score:', f1_score_cnn),
    ('Recall Score:', recall_score_cnn),
):
    print(metric_heading, metric_value)
# Step 27: Plot training and validation loss per epoch
plt.figure(figsize=(10, 6))
for history_key, curve_label in (
    ('loss', 'Training Loss'),
    ('val_loss', 'Validation Loss'),
):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
# Bar graph: single bar with the accuracy returned by model.evaluate
plt.figure(figsize=(6, 4))
plt.bar(['CNN'], [test_accuracy], color='purple')
plt.title('Validation Accuracy of CNN Model')
plt.xlabel('Model')
plt.ylabel('Accuracy')
plt.ylim(0, 1)
plt.show()
# Bar graph: Actual vs. Predicted (CNN)
# Compares the actual share of abnormal test rows with the share the CNN
# predicted as abnormal.
plt.figure(figsize=(8, 5))
actual_counts = y_test.value_counts(normalize=True)
predicted_counts_cnn = pd.Series(y_pred_test.flatten()).value_counts(normalize=True)  # Flatten y_pred_test
# Only display abnormal cases
abnormal_cases = [1]  # 1 is the abnormal class in the label mapping
# reindex(..., fill_value=0) instead of plain indexing: if the model never
# predicts class 1 the label is absent from value_counts() and indexing
# would raise KeyError.
predicted_counts_cnn_abnormal = predicted_counts_cnn.reindex(abnormal_cases, fill_value=0).sum()
# Drop the normal class (0) for plotting; errors='ignore' tolerates a split without it.
actual_counts = actual_counts.drop(0, errors='ignore')
actual_counts_abnormal = actual_counts.reindex(abnormal_cases, fill_value=0).sum()
# Two separately named bars. Mixing a pandas bar plot (numeric position 0)
# with a string-category bar draws the 'CNN' bar on top of the actual bar.
plt.bar('Actual', actual_counts_abnormal, color='blue', alpha=0.5, label='Actual')
plt.bar('CNN', predicted_counts_cnn_abnormal, color='purple', alpha=0.7, label='Predicted (CNN)')
plt.title('Actual vs. Predicted (Abnormal Cases) - CNN')
plt.xlabel('Labels')
plt.ylabel('Percentage')
plt.legend()
plt.show()
# Training history in two stacked panels: accuracy on top, loss below
plt.figure(figsize=(10, 6))
history_panels = (
    # (panel position, training key, validation key,
    #  training legend, validation legend, title, y-axis label)
    (1, 'accuracy', 'val_accuracy',
     'Training Accuracy', 'Validation Accuracy',
     'Training and Validation Accuracy', 'Accuracy'),
    (2, 'loss', 'val_loss',
     'Training Loss', 'Validation Loss',
     'Training and Validation Loss', 'Loss'),
)
for (panel_position, train_key, val_key,
     train_legend, val_legend, panel_title, y_axis_label) in history_panels:
    plt.subplot(2, 1, panel_position)
    plt.plot(history.history[train_key], label=train_legend)
    plt.plot(history.history[val_key], label=val_legend)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_axis_label)
    plt.legend()
# Keep the titles and axis labels of the two panels from overlapping
plt.tight_layout()
plt.show()
# Mapping of dataset label codes to full disease names
disease_mapping = {
    'ARR': 'ARRHYTHMIA',
    'AFF': 'ATRIAL FIBRILLATION',
    'CHF': 'CONGESTIVE HEART FAILURE',
    'NSR': 'NORMAL SINUS RHYTHM'
}
# User input and analysis
max_row_number = len(X_test) - 1
row_number = int(input("Enter a row number from the test dataset (0 to {}): ".format(max_row_number)))
# Reject out-of-range values explicitly: a negative number would otherwise be
# accepted silently and select a row counted from the end of the test set.
if not 0 <= row_number <= max_row_number:
    raise IndexError(
        "Row number {} is outside the valid range 0 to {}.".format(row_number, max_row_number)
    )
selected_row_features = X_test_imputed[row_number, :].reshape(1, X_test_imputed.shape[1], 1)  # Reshape to match model input shape
y_pred_row = model.predict(selected_row_features)
# row_number is a position inside the shuffled test split, not inside df.
# Translate it to the original dataframe index so the plotted record is the
# same one that was just classified.
source_index = X_test.index[row_number]
# Get ECG data and convert to numeric (skip non-numeric values)
ecg_data = df.loc[source_index].iloc[1:56].apply(pd.to_numeric, errors='coerce').dropna().values
plt.figure(figsize=(12, 6))
plt.plot(ecg_data)
plt.title(f'ECG SIGNAL OF PERSON {row_number}')
plt.xlabel('Sample')
plt.ylabel('Amplitude')
plt.show()
# Step 28: Detect disease based on abnormal prediction
def detect_disease(y_pred, row_number):
    """Report the recorded diagnosis for a test row the CNN flagged as abnormal.

    The CNN is a binary classifier (normal vs. abnormal), so it cannot name a
    disease. When the prediction is abnormal, the disease name is looked up
    from the dataset's own 'ECG_signal' label for that row; it is ground
    truth, not a model output.

    Parameters:
        y_pred: abnormal-class score for one row (a scalar or a one-element
            array such as the output of model.predict on a single row).
        row_number: position of the row inside the test split.

    Returns:
        The full disease name from disease_mapping ('Unknown' if the label
        code is not in the mapping) when the score is above 0.5, otherwise
        None.
    """
    # model.predict returns a (1, 1) array; reduce it to a plain float.
    abnormal_score = float(np.asarray(y_pred).item())
    if abnormal_score > 0.5:  # If abnormal prediction
        actual_label = y_test.iloc[row_number]  # 0/1 label from the test split
        # y_test.index holds index *labels* of df, so .loc (label-based) is
        # required; .iloc is only equivalent for a default 0..n-1 index.
        recorded_diagnosis = df.loc[y_test.index[row_number], 'ECG_signal']
        print("Actual Label (0 = normal, 1 = abnormal):", actual_label)
        print("Recorded Diagnosis (dataset label, not a model prediction):", recorded_diagnosis)
        return disease_mapping.get(recorded_diagnosis, 'Unknown')
    else:  # If normal prediction
        print("No Abnormality Detected")
        return None
import matplotlib.pyplot as plt
def display_big_screen_warning(pred_prob, detected_disease=None):
    """Show a full-figure text banner summarising the prediction.

    Parameters:
        pred_prob: abnormal-class score; values of 0.5 or below are treated
            as normal.
        detected_disease: optional condition name appended to the abnormal
            message when truthy.

    Returns:
        None. The banner is shown with plt.show().
    """
    plt.figure(figsize=(10, 6))
    plt.axis('off')  # text-only banner, no axes
    is_normal = pred_prob <= 0.5
    if is_normal:
        banner_text = 'Great News!\nYour ECG signal is NORMAL.\nKeep up the good work!'
        banner_colour = 'green'
    else:
        banner_text = 'ABNORMAL SIGNAL DETECTED!\nPlease consult a healthcare professional.'
        if detected_disease:
            banner_text += '\nDetected Condition: ' + detected_disease
        banner_colour = 'red'
    plt.text(0.5, 0.5, banner_text, ha='center', va='center', fontsize=20, color=banner_colour)
    plt.show()
# Call the function to detect disease and display the warning
detected_disease = detect_disease(y_pred_row, row_number)
# Always show the banner; its normal/abnormal wording is decided from the
# prediction itself. 'Unknown' only means the label code had no full name in
# disease_mapping, so the condition line is omitted in that case; it must not
# be reported as healthy, because detect_disease returns 'Unknown' only for
# an abnormal prediction.
display_big_screen_warning(
    y_pred_row,
    None if detected_disease == 'Unknown' else detected_disease,
)
# Define function to plot pie chart showing predicted classification
def plot_pie_chart(pred_cnn):
    """Draw a Normal/Abnormal pie chart for CNN predictions.

    Parameters:
        pred_cnn: abnormal-class scores (or already thresholded 0/1 labels)
            as a scalar, list or array of any shape. NaN entries are ignored.

    Returns:
        None. Prints "Invalid prediction." and returns without plotting when
        there is no usable prediction; otherwise shows the figure.
    """
    labels = ['Normal', 'Abnormal']
    scores = np.asarray(pred_cnn, dtype=float).ravel()
    scores = scores[~np.isnan(scores)]  # ignore missing predictions
    # The model emits sigmoid scores, which are practically never exactly 0 or
    # 1, so apply the same 0.5 cut-off used for the test-set predictions
    # instead of testing for equality. Hard 0/1 labels are classified the same
    # way as before.
    abnormal_count = int(np.sum(scores > 0.5))
    pred_counts_cnn = [int(scores.size) - abnormal_count, abnormal_count]
    # Nothing to plot when there is no usable prediction
    if not any(pred_counts_cnn):
        print("Invalid prediction.")
        return
    plt.figure(figsize=(6, 4))
    plt.pie(pred_counts_cnn, labels=labels, autopct='%1.1f%%', startangle=140)
    plt.title('Predicted Classification (CNN)')
    plt.show()
# Display the CNN prediction for the selected test row as a Normal/Abnormal
# pie chart.
plot_pie_chart(y_pred_row)