In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Load the synthetic ECG dataset (path is relative to the notebook's working directory)
file_path = 'synthetic_ecg_dataset.csv'
df = pd.read_csv(file_path)

# Separate features and target variable
X = df.drop(columns=['Label'])
y = df['Label']

# Convert the target variable to binary format (0 for Normal, 1 for Arrhythmia).
# Series.map replaces every value that is missing from the dict with NaN, so an
# unexpected or misspelled label would silently become NaN and only fail later
# inside the classifier. Validate here and report the offending values instead.
label_to_int = {'Normal': 0, 'Arrhythmia': 1}
y_mapped = y.map(label_to_int)
if y_mapped.isna().any():
    unexpected = y[y_mapped.isna()].unique().tolist()
    raise ValueError(f"Unexpected values in 'Label' column: {unexpected}")
y = y_mapped

# Split the data into training and test sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features: statistics are estimated on the training split only and
# then applied unchanged to the test split, so no test information leaks in.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [25]:
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix, classification_report
import joblib



# Initialize the XGBoost Classifier (100 boosting rounds, fixed seed)
model = XGBClassifier(n_estimators=100, random_state=42)

# Train the model on the scaled training features
model.fit(X_train, y_train)

# Evaluate the model on the held-out test split
y_pred = model.predict(X_test)
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Save the trained model
joblib.dump(model, 'xgboost_ecg_arrhythmia_predictor.pkl')

# Save the fitted scaler as well: the model was trained on scaled features, and
# the inference cell loads 'scaler.pkl' to apply the same transform to new samples.
joblib.dump(scaler, 'scaler.pkl')

Confusion Matrix:
[[100   1]
 [  0  99]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      1.00       101
           1       0.99      1.00      0.99        99

    accuracy                           0.99       200
   macro avg       0.99      1.00      0.99       200
weighted avg       1.00      0.99      1.00       200


Accuracy Score: 0.99


['scaler.pkl']

In [24]:
# Load the saved model and scaler.
# The file name matches the artifact written by the training cell
# ('xgboost_ecg_arrhythmia_predictor.pkl'). The variable keeps its original
# name `rf_model` so that any later reference to it continues to work.
rf_model = joblib.load('xgboost_ecg_arrhythmia_predictor.pkl')
scaler = joblib.load('scaler.pkl')

# Example of a new sample input (replace this with actual values).
# One row holding the dataset's five feature columns in training order:
# RR_interval, P_wave, QRS_complex, T_wave, QT_interval.
new_sample = np.array([[0.79, 0.09, 0.09, 0.17, 0.39]])  # Shape: (1, n_features)

# Fail with a clear message if the sample width does not match what the scaler
# was fitted on, rather than relying on the library's generic error.
if new_sample.shape[1] != scaler.n_features_in_:
    raise ValueError(
        f"new_sample has {new_sample.shape[1]} features, "
        f"but the scaler was fitted on {scaler.n_features_in_}"
    )

# Scale the new sample using the saved scaler
new_sample_scaled = scaler.transform(new_sample)

# Make a prediction (class 0 is Normal, anything else is reported as Arrhythmia)
predicted_class = rf_model.predict(new_sample_scaled)
print(f"Predicted Class for New Sample: {'Normal' if predicted_class[0] == 0 else 'Arrhythmia'}")


The ECG values indicate: Normal


In [1]:
import pandas as pd

# Load the uploaded dataset
file_path = 'synthetic_ecg_dataset.csv'
ecg_data = pd.read_csv(file_path)

# Print the column/dtype summary. DataFrame.info() writes to stdout and returns
# None, so it is called as a statement instead of being packed into a tuple
# (which would add a stray `None` to the cell output).
ecg_data.info()

# Show the first few rows as the cell's result so the frame is rendered richly
ecg_data.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   RR_interval  1000 non-null   float64
 1   P_wave       1000 non-null   float64
 2   QRS_complex  1000 non-null   float64
 3   T_wave       1000 non-null   float64
 4   QT_interval  1000 non-null   float64
 5   Label        1000 non-null   object 
dtypes: float64(5), object(1)
memory usage: 47.0+ KB


(   RR_interval    P_wave  QRS_complex    T_wave  QT_interval       Label
 0     0.333218  0.109567     0.148371  0.220421     0.222145  Arrhythmia
 1     0.776526  0.110851     0.110732  0.146028     0.388263      Normal
 2     0.108118  0.128511     0.164493  0.119045     0.072078  Arrhythmia
 3     0.363796  0.057631     0.183969  0.170969     0.242531  Arrhythmia
 4     0.883923  0.111827     0.143667  0.159383     0.589282  Arrhythmia,
 None)

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Target is the string 'Label' column; every other column is a feature
y = ecg_data['Label']
X = ecg_data.drop('Label', axis=1)

# Fit the encoder on the string labels, then turn them into integer codes
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

# Hold out 20% of the rows for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Build and fit a 100-tree random forest (fit returns the estimator itself)
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predicted integer codes for the held-out rows
y_pred = model.predict(X_test)

# Overall accuracy plus a per-class report labelled with the original class names
accuracy = accuracy_score(y_test, y_pred)
classification_report_str = classification_report(
    y_test,
    y_pred,
    target_names=label_encoder.classes_,
)

accuracy, classification_report_str


(0.995,
 '              precision    recall  f1-score   support\n\n  Arrhythmia       0.99      1.00      0.99        99\n      Normal       1.00      0.99      1.00       101\n\n    accuracy                           0.99       200\n   macro avg       0.99      1.00      0.99       200\nweighted avg       1.00      0.99      1.00       200\n')

In [3]:
import joblib

# Destination file for the serialized classifier
model_file_path = 'ecg_rf_model.pkl'

# Write the fitted model to disk with joblib
joblib.dump(value=model, filename=model_file_path)

# Echo the path as the cell result
model_file_path


'ecg_rf_model.pkl'

In [4]:
import joblib
import numpy as np

# Load the saved model
model = joblib.load('ecg_rf_model.pkl')

# Example test data (replace with actual test data)
sample_data = np.array([[0.53,0.12,0.11,0.21,0.35]])  # Example values for RR_interval, P_wave, etc.

# The model was fitted on a DataFrame, so it remembers its column names in
# `feature_names_in_`. Predicting on a bare ndarray triggers scikit-learn's
# "X does not have valid feature names" warning and leaves the column order
# implicit, so the sample is labelled with the names the model was trained on.
sample_frame = pd.DataFrame(sample_data, columns=model.feature_names_in_)

# Predict the class (integer code produced by the label encoder)
predicted_class = model.predict(sample_frame)

# Map the prediction to the label
predicted_label = label_encoder.inverse_transform(predicted_class)

print("Predicted Class:", predicted_label[0])


Predicted Class: Arrhythmia




In [5]:
import pandas as pd

# Load the dataset
dataset = pd.read_csv('synthetic_ecg_dataset.csv')

# Inspect the first few rows. Leaving the frame as the cell's last expression
# lets the notebook render it as a table instead of printing plain text.
dataset.head()


   RR_interval    P_wave  QRS_complex    T_wave  QT_interval       Label
0     0.333218  0.109567     0.148371  0.220421     0.222145  Arrhythmia
1     0.776526  0.110851     0.110732  0.146028     0.388263      Normal
2     0.108118  0.128511     0.164493  0.119045     0.072078  Arrhythmia
3     0.363796  0.057631     0.183969  0.170969     0.242531  Arrhythmia
4     0.883923  0.111827     0.143667  0.159383     0.589282  Arrhythmia


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Features: the five ECG measurement columns; target: the 'Label' column
X = dataset.loc[:, ['RR_interval', 'P_wave', 'QRS_complex', 'T_wave', 'QT_interval']]
y = dataset['Label']

# Fit the encoder on the target labels, then convert them to integer codes
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)


In [7]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with library-default hyperparameters and a fixed seed
model = RandomForestClassifier(random_state=42)

# Fit on the training split; the fitted estimator is the cell's result
model.fit(X=X_train, y=y_train)


In [8]:
import joblib

# Serialize the fitted classifier
joblib.dump(value=model, filename='ecg_rf_model.pkl')

# Serialize the label encoder alongside it so predictions can be mapped
# back to the original class names after reloading
joblib.dump(value=label_encoder, filename='label_encoder.pkl')


['label_encoder.pkl']

In [9]:
import numpy as np
import joblib

# Load the trained model and label encoder
model = joblib.load('ecg_rf_model.pkl')
label_encoder = joblib.load('label_encoder.pkl')

# Sample input data (replace with actual input values)
input_data = np.array([[0.79, 0.09, 0.09, 0.17, 0.39]])  # Example ECG features

# The model was fitted on a DataFrame and stores its column names in
# `feature_names_in_`. Wrapping the sample with those names avoids
# scikit-learn's "X does not have valid feature names" warning and makes the
# expected column order explicit.
input_frame = pd.DataFrame(input_data, columns=model.feature_names_in_)

# Make a prediction (integer class code)
predicted_class = model.predict(input_frame)

# Map the prediction to the original label
predicted_label = label_encoder.inverse_transform(predicted_class)

print("Predicted Class:", predicted_label[0])


Predicted Class: Normal




In [10]:
from sklearn.metrics import accuracy_score

# Predict labels for the held-out test features
y_pred = model.predict(X_test)

# Fraction of test rows whose predicted label matches the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Model Accuracy:", accuracy)


Model Accuracy: 0.995
