In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

In [None]:
# Generate synthetic data for illustration
np.random.seed(42)
num_samples = 1000
ecg_signals = np.random.randn(num_samples, 100)  # 100 time points in each ECG signal
severity_labels = np.random.randint(0, 4, size=num_samples)  # 4 severity levels

In [None]:
# Create a DataFrame
df = pd.DataFrame({'ECG_signal': list(ecg_signals), 'Severity': severity_labels})

In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df['ECG_signal'], df['Severity'], test_size=0.2, random_state=42)

In [None]:
# Feature extraction (simplified example)
# In a real-world scenario, you would use more advanced signal processing techniques
def extract_features(ecg_signal):
    # Example: mean of the ECG signal
    return np.mean(ecg_signal)

In [None]:
# Apply feature extraction to each ECG signal
X_train_features = np.array([extract_features(signal) for signal in X_train])
X_test_features = np.array([extract_features(signal) for signal in X_test])

In [None]:
# Standardize features
scaler = StandardScaler()
X_train_features = scaler.fit_transform(X_train_features.reshape(-1, 1))
X_test_features = scaler.transform(X_test_features.reshape(-1, 1))

In [None]:
# Train a Random Forest classifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train_features, y_train)

In [None]:
# Make predictions on the test set
y_pred = clf.predict(X_test_features)

In [None]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

In [None]:
print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot a few examples of ECG signals and their predicted severity
sample_indices = np.random.choice(len(X_test), size=5, replace=False)

plt.figure(figsize=(12, 6))
for i, index in enumerate(sample_indices, 1):
    plt.subplot(2, 3, i)
    plt.plot(X_test.iloc[index])
    plt.title(f"Predicted Severity: {y_pred[index]}")
    plt.xlabel("Time")
    plt.ylabel("ECG Signal")
plt.tight_layout()
plt.show()

In [None]:
################################### MORE sophisticated with real ECG data ######################################

In [None]:
import os
import wfdb

# Set the WFDB environment variable to the desired directory
os.environ['WFDB'] = 'D:\\Omar\\Friends\\European_HealthCare_Hackathon\\cinc2017\\c2017\\training2017\\training2017\\'

In [None]:
# Set the local directory for PhysioNet databases
wfdb.dl_database('aami-ec13', dl_dir='D:\\Omar\\Friends\\European_HealthCare_Hackathon\\cinc2017\\c2017\\training2017\\training2017\\')

In [None]:
import scipy.io

In [None]:
# Load annotations from the .mat file
annotation_path = os.path.join(os.environ['WFDB'], f'{record_name}.mat')
mat_data = scipy.io.loadmat(annotation_path)

# Extract annotations from the loaded .mat data (replace 'annotations' with the actual key in your .mat file)
annotations = mat_data['annotations']

# Now you can use 'annotations' in your code as needed

In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import wfdb
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [None]:
#record_name = 'aami3a'
#record_path = 'D:\\Omar\\Friends\\European_HealthCare_Hackathon\\cinc2017\\c2017\\training2017\\training2017\\' + record_name

In [None]:
# Load the record
#record = wfdb.rdrecord(record_path)
#annotation = wfdb.rdann(record_path, 'dat')

In [None]:
# Now, you should be able to load the record without FileNotFoundError
#record_name = 'aami3a'
#record = wfdb.rdrecord(record_name)
#annotation = wfdb.rdann(record_name, 'atr')

In [2]:
# Download the PhysioNet/CinC Challenge 2017 dataset (replace with your own dataset if needed)
record_list = wfdb.get_record_list('aami-ec13')

In [3]:
record_list

['aami3a',
 'aami3b',
 'aami3c',
 'aami3d',
 'aami4a',
 'aami4a_d',
 'aami4a_h',
 'aami4b',
 'aami4b_d',
 'aami4b_h']

In [5]:
# Create a DataFrame
data = []
for record_name in record_list:
    record_path = 'D:\\Omar\\Friends\\European_HealthCare_Hackathon\\cinc2017\\c2017\\training2017\\training2017\\' + record_name
    record = wfdb.rdrecord(record_path)
    annotation = wfdb.rdann(record_path, 'dat')
    ecg_signal = record.p_signal[:, 0]  # Assuming single lead ECG
    label = annotation.symbol
    data.append({'ECG_signal': ecg_signal, 'Label': label})

df = pd.DataFrame(data)

In [7]:
df

Unnamed: 0,ECG_signal,Label
0,"[0.18461538461538463, 0.18461538461538463, 0.1...","[L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, ..."
1,"[0.19230769230769232, 0.19230769230769232, 0.1...","[L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, ..."
2,"[0.2076923076923077, 0.2076923076923077, 0.223...","[L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, ..."
3,"[0.2230769230769231, 0.23846153846153847, 0.24...","[L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, ..."
4,"[-0.1, -0.075, -0.05, -0.025, 0.0, 0.0375, 0.0...","[N, N, N, N, L, L, L, L, L, L, L, L, L, L, L, ..."
5,"[-0.15, -0.125, -0.1, -0.075, -0.05, -0.025, 0...","[N, N, N, N, N, N, L, L, L, L, L, L, L, L, L, ..."
6,"[-0.0625, -0.0375, -0.00625, 0.01875, 0.04375,...","[N, N, N, L, L, L, L, L, L, L, L, L, L, L, L, ..."
7,"[-0.4625, -0.4375, -0.4125, -0.3875, -0.3625, ...","[N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, ..."
8,"[-0.875, -0.85, -0.825, -0.8, -0.775, -0.75, -...","[N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, ..."
9,"[-0.24375, -0.21875, -0.1875, -0.1625, -0.1375...","[N, N, N, N, N, N, N, N, N, L, L, L, L, L, L, ..."


In [6]:


# Map arrhythmia labels to severity levels (for illustration purposes)
severity_mapping = {'N': 0, 'A': 1, 'O': 2, '~': 3}
df['Severity'] = df['Label'].map(severity_mapping)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df['ECG_signal'], df['Severity'], test_size=0.2, random_state=42)

# Feature extraction (use more advanced methods in a real-world scenario)
def extract_features(ecg_signal):
    # Example: compute heart rate from RR intervals
    rr_intervals = np.diff(np.where(ecg_signal > 0.5)[0])  # Assuming threshold for R-peaks
    heart_rate = 60 / np.mean(rr_intervals) if len(rr_intervals) > 0 else 0
    return heart_rate

# Apply feature extraction to each ECG signal
X_train_features = np.array([extract_features(signal) for signal in X_train])
X_test_features = np.array([extract_features(signal) for signal in X_test])

# Standardize features
scaler = StandardScaler()
X_train_features = scaler.fit_transform(X_train_features.reshape(-1, 1))
X_test_features = scaler.transform(X_test_features.reshape(-1, 1))

# Train a Random Forest classifier
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train_features, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test_features)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)

# Plot a few examples of ECG signals and their predicted severity
sample_indices = np.random.choice(len(X_test), size=5, replace=False)

plt.figure(figsize=(12, 6))
for i, index in enumerate(sample_indices, 1):
    plt.subplot(2, 3, i)
    plt.plot(X_test.iloc[index])
    plt.title(f"Predicted Severity: {y_pred[index]}")
    plt.xlabel("Time")
    plt.ylabel("ECG Signal")
plt.tight_layout()
plt.show()

TypeError: unhashable type: 'list'