In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from colorama import Fore, Style
import tensorflow as tf
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from scikeras.wrappers import KerasClassifier
import pickle

def print_decorative_log(message, color=Fore.BLUE, style=Style.RESET_ALL):
    """Print ``message`` framed by rows of ``#`` characters.

    Args:
        message: Content to display. It is converted with ``str()``, and text
            containing line breaks is framed line by line.
        color: String written immediately before the top border.
        style: String written immediately after the bottom border.
    """
    # Convert to text before measuring: len() of a non-string (a set, a list)
    # is its element count, not the width of what gets printed.
    text_lines = str(message).splitlines() or [""]
    inner_width = max(len(line) for line in text_lines)
    decorative_line = "#" * (inner_width + 4)  # widest line plus "# " and " #"

    print(color + decorative_line)
    for line in text_lines:
        # Pad shorter lines so the right-hand border stays aligned
        print(f"# {line.ljust(inner_width)} #")
    print(decorative_line + style)


# Load dataset
print_decorative_log("Loading Dataset", Fore.YELLOW)
df = pd.read_csv('merged_dataset.csv')
# Name the columns: 300 MFCC features followed by the class label.
# NOTE(review): read_csv treats the first CSV row as a header by default; if the
# file has no header row, that first sample is dropped here — confirm.
columns = ['mfcc_' + str(i) for i in range(1, 301)] + ['label']

# Assign the column names to the DataFrame
df.columns = columns

print_decorative_log("Dataset Loaded", Fore.GREEN)

# Configure TensorFlow to use GPU (allocate GPU memory on demand).
# The scikit-learn SVM trained below does not use this session.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(sess)

# # Dropping rows with label equal to 3
# df = df[df['label'] != 3]

print_decorative_log("Dataset preparation and splitting", Fore.YELLOW)
X = df.drop('label', axis=1).values.astype(np.float32)  # Features
y = df['label'].values.astype(np.float32)  # Labels

# Split the data into training and testing sets. The unscaled split is kept
# under its own names because the pipeline below must be fitted on it.
# NOTE(review): the split is not stratified although the classes are heavily
# imbalanced — consider stratify=y.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print_decorative_log("Dataset Splitted", Fore.GREEN)

# Standardised copies of the split, kept under the original names.
# They are NOT fed to the SVM pipeline, which carries its own scaler.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# SVM Model
svm_model = SVC()
svm_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', svm_model)
])

print_decorative_log("Pipeline Configured", Fore.GREEN)

print_decorative_log("Training SVM Model", Fore.YELLOW)
# Fit on the UNSCALED features so the pipeline's scaler learns the real feature
# statistics. Fitting on pre-standardised data would leave that scaler close to
# an identity transform, and the exported pipeline could not be applied to raw
# features at inference time.
svm_pipeline.fit(X_train_raw, y_train)

print_decorative_log("SVM Model Trained", Fore.GREEN)

print_decorative_log("Evaluating SVM Model", Fore.YELLOW)
# Predict once and derive accuracy from those predictions; calling
# svm_pipeline.score() would repeat the whole prediction pass.
y_pred = svm_pipeline.predict(X_test_raw)
accuracy = float(np.mean(y_pred == y_test))
report = classification_report(y_test, y_pred)
print_decorative_log("SVM Accuracy:" + str(accuracy), Fore.YELLOW)
# The report is a multi-line table, so it is printed as plain text rather than
# inside a one-line decorative box.
print()
print(report)
print_decorative_log("SVM Model Evaluated", Fore.GREEN)
  

[33m###################
# Loading Dataset #
###################[0m
[32m##################
# Dataset Loaded #
##################[0m
[33m#####################################
# Dataset preparation and splitting #
#####################################[0m
[32m####################
# Dataset Splitted #
####################[0m
[32m#######################
# Pipeline Configured #
#######################[0m
[33m######################
# Training SVM Model #
######################[0m
[32m#####################
# SVM Model Trained #
#####################[0m
[33m########################
# Evaluating SVM Model #
########################[0m
[33m##################################
# SVM Accuracy:0.994797368080323 #
##################################[0m
[32m#####
# {'              precision    recall  f1-score   support\n\n         0.0       0.85      0.73      0.78       512\n         1.0       0.93      0.77      0.84       540\n         2.0       0.88      0.67      0.76       506\n 

In [10]:
# Emit the headline accuracy, the per-class report and a trailing blank line
# with a single print call (one item per line).
print(f"SVM Accuracy: {accuracy}", report, "", sep="\n")

SVM Accuracy: 0.994797368080323
              precision    recall  f1-score   support

         0.0       0.85      0.73      0.78       512
         1.0       0.93      0.77      0.84       540
         2.0       0.88      0.67      0.76       506
         3.0       1.00      1.00      1.00     83399

    accuracy                           0.99     84957
   macro avg       0.91      0.79      0.85     84957
weighted avg       0.99      0.99      0.99     84957




In [11]:
# Serialise the fitted SVM pipeline to disk in pickle format
with open('epilepsy_prediction_model.pkl', mode='wb') as model_fh:
    pickle.dump(svm_pipeline, model_fh)

print_decorative_log("SVM Model Exported", Fore.GREEN)

[32m######################
# SVM Model Exported #
######################[0m


In [4]:
import os
import tempfile

import joblib
import librosa
import mne
import numpy as np
import pandas as pd
import streamlit as st
from colorama import Fore, Style
from sklearn.preprocessing import StandardScaler

# Electrode pairs used as (anode, cathode) when building the bipolar channels
channel_pairs = [
    ['EEG Fp1', 'EEG F7'],
    ['EEG F7', 'EEG T3'],
    ['EEG T3', 'EEG T5'],
    ['EEG T5', 'EEG O1'],
    ['EEG Fp1', 'EEG F3'],
    ['EEG C3', 'EEG F3'],
    ['EEG F3', 'EEG O1'],
    ['EEG Fp2', 'EEG F4'],
    ['EEG F4', 'EEG C4'],
    ['EEG C4', 'EEG P4'],
    ['EEG P4', 'EEG O2'],
    ['EEG Fp2', 'EEG F8'],
    ['EEG F8', 'EEG T4'],
    ['EEG T4', 'EEG T6'],
    ['EEG T6', 'EEG O2'],
]
# "<anode>-<cathode>" label for each pair, in the same order
channel_pairs_joined = ['-'.join(pair) for pair in channel_pairs]

# Sampling rate (Hz) used for windowing and MFCC extraction
target_sampling_rate = 256

# Pre-trained classifier loaded from disk
model_file = 'epilepsy_prediction_model.pkl'
model = joblib.load(model_file)

# Function to compute cepstrum_mel
def compute_cepstrum_mel(data, sfreq, n_mfcc=20):
    """Compute mel-frequency cepstral coefficients for one signal.

    Args:
        data: Samples of a single channel (array-like).
        sfreq: Sampling rate of ``data``, passed to librosa as ``sr``.
        n_mfcc: Number of coefficients to compute.

    Returns:
        The array produced by ``librosa.feature.mfcc``; per the librosa
        documentation its shape is ``(n_mfcc, n_frames)`` for 1-D input.
    """
    signal = np.asarray(data)
    # librosa rejects non-floating audio, so promote integer/bool input;
    # floating input is passed through with its dtype unchanged.
    if not np.issubdtype(signal.dtype, np.floating):
        signal = signal.astype(np.float64)
    return librosa.feature.mfcc(y=signal, sr=sfreq, n_mfcc=n_mfcc)

# Function to preprocess the raw data
def preprocess_raw(raw):
    """Clean a recording and return its bipolar derivations.

    Steps, in order: keep only the electrodes named in ``channel_pairs`` plus
    the channel ``'2'`` (typed as ECG); fit ICA on a 1 Hz high-passed copy and
    exclude the components reported by ``find_bads_ecg``; build the bipolar
    channels listed in ``channel_pairs``; apply oversampled temporal
    projection; and finally call ``filter(0.0, 40.0)``.

    The input is copied first, so the caller's object is not modified.

    Args:
        raw: MNE Raw object that contains every electrode named in
            ``channel_pairs`` and a channel named ``'2'``.

    Returns:
        The cleaned Raw object holding the ``channel_pairs_joined`` channels.
    """
    print_decorative_log("Starting Preprocessing Sequence", Fore.GREEN)

    # Work on a private, in-memory copy: the steps below rename and drop
    # channels in place, and filtering requires the samples to be loaded
    # (read_raw_edf does not preload by default).
    raw = raw.copy()
    raw.load_data()

    # Select the desired channels from channel pairs which resemble the bipolar
    # longitudinal channels of 10-20 system. dict.fromkeys removes repeated
    # electrodes while keeping the order of first appearance.
    selected_channels = list(dict.fromkeys(
        electrode for pair in channel_pairs for electrode in pair
    ))
    selected_channels.append('2')

    # Rename any channel whose name contains 'EEG FP2' to the 'EEG Fp2'
    # spelling used in channel_pairs.
    renames = {name: 'EEG Fp2' for name in raw.ch_names if 'EEG FP2' in name}
    if renames:
        raw.rename_channels(renames)

    # Drop channels not found in the desired channel list
    channels_to_drop = [name for name in raw.ch_names if name not in selected_channels]
    raw.drop_channels(channels_to_drop)
    print_decorative_log("Extra Channels Dropped ... ", Fore.RED)

    # Pick the selected channels in the listed order.
    # NOTE(review): whether pick() reorders channels depends on the MNE
    # version — confirm if channel order matters downstream.
    raw = raw.pick(selected_channels)
    print_decorative_log("Channels Reordered ... ", Fore.YELLOW)

    # Set the channel type for '2' to 'ecg'
    raw.set_channel_types({'2': 'ecg'})
    print_decorative_log("ECG Channel Selected ... ", Fore.YELLOW)

    # Filtering to remove slow drifts (used only for fitting the ICA)
    filt_raw = raw.copy().filter(l_freq=1.0, h_freq=None)
    print_decorative_log("Slow drifts removed ... ", Fore.YELLOW)

    # Apply ICA to remove ECG artifacts: fit on the high-passed copy, then
    # remove the flagged components from a copy of the unfiltered data.
    ica = mne.preprocessing.ICA(n_components=15, max_iter="auto", random_state=97)
    ica.fit(filt_raw)
    ica.exclude = []
    ecg_indices, _ = ica.find_bads_ecg(raw, method="correlation", threshold="auto")
    ica.exclude = ecg_indices

    reconst_raw = raw.copy()
    ica.apply(reconst_raw)

    print_decorative_log("ECG Artificats Removed... ", Fore.YELLOW)

    # Perform bipolar longitudinal referencing
    anodes = [pair[0] for pair in channel_pairs]
    cathodes = [pair[1] for pair in channel_pairs]

    raw_bip_ref = mne.set_bipolar_reference(reconst_raw, anode=anodes, cathode=cathodes)
    raw_bip_ref_ch = raw_bip_ref.copy().pick_channels(channel_pairs_joined)
    print_decorative_log("Bipolar Referencing Done ... ", Fore.YELLOW)

    raw_clean = mne.preprocessing.oversampled_temporal_projection(raw_bip_ref_ch)
    raw_clean.filter(0.0, 40.0)
    print_decorative_log("Smoothing & Filtering Done ... ", Fore.YELLOW)

    return raw_clean


# Function to simulate streaming data and make predictions
def simulate_streaming_data(raw, start_time, end_time):
    """Classify a recording in consecutive 10-second windows and display the results.

    The interval ``[start_time, end_time]`` is cropped from a copy of ``raw``,
    preprocessed with ``preprocess_raw``, and cut into non-overlapping windows.
    For each window the MFCCs of every channel are stacked into one feature
    matrix, the model predicts on it, and the label of the first row is shown.

    Args:
        raw: MNE Raw recording; it is not modified.
        start_time: Start of the interval to classify, in seconds.
        end_time: End of the interval to classify, in seconds.
    """
    st.write("Starting Simulation")

    # Crop a copy: Raw.crop works in place, and `raw` may be a cached object
    # that is reused the next time the app reruns.
    segment = raw.copy().crop(tmin=start_time, tmax=end_time)

    # Preprocess the raw data
    preprocessed_raw = preprocess_raw(segment)
    data = preprocessed_raw.get_data()

    # Define the window size for frame sampling
    window_size = 10  # Window size in seconds
    # NOTE(review): this assumes the recording is sampled at
    # target_sampling_rate; no resampling happens in this function — confirm.
    window_samples = int(window_size * target_sampling_rate)
    num_frames = data.shape[1] // window_samples

    if num_frames == 0:
        st.warning("Selected interval is shorter than one 10-second window; nothing to classify.")
        return

    n_mfcc = 20  # Number of MFCC coefficients per channel
    class_mapping = {0: 'pre-ictal', 1: 'ictal', 2: 'post-ictal', 3: 'normal'}

    # Iterate over the frames
    for frame_idx in range(num_frames):
        start_idx = frame_idx * window_samples
        frame_data = data[:, start_idx:start_idx + window_samples]

        # One (n_mfcc, n_steps) block per channel, stacked channel after
        # channel; after transposing, each row holds n_channels * n_mfcc values.
        cepstral_features = np.concatenate(
            [
                compute_cepstrum_mel(channel_data, target_sampling_rate, n_mfcc)
                for channel_data in frame_data
            ],
            axis=0,
        )
        frame_features = cepstral_features.T

        # Features go to the model unscaled. This assumes the loaded object is
        # the scaler+SVM pipeline written by the training cell, which
        # standardises its own input. Fitting a new scaler on one frame would
        # normalise with that frame's statistics, not the training statistics.
        # NOTE(review): only the first row's prediction is used — confirm that
        # is the intended way to summarise a window.
        prediction = model.predict(frame_features)[0]

        # Map the predicted label to the corresponding class; int() because the
        # training labels are stored as floats.
        predicted_class = class_mapping[int(prediction)]

        # Display the streaming data and classification result
        st.subheader("Streaming 10 secs")
        st.info(f"Classification Result: {predicted_class}")
        st.write("--------------------------------")

# Streamlit app
# NOTE(review): st.cache is deprecated in recent Streamlit releases in favour
# of st.cache_resource / st.cache_data — confirm against the installed version.
@st.cache(allow_output_mutation=True)
def load_data(file_path):
    """Read an EDF recording from a path or from an uploaded file object.

    Args:
        file_path: Either a filesystem path (``str`` / ``os.PathLike``) or a
            file-like object exposing ``getvalue()`` or ``read()``, such as the
            object returned by ``st.file_uploader``.

    Returns:
        The MNE Raw object. Path input is read exactly as before (not
        preloaded); uploaded content is fully loaded into memory.
    """
    # A real path can be handed to MNE directly
    if isinstance(file_path, (str, os.PathLike)):
        return mne.io.read_raw_edf(file_path)

    # read_raw_edf is documented as taking a path, so uploaded bytes are first
    # written to a temporary .edf file.
    payload = file_path.getvalue() if hasattr(file_path, "getvalue") else file_path.read()
    # delete=False so the file can be reopened by name after closing (needed on
    # Windows); it is removed explicitly below.
    with tempfile.NamedTemporaryFile(suffix=".edf", delete=False) as tmp:
        tmp.write(payload)
        tmp_path = tmp.name
    try:
        # preload=True reads all samples now, so nothing depends on the
        # temporary file once it is deleted.
        return mne.io.read_raw_edf(tmp_path, preload=True)
    finally:
        os.remove(tmp_path)

def main():
    """Render the page: upload an EDF file, choose a time range, and start classification."""
    st.title("EDF Streaming Data Classification")

    # Nothing else is rendered until a file has been provided
    edf_upload = st.file_uploader("Upload EDF file", type=["edf"])
    if edf_upload is None:
        return

    recording = load_data(edf_upload)
    last_timestamp = recording.times[-1]  # upper bound for both time inputs

    start_time = st.number_input(
        "Start Time (in seconds)",
        min_value=0.0,
        max_value=last_timestamp,
        value=0.0,
    )
    # The end time cannot be earlier than the chosen start time
    end_time = st.number_input(
        "End Time (in seconds)",
        min_value=start_time,
        max_value=last_timestamp,
        value=last_timestamp,
    )

    if st.button("Start Classification"):
        simulate_streaming_data(recording, start_time, end_time)


if __name__ == "__main__":
    main()

2024-02-24 14:13:27.522 
  command:

    streamlit run C:\Users\MUSA\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py [ARGUMENTS]
