In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import zscore
from scipy.signal import hilbert
from scipy.signal import spectrogram
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Mount Google Drive at /content/drive so the CSV read below is reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the recording table from the mounted Drive folder into `data`.
data = pd.read_csv('/content/drive/MyDrive/Project/242113.csv')
# Bare last expression: the notebook displays 5 randomly sampled rows.
data.sample(5)

In [None]:
# Print the DataFrame summary (columns, non-null counts, dtypes, memory use).
data.info()


In [None]:
# Number of missing values in each column.
data.isnull().sum()

In [None]:
# Z-score every numeric column so that outliers (|z| > 3) can be counted.
numeric_data = data.select_dtypes(include=[np.number])

# The scores are computed with pandas instead of scipy.stats.zscore:
# zscore propagates NaN by default, so a single missing value would turn the
# whole column's scores into NaN and silently disable outlier detection for
# that column. pandas' mean/std skip NaN; ddof=0 matches zscore's population
# standard deviation, so the result is the same when nothing is missing.
z_scores_numeric = (numeric_data - numeric_data.mean()) / numeric_data.std(ddof=0)

# Per-column count of values more than 3 standard deviations from the mean.
outliers_z_numeric = (z_scores_numeric.abs() > 3).sum()

# Display only the columns that actually contain such values.
outliers_z_numeric[outliers_z_numeric > 0]

In [None]:
# Flag every row in which at least one numeric column has |z-score| above 3.
rows_with_outliers = z_scores_numeric.abs().gt(3).any(axis=1)

# Keep only the rows that were not flagged.
cleaned_data = data.loc[~rows_with_outliers]

# Row counts before and after the removal.
rows_before = len(data)
rows_after = len(cleaned_data)

rows_before, rows_after

In [None]:
signal_data = cleaned_data['MeanP_Alpha_F4']

# Short-time power spectrum: 256-sample segments overlapping by 128, fs=1.
frequencies, times, Sxx = spectrogram(signal_data, fs=1, nperseg=256, noverlap=128)

# Draw the power in dB on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 5))
mesh = ax.pcolormesh(times, frequencies, 10 * np.log10(Sxx), shading='gouraud')
ax.set_ylabel('Frequency [Hz]')
ax.set_xlabel('Time [sec]')
ax.set_title('Spectrogram of MeanP_Alpha_F4')
fig.colorbar(mesh, ax=ax, label='Intensity [dB]')
plt.show()

In [None]:
# 100-point moving average of the F4 alpha power.
# This cell is a self-contained, single-panel figure: with the inline
# notebook backend a figure is rendered when its cell finishes, so drawing
# into the first slot of a two-row grid here left the lower half of the
# canvas empty.
fig_alpha, ax_alpha = plt.subplots(figsize=(12, 3))
ax_alpha.plot(
    cleaned_data['MeanP_Alpha_F4'].rolling(window=100).mean(),
    label='Moving Average of MeanP_Alpha_F4',
    color='blue',
)
ax_alpha.set_title('Moving Average (100-point window) of MeanP_Alpha_F4')
ax_alpha.set_xlabel('Data Point Index')
ax_alpha.set_ylabel('Moving Average Value')
ax_alpha.legend()

fig_alpha.tight_layout()
plt.show()

In [None]:
# 100-point moving average of the F4 beta power.
# The figure is created in this cell: calling plt.subplot(2, 1, 2) without an
# open figure starts a new default-sized one and draws only its lower half,
# because the figure from the previous cell has already been rendered.
fig_beta, ax_beta = plt.subplots(figsize=(12, 3))
ax_beta.plot(
    cleaned_data['MeanP_Beta_F4'].rolling(window=100).mean(),
    label='Moving Average of MeanP_Beta_F4',
    color='green',
)
ax_beta.set_title('Moving Average (100-point window) of MeanP_Beta_F4')
ax_beta.set_xlabel('Data Point Index')
ax_beta.set_ylabel('Moving Average Value')
ax_beta.legend()

fig_beta.tight_layout()
plt.show()

In [None]:
from scipy.signal import coherence

# Alpha-band power of the F4 and O2 channels as plain arrays.
alpha_f4 = cleaned_data['MeanP_Alpha_F4'].values
alpha_o2 = cleaned_data['MeanP_Alpha_O2'].values

# Coherence between the two series, estimated on 256-sample segments (fs=1).
frequencies, coh = coherence(alpha_f4, alpha_o2, fs=1, nperseg=256)

# Coherence against frequency, log-scaled y axis.
fig, ax = plt.subplots(figsize=(10, 5))
ax.semilogy(frequencies, coh, label='Coherence between F4 and O2 (Alpha band)')
ax.set_title('Coherence between MeanP_Alpha_F4 and MeanP_Alpha_O2')
ax.set_xlabel('Frequency [Hz]')
ax.set_ylabel('Coherence')
ax.legend()
plt.show()

In [None]:
# Beta-band power of the F4 and O2 channels as plain arrays.
beta_f4 = cleaned_data['MeanP_Beta_F4'].values
beta_o2 = cleaned_data['MeanP_Beta_O2'].values

# Coherence between the two series, estimated on 256-sample segments (fs=1).
beta_frequencies, beta_coh = coherence(beta_f4, beta_o2, fs=1, nperseg=256)

# Coherence against frequency, log-scaled y axis.
fig, ax = plt.subplots(figsize=(10, 5))
ax.semilogy(beta_frequencies, beta_coh, label='Coherence between F4 and O2 (Beta band)')
ax.set_title('Coherence between MeanP_Beta_F4 and MeanP_Beta_O2')
ax.set_xlabel('Frequency [Hz]')
ax.set_ylabel('Coherence')
ax.legend()
plt.show()

In [None]:
def phase_synchronization(signal1, signal2):
    """Return a per-sample phase synchronization index for two signals.

    The instantaneous phase of each signal is taken from its analytic signal
    (Hilbert transform). The phase difference is wrapped onto the circle, so
    the index is 1 where the phases coincide and 0 where they are opposite.

    Parameters
    ----------
    signal1, signal2 : array_like
        Real-valued signals of the same shape (the notebook passes 1-D arrays).

    Returns
    -------
    numpy.ndarray
        Synchronization index in [0, 1], one value per sample.
    """
    phase1 = np.angle(hilbert(signal1))
    phase2 = np.angle(hilbert(signal2))

    # np.angle returns phases in (-pi, pi], so the raw difference spans
    # (-2*pi, 2*pi). Wrap it back to (-pi, pi] before taking the magnitude;
    # otherwise nearly in-phase samples that sit on opposite sides of the
    # +/-pi branch cut (raw difference close to 2*pi) would be scored as
    # strongly desynchronized, with an index near -1.
    phase_diff = np.abs(np.angle(np.exp(1j * (phase1 - phase2))))

    # 1 -> identical phase, 0 -> anti-phase.
    sync_index = 1 - phase_diff / np.pi

    return sync_index

# Per-sample synchronization index of the beta-band F4 and O2 series.
phase_sync = phase_synchronization(beta_f4, beta_o2)

# Index over sample position.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(phase_sync, label='Phase Synchronization (Beta F4 and Beta O2)')
ax.set_title('Phase Synchronization between MeanP_Beta_F4 and MeanP_Beta_O2')
ax.set_xlabel('Time Points')
ax.set_ylabel('Synchronization Index')
ax.legend()
plt.show()

In [None]:
# Features: every column except the label.
X = cleaned_data.drop(columns='Sleep_Stage')
# Binary target: 1 where the stage is 'REM', 0 for every other value.
y = (cleaned_data['Sleep_Stage'] == 'REM').astype('int64')

# 80/20 split, stratified on the target so both parts keep the class ratio.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
    stratify=y,
)

In [None]:
# Learn the per-feature mean/std on the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)

In [None]:
# Add a trailing axis of length 1: (samples, features) -> (samples, features, 1).
X_train_scaled = X_train_scaled.reshape(*X_train_scaled.shape[:2], 1)
X_val_scaled = X_val_scaled.reshape(*X_val_scaled.shape[:2], 1)

X_train_scaled.shape, X_val_scaled.shape

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dropout, Dense
from tensorflow.keras.initializers import GlorotUniform
from tensorflow.keras.optimizers import Adam

# Seed the Python, NumPy and TensorFlow random generators. Only the kernel
# initializers below carry an explicit seed; the recurrent initializers and
# dropout masks do not, so without a global seed
# every run would train a different model.
tf.keras.utils.set_random_seed(42)

# Two stacked LSTM layers with dropout and a sigmoid output for the binary
# REM / not-REM target. Each sample is fed as a sequence with one step per
# feature column and a single value per step.
model = Sequential([
    # The input shape is declared with an explicit Input layer rather than the
    # `input_shape` argument of the first layer, which current Keras discourages.
    Input(shape=(X_train_scaled.shape[1], 1)),
    LSTM(64, return_sequences=True, kernel_initializer=GlorotUniform(seed=42)),
    Dropout(0.5),
    LSTM(32, kernel_initializer=GlorotUniform(seed=42)),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Use an Adam optimizer
optimizer = Adam(learning_rate=0.0001)

model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])


model.summary()

In [None]:
# Train for 5 epochs with batches of 64; the validation split is evaluated
# after every epoch and the per-epoch metrics are kept in `history`.
history=model.fit(X_train_scaled, y_train, validation_data=(X_val_scaled, y_val), epochs=5, batch_size=64)

In [None]:
# Per-epoch metrics recorded during training.
metrics = history.history
acc = metrics['accuracy']
val_acc = metrics['val_accuracy']
loss = metrics['loss']
val_loss = metrics['val_loss']
epochs = range(1, len(acc) + 1)

# Accuracy on the left, loss on the right.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 5))

ax_acc.plot(epochs, acc, 'bo', label='Training acc')
ax_acc.plot(epochs, val_acc, 'b', label='Validation acc')
ax_acc.set_title('Training and Validation Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

ax_loss.plot(epochs, loss, 'ro', label='Training loss')
ax_loss.plot(epochs, val_loss, 'r', label='Validation loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()

In [None]:
!pip install gradio
import gradio as gr
import numpy as np
import pandas as pd # Ensure pandas is available if not explicitly imported earlier

# The 'model', 'scaler', and 'X' (DataFrame of features) are available from previous cells.

def predict_sleep_stage(feature_values_str):
    """Classify one comma-separated feature vector as REM or NREM.

    Parameters
    ----------
    feature_values_str : str
        Comma-separated numeric values, one per column of ``X`` and in the
        same order as those columns.

    Returns
    -------
    str
        The predicted stage together with the model's output probability, or
        a message starting with ``"Error:"`` / ``"An unexpected error
        occurred:"`` when the input cannot be used.
    """
    invalid_input = "Error: Invalid input. Please enter comma-separated numerical values."

    # Parse in its own try block so that only genuine parsing problems are
    # reported as invalid input; a ValueError raised later by the scaler or
    # the model is reported as an unexpected error instead.
    try:
        features = [float(token.strip()) for token in feature_values_str.split(',')]
    except (AttributeError, ValueError):
        return invalid_input

    # float() accepts "nan" and "inf"; such values would pass through the
    # scaler and come back as a meaningless prediction, so reject them here.
    if not np.all(np.isfinite(features)):
        return invalid_input

    try:
        # The model expects exactly one value per feature column of X.
        expected_count = X.shape[1]
        if len(features) != expected_count:
            return f"Error: Expected {expected_count} features, but got {len(features)}. Please provide all feature values."

        # The scaler was fitted on a DataFrame; hand it a one-row DataFrame
        # with the same column names so scikit-learn can match the features
        # instead of warning about missing feature names.
        input_frame = pd.DataFrame([features], columns=X.columns)
        scaled_input = scaler.transform(input_frame)

        # Same layout the LSTM was trained on: (batch, n_features, 1).
        reshaped_input = scaled_input.reshape(1, scaled_input.shape[1], 1)

        prediction_probability = model.predict(reshaped_input)[0][0]

        # The sigmoid output is thresholded at 0.5: at or above means REM.
        stage = "REM" if prediction_probability >= 0.5 else "NREM"
        return f"Predicted Sleep Stage: {stage} (Probability: {prediction_probability:.4f})"
    except Exception as e:
        return f"An unexpected error occurred: {e}"

# Column names, used in the textbox label to hint at the expected input order.
feature_names = X.columns.tolist()

# First row of the feature table, rendered as a ready-made example input.
example_input = ", ".join(map(str, X.iloc[0].values))

# Single-textbox interface around predict_sleep_stage.
iface = gr.Interface(
    fn=predict_sleep_stage,
    inputs=gr.Textbox(
        label=f"Enter {X.shape[1]} comma-separated feature values (e.g., {feature_names[0]}, {feature_names[1]}, ...)",
        placeholder=example_input,  # shown while the textbox is empty
        lines=5,  # multi-line box, easier to read with many values
    ),
    outputs=gr.Label(label="Prediction Result"),
    title="Sleep Stage Classification Model",
    # The feature count is read from X so the text stays correct if the
    # feature set changes (it was previously hard-coded).
    description=f"This interface predicts the sleep stage (REM or NREM) based on {X.shape[1]} input features. The model is an LSTM neural network.",
)

# debug=False lets the cell return once the app is up. With debug=True the
# cell runs until it is interrupted, so the cells after it never execute on
# "Run all".
iface.launch(debug=False, share=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://a3ddee01645ca296de.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [None]:
from sklearn.ensemble import RandomForestClassifier

# The scaled training data is 3-D at this point; flatten everything after the
# sample axis so the forest receives a 2-D matrix.
X_train_2d = X_train_scaled.reshape(len(X_train_scaled), -1)

# Fit a seeded random forest on the training split.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train_2d, y_train)

# Importances labelled with the feature column names, highest first.
feature_importances = rf_classifier.feature_importances_
feature_importance_series = pd.Series(feature_importances, index=X.columns)
sorted_feature_importances = feature_importance_series.sort_values(ascending=False)

print("Top 10 Most Important Features:")
print(sorted_feature_importances.head(10))

## Select Top 10 Features



In [None]:
# Names of the ten highest-ranked features, most important first.
top_10_features = sorted_feature_importances.index[:10].tolist()
# Feature table restricted to those columns.
X_10_features = X.loc[:, top_10_features]

print("Shape of X_10_features:", X_10_features.shape)
print("Columns in X_10_features:", X_10_features.columns.tolist())

In [None]:
# Same 80/20 stratified split as before, now on the ten selected columns.
X_train_10, X_val_10, y_train_10, y_val_10 = train_test_split(
    X_10_features,
    y,
    test_size=0.20,
    random_state=42,
    stratify=y,
)

# Report the shape of each part.
for _part_name, _part in (
    ("X_train_10", X_train_10),
    ("X_val_10", X_val_10),
    ("y_train_10", y_train_10),
    ("y_val_10", y_val_10),
):
    print(f"Shape of {_part_name}: {_part.shape}")

In [None]:
# Dedicated scaler for the ten-feature model: fitted on the training split,
# then applied to both splits.
scaler_10_features = StandardScaler().fit(X_train_10)
X_train_10_scaled = scaler_10_features.transform(X_train_10)
X_val_10_scaled = scaler_10_features.transform(X_val_10)

print(f"Shape of X_train_10_scaled: {X_train_10_scaled.shape}")
print(f"Shape of X_val_10_scaled: {X_val_10_scaled.shape}")

In [None]:
# Add a trailing axis of length 1: (samples, features) -> (samples, features, 1).
X_train_10_scaled = X_train_10_scaled.reshape(X_train_10_scaled.shape[0], X_train_10_scaled.shape[1], 1)
X_val_10_scaled = X_val_10_scaled.reshape(X_val_10_scaled.shape[0], X_val_10_scaled.shape[1], 1)

print(f"Reshaped X_train_10_scaled shape: {X_train_10_scaled.shape}")
print(f"Reshaped X_val_10_scaled shape: {X_val_10_scaled.shape}")

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dropout, Dense
from tensorflow.keras.initializers import GlorotUniform
from tensorflow.keras.optimizers import Adam

# Seed the Python, NumPy and TensorFlow random generators. Only the kernel
# initializers below carry an explicit seed; the recurrent initializers and
# dropout masks do not, so without a global seed
# every run would train a different model.
tf.keras.utils.set_random_seed(42)

# Two stacked LSTM layers with dropout and a sigmoid output, fed one step per
# selected feature with a single value per step.
model_10_features = Sequential([
    # The input shape is declared with an explicit Input layer rather than the
    # `input_shape` argument of the first layer, which current Keras discourages.
    Input(shape=(X_train_10_scaled.shape[1], 1)),
    LSTM(64, return_sequences=True, kernel_initializer=GlorotUniform(seed=42)),
    Dropout(0.5),
    LSTM(32, kernel_initializer=GlorotUniform(seed=42)),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

optimizer_10_features = Adam(learning_rate=0.0001)

model_10_features.compile(optimizer=optimizer_10_features, loss='binary_crossentropy', metrics=['accuracy'])

model_10_features.summary()

In [None]:
# Train the ten-feature model for 5 epochs with batches of 64; the validation
# split is evaluated after every epoch.
history_10_features = model_10_features.fit(X_train_10_scaled, y_train_10, validation_data=(X_val_10_scaled, y_val_10), epochs=5, batch_size=64)

## Create Gradio Interface for 10-Feature Model



In [None]:
def predict_sleep_stage_10_features(*feature_values):
    """Classify one sample, given as separate feature values, as REM or NREM.

    Parameters
    ----------
    *feature_values : float
        One value per column of ``X_10_features``, in the same order as those
        columns.

    Returns
    -------
    str
        The predicted stage together with the model's output probability, or
        a message starting with ``"Error:"`` / ``"An unexpected error
        occurred:"`` when the input cannot be used.
    """
    invalid_input = "Error: Invalid input. Please enter numerical values."

    # Validate before touching the scaler. A missing value (None, e.g. from an
    # empty input field) or NaN would otherwise be carried through scaling and
    # prediction and end up reported as "NREM" with a NaN probability.
    try:
        values = [float(value) for value in feature_values]
    except (TypeError, ValueError):
        return invalid_input
    if not np.all(np.isfinite(values)):
        return invalid_input

    try:
        # The model expects exactly one value per selected feature column.
        expected_count = X_10_features.shape[1]
        if len(values) != expected_count:
            return f"Error: Expected {expected_count} features, but got {len(values)}."

        # The scaler was fitted on a DataFrame; hand it a one-row DataFrame
        # with the same column names so scikit-learn can match the features
        # instead of warning about missing feature names.
        input_frame = pd.DataFrame([values], columns=X_10_features.columns)
        scaled_input = scaler_10_features.transform(input_frame)

        # Same layout the LSTM was trained on: (batch, n_features, 1).
        reshaped_input = scaled_input.reshape(1, scaled_input.shape[1], 1)

        prediction_probability = model_10_features.predict(reshaped_input)[0][0]

        # The sigmoid output is thresholded at 0.5: at or above means REM.
        stage = "REM" if prediction_probability >= 0.5 else "NREM"
        return f"Predicted Sleep Stage: {stage} (Probability: {prediction_probability:.4f})"
    except Exception as e:
        return f"An unexpected error occurred: {e}"

# Feature names (input labels) and the first row's values (initial inputs).
feature_names_10 = X_10_features.columns.tolist()
example_values_10 = X_10_features.iloc[0].values

# One numeric input per selected feature, pre-filled with the example value.
# Each component is created exactly once and goes straight into the list
# handed to the interface.
gradio_inputs_10 = [
    gr.Number(label=feature_name, value=example_value)
    for feature_name, example_value in zip(feature_names_10, example_values_10)
]

# Interface around predict_sleep_stage_10_features; Gradio passes the inputs
# to the function positionally, in list order.
iface_10_features = gr.Interface(
    fn=predict_sleep_stage_10_features,
    inputs=gradio_inputs_10,
    outputs=gr.Label(label="Prediction Result"),
    title="Sleep Stage Classification Model (Top 10 Features)",
    description="This interface predicts the sleep stage (REM or NREM) based on the top 10 most important features. Please enter numerical values for each feature."
)

# Launch the interface
iface_10_features.launch(debug=True, share=True)