# Visualisation Data Generation
___

## Selected data
___
**Analyse my data**: The recording currently visualized on that page is subject **55**, night **1**.

**Performance**:
- Currently visualized hypnogram in Classifier vs Physionet and Electrophysiologist vs Physionet is subject **55**, night **1**. 
- Currently visualized hypnogram in Classifier vs Electrophysiologist is subject **55**, night **1**. 

## Data
___

This notebook aims to generate data that will be used by our visualisations. We generate, for a night recording and its sleep stage scoring, two files:
- `hypnogram.csv`:

    Each row contains the tuple (timestamp:`int`, sleep stage:`int`) for one 30-second epoch of the night. The sleep stages are encoded as follows:
    ```
"W": 0,
"N1": 1,
"N2": 2,
"N3": 3,
"REM": 4
    ```


- `spectrograms.json`:

    Dictionary that contains all the information needed to display the spectrogram of each of the two EEG channels. The 30-second epochs correspond to those found in `hypnogram.csv`.
    
    ```
{
    "Frequencies": [float],  # 75 frequencies corresponding to the y-axis
    "Fpz-Cz": [              # List for all 30 second epochs
        [float]              # 75 power values (in log scale) associated to the frequencies
    ],
    "Pz-Oz": [               # List for all 30 second epochs
        [float]              # 75 power values (in log scale) associated to the frequencies
    ]
}
    ```

In [None]:
%matplotlib inline

import mne
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from time import altzone
from pytz import timezone
from json import dump
import os

from utils import fetch_data
from constants import (SLEEP_STAGES_VALUES,
                       DATASET_SLEEP_STAGES_VALUES,
                       N_STAGES,
                       EEG_CHANNELS,
                       EPOCH_DURATION,
                       SAMPLING_FREQ)

In [None]:
# Number of extra epochs kept after the end of the night when cropping the recording.
NB_EPOCHS_AWAKE_MORNING = 60

NYQUIST_FREQ = SAMPLING_FREQ / 2
# One sample short of a full epoch, because tmax is included in the epoch window.
MAX_TIME = EPOCH_DURATION - 1. / SAMPLING_FREQ

# Recording to process, kept as single-element lists because they are passed as-is to fetch_data().
SUBJECT = [3]
NIGHT = [2]
# Folder in which the generated files for this recording are written.
FOLDER_NAME = f"./data/subject{SUBJECT[0]}-night{NIGHT[0]}"

## Extract data
___

In [None]:
# Resolve the files of the selected recording; only the first entry is used below.
file_names = fetch_data(subjects=SUBJECT, recording=NIGHT)
print(file_names)

# The first path of the entry is read as the EDF signal, the second as its annotations.
recording_path = file_names[0][0]
annotations_path = file_names[0][1]

raw_data = mne.io.read_raw_edf(
    recording_path,
    preload=True,
    stim_channel=None,
    verbose=False,
)
annot = mne.read_annotations(annotations_path)
# Kept as the last bare expression so the notebook still echoes the returned object.
raw_data.set_annotations(annot, emit_warning=False)


In [None]:
# Per-recording metadata; the columns used later are 'subject', 'night',
# 'LightsOffSecond', 'NightDuration' and 'LightsOff'.
df_records = pd.read_csv("data/recordings-info.csv")
# Preview the first two rows in the notebook output.
df_records.head(n=2)

In [None]:
# Select the metadata row(s) describing the chosen subject and night.
is_selected_subject = df_records['subject'] == SUBJECT[0]
is_selected_night = df_records['night'] == NIGHT[0]
info = df_records[is_selected_subject & is_selected_night]
print(info)

# Both bounds are expressed in seconds since the beginning of the file.
night_duration = info['NightDuration'].values[0]
closed_lights_time = info['LightsOffSecond'].values[0]
woke_up_time = closed_lights_time + night_duration + NB_EPOCHS_AWAKE_MORNING*EPOCH_DURATION

# Keep the night plus the extra morning epochs, without running past the end of the recording.
# NOTE: the return value is discarded and later cells keep using raw_data, so this
# cell relies on crop() modifying raw_data itself; re-running it crops again.
recording_end = raw_data.times[-1]
raw_data.crop(tmin=closed_lights_time, tmax=min(woke_up_time, recording_end))

# Turn the annotations into one event per chunk of EPOCH_DURATION.
events, annot_event_id = mne.events_from_annotations(
    raw_data,
    event_id=DATASET_SLEEP_STAGES_VALUES,
    chunk_duration=EPOCH_DURATION,
    verbose=False,
)

# A few files have no N3 sleep (e.g. SC4202EC-Hypnogram), so only the stages whose
# value actually appears in the annotations are kept.
annotated_stage_values = annot_event_id.values()
event_id = {
    stage_name: stage_value
    for stage_name, stage_value in SLEEP_STAGES_VALUES.items()
    if stage_value in annotated_stage_values
}

In [None]:
# Cut the cropped recording into epochs starting at each event and lasting MAX_TIME.
epochs = mne.Epochs(
    raw=raw_data, events=events, event_id=event_id,
    tmin=0., tmax=MAX_TIME,
    preload=True, baseline=None, verbose=False)

# Discard the channels that are not used by the rest of the notebook.
epochs.drop_channels([
    'EOG horizontal', 'Resp oro-nasal', 'EMG submental', 'Temp rectal', 'Event marker',
])

# The last column of every event row is used as the sleep stage label of the epoch.
y = np.array([stage for *_, stage in epochs.events])

In [None]:
# Sanity-check summary of the extracted night.
night_duration = info['NightDuration'].values[0]
morning_margin = NB_EPOCHS_AWAKE_MORNING*EPOCH_DURATION
# Length of the kept window, expressed in epochs.
window_in_epochs = (woke_up_time - closed_lights_time)/EPOCH_DURATION

print("Number of epochs with raw data:           ", epochs.get_data().shape)
print("Number of staged epochs:                  ", y.shape)
print("Time between get out of bed and bed time: ", window_in_epochs)
print("Night duration with extra morning time:   ", night_duration + morning_margin)
print("Time of bedtime:                          ", info['LightsOff'].values[0])

In [None]:
# Wide, short canvas for the signal overview.
plt.rcParams["figure.figsize"] = (20, 3)

# Plot the Fpz-Cz channel on a copy so that raw_data itself keeps all its channels.
# The trailing semicolon keeps the notebook from echoing the returned value.
fpz_cz_signal = raw_data.copy().pick('EEG Fpz-Cz')
fpz_cz_signal.plot(butterfly=True, duration=info['NightDuration'].values[0]);

In [None]:
# Same overview for the Pz-Oz channel, again on a copy of raw_data.
# The trailing semicolon keeps the notebook from echoing the returned value.
pz_oz_signal = raw_data.copy().pick('EEG Pz-Oz')
pz_oz_signal.plot(butterfly=True, duration=info['NightDuration'].values[0]);

## Generate hypnogram
___

In [None]:
# Pair every sleep stage with the offset of its epoch (epoch index times EPOCH_DURATION).
y_with_offset = np.array([
    (epoch_index*EPOCH_DURATION, sleep_stage)
    for epoch_index, sleep_stage in enumerate(y)
])

plt.rcParams["figure.figsize"] = (20, 5)
plt.plot(
    [offset/3600 for offset, _ in y_with_offset],
    [sleep_stage for _, sleep_stage in y_with_offset],
)
plt.xlabel("Time since bed time (hours)")
plt.ylabel("Sleep stage")
# Flip the axis so that stage 0 is drawn at the top.
plt.gca().invert_yaxis()

In [None]:
# Display the (epoch offset, sleep stage) array for visual inspection.
y_with_offset

In [None]:
# Display the raw 'LightsOff' value of the selected recording; it is parsed
# with the "%Y-%m-%d %H:%M:%S" format when the timestamps are built.
info['LightsOff'].values[0]

In [None]:
# Parse the lights-off time and tag it as UTC, so that .timestamp() does not
# depend on the local timezone of the machine running the notebook.
bedtime_string = info['LightsOff'].values[0]
datetime_bedtime = datetime.strptime(bedtime_string, "%Y-%m-%d %H:%M:%S")
datetime_bedtime = datetime_bedtime.replace(tzinfo=timezone('utc'))
print(datetime_bedtime.timestamp())

# Start time of every epoch: bedtime plus the epoch index times EPOCH_DURATION seconds.
epoch_starts = (
    datetime_bedtime + timedelta(seconds=epoch_index*EPOCH_DURATION)
    for epoch_index in range(len(y))
)
# One (integer Unix timestamp, integer sleep stage) row per epoch.
y_with_timestamp = np.array([
    (int(epoch_start.timestamp()), int(sleep_stage))
    for epoch_start, sleep_stage in zip(epoch_starts, y)
])
y_with_timestamp

In [None]:
# Same hypnogram as before, this time against the Unix timestamp of each epoch.
plt.rcParams["figure.figsize"] = (20, 5)
plt.plot(
    [timestamp for timestamp, _ in y_with_timestamp],
    [sleep_stage for _, sleep_stage in y_with_timestamp],
)
plt.xlabel("Unix timestamp")
plt.ylabel("Sleep stage")
# Flip the axis so that stage 0 is drawn at the top.
plt.gca().invert_yaxis()

### Save hypnogram to file
___

In [None]:
# Create the output folder, including any missing parent folder. exist_ok makes
# this a no-op when the folder is already there, which avoids the race between
# checking for the folder and creating it.
os.makedirs(FOLDER_NAME, exist_ok=True)

# Write one "timestamp,sleep_stage" row per epoch, preceded by a header line.
# comments='' keeps the header from being prefixed with '# '.
# The first format pads the timestamp to a minimum width of 10 characters.
np.savetxt(
    f"{FOLDER_NAME}/hypnogram.csv",
    y_with_timestamp,
    fmt=['%10.0i', '%1.0i'],
    delimiter=',',
    header="timestamp,sleep_stage",
    comments='')

## Generate spectrogram
___

In [None]:
# Plot the power spectral density of the epochs between 0.5 and 30 Hz, in dB.
epochs.plot_psd(fmin=0.5, fmax=30., dB=True)

In [None]:
# Welch power spectral density of every epoch, restricted to 0.5-30 Hz.
# NOTE(review): newer MNE releases replace psd_welch with Epochs.compute_psd;
# confirm the MNE version this notebook targets before upgrading.
psds, freqs = mne.time_frequency.psd_welch(epochs, fmin=0.5, fmax=30.)

# Convert the power to dB. Values are first clamped to the smallest positive
# float so that log10 never receives zero.
power_floor = np.finfo(float).tiny
clamped_psds = np.maximum(psds, power_floor)
psds = 10 * np.log10(clamped_psds)

psds.shape, freqs.shape

In [None]:
# Split the power values per channel (second axis of psds).
# NOTE(review): assumes index 0 is Fpz-Cz and index 1 is Pz-Oz once the other
# channels have been dropped; confirm against the channel order of the epochs.
fpz_cz_power = psds[:, 0, :]
pz_oz_power = psds[:, 1, :]

# Plain lists are used so that the content is JSON serialisable.
spectrograms = {
    'Frequencies': freqs.tolist(),
    'Fpz-Cz': fpz_cz_power.tolist(),
    'Pz-Oz': pz_oz_power.tolist(),
}

# The output folder is expected to exist already.
spectrograms_path = f"{FOLDER_NAME}/spectrograms.json"
with open(spectrograms_path, 'w') as spectrograms_file:
    dump(spectrograms, spectrograms_file)

In [None]:
# Plot the hypnogram on a thin strip, against the epoch offsets.
plt.rcParams["figure.figsize"] = (20, 1.5)
plt.plot(
    [offset for offset, _ in y_with_offset],
    [sleep_stage for _, sleep_stage in y_with_offset],
)
plt.xlabel("Time since bed time (seconds)")
plt.ylabel("Sleep stage")
plt.gca().invert_yaxis()
plt.show()

# Plot one spectrogram per EEG channel, sharing the same time axis as the hypnogram.
plt.rcParams["figure.figsize"] = (20, 5)
for channel_index, channel_name in enumerate(('Fpz-Cz', 'Pz-Oz')):
    plt.pcolormesh(
        [offset for offset, _ in y_with_offset],
        freqs,
        # Transposed so that frequencies run along the y-axis.
        psds[:, channel_index, :].T,
    )
    plt.ylim([0, 30])
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.title(f'Spectrogram for {channel_name}')
    plt.show()