# Kara One - Transduction

## Load Dataset

In [None]:
from lib import KaraOneDataset

# Absolute path to the dataset on the original author's Windows machine;
# change this to wherever the data lives locally.
dataset_dir = "C:\\Users\\win8t\\OneDrive\\Desktop\\projects\\kara-one-transduction\\p"
# Load with scaling disabled. NOTE(review): start_idx/end_idx presumably
# restrict loading to a small index range (163 to 165) to keep it fast —
# confirm the exact meaning against KaraOneDataset.
dataset = KaraOneDataset(dataset_dir, end_idx=165, scale_data=False, start_idx=163)

In [None]:
import numpy as np
# Show the shapes of the three `eeg_vocal*` arrays of the first example
# side by side (the tuple is the cell's displayed value).
dataset[0]["eeg_vocal"].shape, dataset[0]["eeg_vocal_raw"].shape, dataset[0]["eeg_vocal_feats"].shape

## Single Channel Analysis

From: [Paper](http://www.cs.toronto.edu/~complingweb/data/karaOne/ZhaoRudzicz15.pdf)

Pearson correlation coefficients between audio features and imagined speech EEG features

|Sensor | FC6    | FT8 |  C5 | CP3|  P3 |
| - | - | - | - | - | - |
|Mean r | 0.3781 | 0.3758 | 0.3728 | 0.3720 | 0.3696 |

| Sensor | T7 | CP5 | C3 | CP1 |C4 |
| - | - | - | - | - | - |
| Mean r | 0.3686 | 0.3685 | 0.3659 | 0.3626 | 0.3623 |

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_eeg(eeg_type, target_channels, idx, feat=False):
    """Plot one trace per selected EEG channel for a single dataset example.

    Reads the module-level ``dataset``, prints diagnostic information, draws
    one line per channel on the current matplotlib figure and shows it.
    As a side effect the global matplotlib default figure size is set to
    ``(14, 9)``.

    Args:
        eeg_type: Key of the example dict that holds the EEG array to plot
            (for instance ``"eeg_vocal_feats"``).
        target_channels: Channel names to plot. An empty/falsy value selects
            every name in ``dataset.eeg_data.ch_names``.
        idx: Index of the example to fetch from ``dataset``.
        feat: When true, the array is split into 62 equal parts along its
            first axis and element 16 of each channel's part is plotted.
            Otherwise column ``channel_index * 5 + 2`` of the first 135 rows
            is plotted.

    Raises:
        ValueError: If a requested channel name is missing from
            ``dataset.eeg_data.ch_names`` (raised by the name lookup,
            assuming ``ch_names`` is a list).
    """
    plt.rcParams["figure.figsize"] = (14, 9)

    ch_names = dataset.eeg_data.ch_names
    if not target_channels:
        target_channels = ch_names

    print("target_channels:", target_channels)

    print(dataset.eeg_data)

    # Translate channel names into their positions in the channel list.
    channel_indices = [ch_names.index(name) for name in target_channels]

    example = dataset[idx]
    print(example["label"])
    eeg_data = example[eeg_type]
    print("eeg data shape:", eeg_data.shape)

    if feat:
        # One sub-array per channel; np.split requires the first axis to be
        # divisible by 62.
        eeg_data = np.asarray(np.split(eeg_data, 62))

    # The loop variable is deliberately NOT called `idx`, so the example
    # index passed by the caller is never overwritten.
    for ch_idx in channel_indices:
        print("idx:", ch_idx)
        label = ch_names[ch_idx]
        if feat:
            data = eeg_data[ch_idx, 16]
        else:
            # NOTE(review): the layout looks like 5 columns per channel with
            # the third one selected, truncated to 135 rows — confirm against
            # the feature extraction code.
            data = eeg_data[:135, (ch_idx * 5) + 2]
        print("CUR ELECTRODE DATA:", data.shape)
        plt.plot(data, label=label)
        # Debug output kept as-is; for a 1-D trace both numbers are equal,
        # since dividing by the scalar maximum does not change the length.
        print(len(data), len(data / max(data)))

    plt.legend(loc="upper left")
    plt.show()

# Alternative channel selections, based on the correlation table above.
# Uncomment one of them to plot only those sensors.
# target_channels = "FC6 FT8 C5 CP3 P3 T7 CP5 C3 CP1 C4".split(" ") # top 10
# target_channels = "FC6 FT8 C5 CP3 CP5 C3 CP1 C4".split(" ") # top 10 without P3 and T7 (T7 = temporal 7?)
# target_channels = target_channels[0:10]
# An empty list makes plot_eeg fall back to every channel of the dataset.
target_channels = []

In [None]:
# Plot the `eeg_vocal_feats` array of the first example for the selected channels.
# NOTE(review): feat=False is combined with the "eeg_vocal_feats" array here,
# so plot_eeg indexes columns directly instead of splitting per channel —
# confirm this is intended.
plot_eeg("eeg_vocal_feats", target_channels, idx=0, feat=False)
# Echo the channel selection as the cell's displayed value.
target_channels

### Audio Visualisation

In [None]:
# Plot the raw audio samples of the first example.
example = dataset[0]
plt.plot(example["audio_raw"])
# Sample count divided by 16,000: the clip length in seconds, assuming the
# audio is sampled at 16 kHz (TODO confirm).
len(example["audio_raw"]) / 16_000

### Mel Spectrogram of Audio (Vocalised)

In [None]:
from matplotlib import cm

def plot_mel_spectrogram(mel_spec, title):
    """Draw a mel spectrogram as a heat-map image and return its figure.

    Args:
        mel_spec: Array-like spectrogram. Its first two axes are swapped
            before drawing, so the input's first axis runs along the image's
            horizontal direction (presumably time — confirm with the feature
            extraction code).
        title: Text shown in quotes inside the plot title.

    Returns:
        The newly created matplotlib figure containing the image.
    """
    figure, axes = plt.subplots(1)
    axes.set_title(f"Mel Spectogram \"{title}\"")

    # origin='lower' puts index 0 of the vertical axis at the bottom.
    axes.imshow(
        np.swapaxes(mel_spec, 0, 1),
        interpolation='nearest',
        cmap=cm.coolwarm,
        origin='lower',
    )

    return figure

In [None]:
# Render the `audio_feats` array of the example loaded in the audio cell above,
# titled with the example's label.
example_mel_spec = example["audio_feats"]
print(example_mel_spec.shape)
# The returned figure is bound to a name; presumably so the notebook does not
# echo it as a second cell output.
a = plot_mel_spectrogram(example_mel_spec, example["label"])