## Part 1: Data Processing

### Process both time series and gamma phase-amplitude coupling (GPAC) images into dataframes

In [1]:
import numpy as np
import os
import pandas as pd
import scipy.io as sio

from IPython.display import display
from pyedflib import highlevel

In [2]:
def process_timeseries_data(path):
    """ Reads and processes all time series data from MATLAB files in the provided
    path. Only relevant data will be stored for use here.
    """
    all_data = {}

    for file_name in os.listdir(path):
        # 'data' will contain all relevant data that should be used for training.
        # Other keys will hold other useful information that shouldn't be directly
        # needed for training.
        data = {'data': {}}
        
        full_file_path = os.path.join(path, file_name)
        mat_data = sio.loadmat(full_file_path)

        # Time series data with size equal to the number of channels (i.e. electrodes).
        # The electrode names can be indexed to find the electrode that a specific
        # value in this time series data belongs to.
        data['data']['samples'] = list(np.swapaxes(np.array(mat_data['Data']), 0, 1))

        # Time points in seconds
        data['data']['time'] = np.array(mat_data['Time'][0])

        data['data'] = pd.DataFrame(data['data'])
        
        # List of electrode names following the 10-20 EEG recording system
        data['electrodes'] = np.array([item.item() for item in mat_data['Electrodes'][0]])

        all_data[full_file_path] = data

    return all_data


def process_gpac_image_data(path):
    """ Reads and processes all GPAC image data from MATLAB files in the provided
    path. Only relevant data will be stored for use here.
    """
    all_data = {}

    for file_name in os.listdir(path):
        # 'data' will contain all relevant data that should be used for training.
        # Other keys will hold other useful information that shouldn't be directly
        # needed for training.
        data = {'data': {}}
        
        full_file_path = os.path.join(path, file_name)
        mat_data = sio.loadmat(full_file_path)

        # Sample point number corresponding to the start of each 2s window
        data['data']['window_start'] = np.array(mat_data['data']['wind'][0][0][0])

        # GPAC data of size lf x hf, where lf and hf are the dimensions of the GPAC image
        # Note that each window has its data flattened to one list of size lf x hf. We need
        # to cast the reshaped data as a list so that, for this column, each row contains a
        # list of the lf x hf data.
        data['data']['fv'] = np.array(mat_data['data']['fv'][0][0])
        data['data']['fv'] = list(data['data']['fv'].reshape(data['data']['fv'].shape[0], -1))

        # Window labels: baseline interictal (0), blink (1), movement (2), or EMG (3). -1
        # means the window wasn't labelled.
        data['data']['label'] = np.squeeze(np.array(mat_data['training_labels']), axis=1)

        data['data'] = pd.DataFrame(data['data'])

        # The duration of the window chosen for analysis, which should always be 2s
        data['window_len'] = np.array(mat_data['ws'][0])
        
        # lf and hf are the frequency values corresponding to the phase range and amplitude
        # range of interest in the GPAC feature, respectively.
        data['lf'] = np.array(mat_data['lf'][0])
        data['hf'] = np.array(mat_data['hf'][0])

        all_data[full_file_path] = data

    return all_data


def process_eye_blinking_data(file_path):
    """ Reads and processes all eye blinking image data from MATLAB files in the provided
    path. Only relevant data will be stored for use here. Dataset parsed in this function
    was obtained from https://www.sciencedirect.com/science/article/pii/S0925231216001569.
    """
    # 'data' will contain all relevant data that should be used for training.
    # Other keys will hold other useful information that shouldn't be directly
    # needed for training.
    all_data = {}
    data = {'data': {}}
    mat_data = sio.loadmat(file_path)

    mat_data = np.concatenate(
        (mat_data['Epochs']['EyeblinkArtifactEpochs'][0,0]['Voluntary'][0,0]['All'][0,0],
        mat_data['Epochs']['EyeblinkArtifactEpochs'][0,0]['Involuntary'][0,0]['All'][0,0]),
        axis=0
    )
    mat_data = np.swapaxes(mat_data, 0, 1)

    # Time series data with size equal to the number of channels (i.e. electrodes).
    # The electrode names can be indexed to find the electrode that a specific
    # value in this time series data belongs to.
    data['data']['samples'] = list(np.array(mat_data.reshape(-1, mat_data.shape[0])))

    # Time points in seconds (note: window size is 4s)
    data['data']['time'] = np.array(np.ravel([np.linspace(0, 4, mat_data.shape[2], endpoint=False)] * mat_data.shape[1]))

    data['data'] = pd.DataFrame(data['data'])
    
    # List of electrode names following the 10-20 EEG recording system
    data['electrodes'] = np.array(['FP1', 'FP2', 'F3', 'F4', 'T3', 'C3', 'Cz', 'C4', 'T4', 'P3', 'Pz', 'P4', 'O1', 'O2'])

    all_data[file_path] = data
    return all_data


def process_hand_movement_data(path):
    """ Reads and processes all hand movement data from MATLAB files in the provided
    path. Only relevant data will be stored for use here. Dataset parsed in this function
    was obtained from https://academic.oup.com/gigascience/article/6/7/gix034/3796323.
    """
    all_data = {}

    for file_name in os.listdir(path):
        # 'data' will contain all relevant data that should be used for training.
        # Other keys will hold other useful information that shouldn't be directly
        # needed for training.
        data = {'data': {}}

        full_file_path = os.path.join(path, file_name)
        mat_data = sio.loadmat(full_file_path)

        # Indices that indicate when left or right hand movement begins. This movement lasts for 3 seconds, thus
        # movement indices are created by creating a range from <start> to <start + 3> seconds for each index.
        # These indices are the same for both the left and right hand movements.
        num_time_points_per_window = mat_data["eeg"]["srate"][0][0].item() * 3
        movement_start_indices, = np.where(mat_data["eeg"]["movement_event"][0][0][0] == 1)        
        movement_indices = np.array(np.ravel([np.arange(start, start + num_time_points_per_window) for start in movement_start_indices]))

        # The data contains 64 EEG channels and 4 EMG channels, take only the EEG channels.
        # Data is concatenated after extracting relevant data according to the movement indices
        all_eeg_data = [np.swapaxes(mat_data["eeg"][movement][0][0][:64], 0, 1) for movement in ["movement_left", "movement_right"]]
        data['data']['samples'] = list(np.concatenate(np.array([eeg_data[movement_indices] for eeg_data in all_eeg_data])))

        # Time points in seconds (note: window size is 3s).
        num_trials = int(len(data['data']['samples']) / num_time_points_per_window)
        data['data']['time'] = np.array(np.ravel([np.linspace(0, 3, num_time_points_per_window, endpoint=False)] * num_trials))

        data['data'] = pd.DataFrame(data['data'])
        all_data[full_file_path] = data
    
    return all_data


def process_resting_state_data(path):
    """ Reads and processes all hand movement data from MATLAB files in the provided
    path. Only relevant data will be stored for use here. Dataset parsed in this function
    was obtained from https://dataverse.tdl.org/dataset.xhtml?persistentId=doi:10.18738/T8/SS2NHB.
    """
    all_data = {}

    for file_name in os.listdir(path):
        # 'data' will contain all relevant data that should be used for training.
        # Other keys will hold other useful information that shouldn't be directly
        # needed for training.
        data = {'data': {}}

        full_file_path = os.path.join(path, file_name)
        eeg_data = highlevel.read_edf(full_file_path)

        # Each channel has a specified sample rate. They should all be equivalent, so confirm this.
        sample_rates = set([channel['sample_rate'] for channel in eeg_data[1]])
        assert len(sample_rates) == 1
        sample_rate = int(next(iter(sample_rates)))

        samples = np.swapaxes(np.array(eeg_data[0][:-7, :]), 0, 1)
        data['data']['samples'] = list(samples)

        # Time points in seconds no window size specified, so time increased indefinitely
        data['data']['time'] = np.linspace(0, samples.shape[0] / sample_rate, samples.shape[0], endpoint=False)

        # Last 7 elements of the electrodes array are irrelevant
        data['electrodes'] = np.array([channel['label'] for channel in eeg_data[1][:-7]])

        data['data'] = pd.DataFrame(data['data'])
        all_data[full_file_path] = data

    return all_data

In [6]:
# Read all relevant data from the MATLAB files
BASE_DATA_PATH = "data"
TIME_SERIES_DATA_PATH = os.path.join(BASE_DATA_PATH, "TimeSeries")
GPAC_IMAGES_DATA_PATH = os.path.join(BASE_DATA_PATH, "GPAC_Images_Labelled")
EYE_BLINKING_DATA_PATH = os.path.join(BASE_DATA_PATH, "Voluntary_Involuntary_Eye_Blinks", "Epochs.mat")
HAND_MOVEMENT_DATA_PATH = os.path.join(BASE_DATA_PATH, "Left_Right_Hand_MI", "mat_data")
RESTING_STATE_DATA_PATH = os.path.join(BASE_DATA_PATH, "Resting_State")

timeseries_data = process_timeseries_data(TIME_SERIES_DATA_PATH)
gpac_image_data = process_gpac_image_data(GPAC_IMAGES_DATA_PATH)
eye_blinking_data = process_eye_blinking_data(EYE_BLINKING_DATA_PATH)
hand_movement_data = process_hand_movement_data(HAND_MOVEMENT_DATA_PATH)
resting_state_data = process_resting_state_data(RESTING_STATE_DATA_PATH)

In [9]:
# Get some test data to show data structure
test_timeseries_data = next(iter(timeseries_data.values()))
test_gpac_data = next(iter(gpac_image_data.values()))
test_eye_blinking_data = next(iter(eye_blinking_data.values()))
test_hand_movement_data = next(iter(hand_movement_data.values()))
test_resting_state_data = next(iter(resting_state_data.values()))

In [10]:
# Time series data
print("---------- TIME SERIES DATA FORMAT ----------")

print("\n\nData For Training:")
display(test_timeseries_data['data'])

print("\n\nElectrode Names:")
display(test_timeseries_data['electrodes'])

print(f"\n\nNumber of Channels Per Sample: {len(test_timeseries_data['data']['samples'][0])}")


# GPAC image data
print("\n\n\n---------- GPAC IMAGE DATA FORMAT ----------")

print("\n\nData For Training:")
display(test_gpac_data['data'])

print(f"\n\nWindow Length (seconds): {test_gpac_data['window_len'].item()}")

print("\n\nPhase Range Of Interest:")
display(test_gpac_data['lf'])

print("\n\nAmplitude Range Of Interest:")
display(test_gpac_data['hf'])

print(f"\n\nNumber of Datapoints Per GPAC Image: {len(test_gpac_data['data']['fv'][0])}")


# Eye blinking data
print("\n\n\n---------- EYE BLINKING DATA FORMAT ----------")

# Note that for the eye blinking data, multiple windows from different sessions live together in the same
# data structure. Each of these windows is 4s in length. As a result, time does not increase in each
# successful row in the data structure indefinitely, but rather increases from 0 to (4 - 1/256) and then
# will reset to 0 for the next row.
print("\n\nData For Training:")
display(test_eye_blinking_data['data'])

print("\n\nElectrode Names:")
display(test_eye_blinking_data['electrodes'])

print(f"\n\nNumber of Channels Per Sample: {len(test_eye_blinking_data['data']['samples'][0])}")


# Hand movement data
print("\n\n\n---------- HAND MOVEMENT DATA FORMAT ----------")

# Note that for the hand movement data, multiple windows from different sessions live together in the same
# data structure. Each of these windows is 3s in length. As a result, time does not increase in each
# successful row in the data structure indefinitely, but rather increases from 0 to (3 - 1/512) and then
# will reset to 0 for the next row.
print("\n\nData For Training:")
display(test_hand_movement_data['data'])

print(f"\n\nNumber of Channels Per Sample: {len(test_hand_movement_data['data']['samples'][0])}")


# Resting state data
print("\n\n\n---------- RESTING STATE DATA FORMAT ----------")

print("\n\nData For Training:")
display(test_resting_state_data['data'])

print("\n\nElectrode Names:")
display(test_resting_state_data['electrodes'])

print(f"\n\nNumber of Channels Per Sample: {len(test_resting_state_data['data']['samples'][0])}")

---------- TIME SERIES DATA FORMAT ----------


Data For Training:


Unnamed: 0,samples,time
0,"[6.108219724308253e-05, 4.1935847751014113e-05...",0.000
1,"[6.0519399017154524e-05, 4.2618500927900726e-0...",0.002
2,"[6.742417259467301e-05, 4.1556992611811064e-05...",0.004
3,"[5.537121535626189e-05, 4.2360979111764424e-05...",0.006
4,"[5.912841684511108e-05, 4.846190553369728e-05,...",0.008
...,...,...
949494,"[-3.11911321717285e-05, 2.3324337114086137e-05...",1898.988
949495,"[-2.9876694427662773e-05, 2.2369713074451038e-...",1898.990
949496,"[-3.09238426711861e-05, 2.1764404072353062e-05...",1898.992
949497,"[-3.018560838381861e-05, 1.9767057467827266e-0...",1898.994




Electrode Names:


array(['FP1', 'F3', 'C3', 'P3', 'O1', 'F7', 'T3', 'T5', 'F9', 'T9', 'P9',
       'Fz', 'Cz', 'Pz', 'FP2', 'F8', 'T4', 'T6', 'O2', 'F4', 'C4', 'P4',
       'F10', 'T10', 'P10'], dtype='<U3')



Number of Channels Per Sample: 25



---------- GPAC IMAGE DATA FORMAT ----------


Data For Training:


Unnamed: 0,window_start,fv,label
0,1436.0,"[0.025656345787360417, 0.032179734517346216, 0...",0
1,2436.0,"[0.025213492886223276, 0.025289140962647953, 0...",3
2,3436.0,"[0.023487781808931615, 0.01826561582104771, 0....",1
3,4436.0,"[0.06850261987711749, 0.07487524108974554, 0.0...",2
4,5436.0,"[0.028818125484047188, 0.02471954469685771, 0....",0
...,...,...,...
941,942436.0,"[0.02258746497266887, 0.018580595931532815, 0....",-1
942,943436.0,"[0.03575053160503277, 0.03564018155802227, 0.0...",-1
943,944436.0,"[0.030996596628728112, 0.028858710017469907, 0...",-1
944,945436.0,"[0.030891367009021852, 0.030663095755187705, 0...",-1




Window Length (seconds): 2


Phase Range Of Interest:


array([ 1. ,  1.1,  1.2,  1.3,  1.4,  1.5,  1.6,  1.7,  1.8,  1.9,  2. ,
        2.1,  2.2,  2.3,  2.4,  2.5,  2.6,  2.7,  2.8,  2.9,  3. ,  3.1,
        3.2,  3.3,  3.4,  3.5,  3.6,  3.7,  3.8,  3.9,  4. ,  4.1,  4.2,
        4.3,  4.4,  4.5,  4.6,  4.7,  4.8,  4.9,  5. ,  5.1,  5.2,  5.3,
        5.4,  5.5,  5.6,  5.7,  5.8,  5.9,  6. ,  6.1,  6.2,  6.3,  6.4,
        6.5,  6.6,  6.7,  6.8,  6.9,  7. ,  7.1,  7.2,  7.3,  7.4,  7.5,
        7.6,  7.7,  7.8,  7.9,  8. ,  8.1,  8.2,  8.3,  8.4,  8.5,  8.6,
        8.7,  8.8,  8.9,  9. ,  9.1,  9.2,  9.3,  9.4,  9.5,  9.6,  9.7,
        9.8,  9.9, 10. ])



Amplitude Range Of Interest:


array([ 20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,
        33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,
        46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
        59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,
        72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
        85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
        98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
       111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
       124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136,
       137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
       150], dtype=int64)



Number of Datapoints Per GPAC Image: 11921



---------- EYE BLINKING DATA FORMAT ----------


Data For Training:


Unnamed: 0,samples,time
0,"[9.709728321448466, 11.525812172656526, 13.811...",0.000000
1,"[13.009598524570704, 13.692492531665714, 13.11...",0.003906
2,"[9.744855775183703, 11.290155037317742, 12.201...",0.007812
3,"[40.81332724299266, 49.899748469388484, 62.085...",0.011719
4,"[151.66833596629317, 157.53203247940522, 159.1...",0.015625
...,...,...
893947,"[-0.07466539432252636, -0.33331168671831746, -...",3.980469
893948,"[-0.7983416909075477, -0.6307943974575243, -0....",3.984375
893949,"[-1.0631469307127797, -0.9587214956166292, -0....",3.988281
893950,"[-0.4765436059788351, -0.570592790940478, -0.5...",3.992188




Electrode Names:


array(['FP1', 'FP2', 'F3', 'F4', 'T3', 'C3', 'Cz', 'C4', 'T4', 'P3', 'Pz',
       'P4', 'O1', 'O2'], dtype='<U3')



Number of Channels Per Sample: 14



---------- HAND MOVEMENT DATA FORMAT ----------


Data For Training:


Unnamed: 0,samples,time
0,"[-8795.496, -168015.5, -167905.25, 392687.75, ...",0.000000
1,"[-8674.496, -168004.75, -167760.5, 392804.75, ...",0.001953
2,"[-8550.246, -168029.25, -167880.75, 392784.0, ...",0.003906
3,"[-8206.246, -167617.5, -167718.75, 392882.25, ...",0.005859
4,"[-8042.246, -167546.75, -167698.75, 392884.0, ...",0.007812
...,...,...
61435,"[-24420.746, -164252.0, -165280.5, 372158.5, 4...",2.990234
61436,"[-24517.496, -164330.25, -165492.5, 372145.75,...",2.992188
61437,"[-24435.746, -164441.0, -165577.75, 372174.0, ...",2.994141
61438,"[-24356.996, -164573.25, -165559.5, 372181.25,...",2.996094




Number of Channels Per Sample: 64



---------- RESTING STATE DATA FORMAT ----------


Data For Training:


Unnamed: 0,samples,time
0,"[-15039.956585881504, -16343.235427751268, -18...",0.000000
1,"[-15042.081581955048, -16348.829167415448, -18...",0.003906
2,"[-15039.644086458924, -16348.95416718448, -180...",0.007812
3,"[-15038.081589346026, -16347.985418974482, -18...",0.011719
4,"[-15042.67533085795, -16351.735412045442, -180...",0.015625
...,...,...
389883,"[-12615.77356515965, -15125.862677148738, -173...",1522.980469
389884,"[-12612.398571395786, -15122.331433673584, -17...",1522.984375
389885,"[-12606.773581789348, -15113.487700014572, -17...",1522.988281
389886,"[-12604.08608675516, -15109.300207752, -17298....",1522.992188




Electrode Names:


array(['Fp1', 'AF7', 'AF3', 'F1', 'F3', 'F5', 'F7', 'FT7', 'FC5', 'FC3',
       'FC1', 'C1', 'C3', 'C5', 'T7', 'TP7', 'CP5', 'CP3', 'CP1', 'P1',
       'P3', 'P5', 'P7', 'P9', 'PO7', 'PO3', 'O1', 'Iz', 'Oz', 'POz',
       'Pz', 'CPz', 'Fpz', 'Fp2', 'AF8', 'AF4', 'AFz', 'Fz', 'F2', 'F4',
       'F6', 'F8', 'FT8', 'FC6', 'FC4', 'FC2', 'FCz', 'Cz', 'C2', 'C4',
       'C6', 'T8', 'TP8', 'CP6', 'CP4', 'CP2', 'P2', 'P4', 'P6', 'P8',
       'P10', 'PO8', 'PO4', 'O2', 'M1', 'M2'], dtype='<U3')



Number of Channels Per Sample: 66


In [5]:
def label_time_series(time_series, GPAC_series):
    """
    Fucntion that will translate the labels from the GPAC Dataframe to the Time-Series DataFrame 
    """
    
    # Initial label value for all rows in the time_series
    time_series['data']['label'] = -1
    
    # Iterate through the GPAC rows to obtain the window value and the label 
    for index, row in GPAC_series['data'].iterrows():
        window_start = int(row['window_start'])
        label_value = row['label']
        
        # Set the 'label' value for the time series Dataframe
        # Add 1000 to window_start because in time_series it is separated by intervals of 0.002 s.
        # 1 window size = 2s, 2 / 0.002 = 1000 rows 
        time_series['data'].loc[window_start : window_start+1000, 'label'] = label_value
    
    return time_series
    
test_timeseries_data_labelled = label_time_series(test_timeseries_data, test_gpac_data)

print("\n\nAs seen in the GPAC Dataframe in the previous cell, the label begins at t=1436, and below you can see that change:")
display(test_timeseries_data_labelled['data'].iloc[1433:1439])

print("\n\nSimilarly, the second label begins at t=2436, and below you can see that change:")
display(test_timeseries_data_labelled['data'].iloc[2433:2439])

print("\n\nLabel value statistics of the time-series data:")
print(test_timeseries_data_labelled['data']['label'].value_counts())

print("\n\nLabel value statistics of the GPAC data:")
print(test_gpac_data['data']['label'].value_counts())



As seen in the GPAC Dataframe in the previous cell, the label begins at t=1436, and below you can see that change:


Unnamed: 0,samples,time,label
1433,"[-3.918726818570923e-05, 3.1590051061025356e-0...",2.866,-1
1434,"[-2.7482217912228493e-05, 3.294411591614816e-0...",2.868,-1
1435,"[-3.871242545568842e-05, 3.251909070927831e-05...",2.87,-1
1436,"[-4.014147248365788e-05, 3.043857439682265e-05...",2.872,0
1437,"[-3.478690402970244e-05, 3.202987604497138e-05...",2.874,0
1438,"[-3.337380150125322e-05, 3.7954873986926406e-0...",2.876,0




Similarly, the second label begins at t=2436, and below you can see that change:


Unnamed: 0,samples,time,label
2433,"[6.336374464748228e-05, 6.706071296848414e-05,...",4.866,0
2434,"[5.307997547277073e-05, 6.623601097400111e-05,...",4.868,0
2435,"[5.416560886950276e-05, 6.953586163041345e-05,...",4.87,0
2436,"[6.708489008597993e-05, 7.019486785538814e-05,...",4.872,3
2437,"[7.915946937218587e-05, 7.136690139654629e-05,...",4.874,3
2438,"[7.392872574665033e-05, 7.387425424376896e-05,...",4.876,3




Label value statistics of the time-series data:
-1    889499
 3     15000
 2     15000
 1     15000
 0     15000
Name: label, dtype: int64


Label value statistics of the GPAC data:
-1    886
 3     15
 2     15
 1     15
 0     15
Name: label, dtype: int64


In [6]:
def split_gpac_data(gpac_image_data):
    """
    Function that takes all of the data, filters out the rows w/ a label of -1, and then splits 
    the data into Training, Validation, and Test 
    """
    
    all_data = pd.DataFrame(data=None, columns=['fv', 'label'])
    for data_file in gpac_image_data:
        
        # Isolate the image data portion of each dataset
        sub_data = gpac_image_data[data_file]['data']
        sub_data = sub_data.drop(['window_start'], axis=1)
        
        # Now we need to filter out the rows where the 'label' value is -1
        all_data = all_data.append(sub_data[sub_data['label'] > -1])
 
    # Now is the splitting of the dataframe into train, test, and validation datafames
    train_data, valid_data, test_data = np.split(all_data.sample(frac=1, random_state=42),
                                                [int(0.7 * len(all_data)), int(0.85 * len(all_data))])

    return train_data, valid_data, test_data

train_GPAC_data, valid_GPAC_data, test_GPAC_data = split_gpac_data(gpac_image_data)

print('\n\n\nTraining DataFrame and class breakdown')
display(train_GPAC_data)
print(train_GPAC_data['label'].value_counts())

print('\n\n\nValidation DataFrame and class breakdown')
display(valid_GPAC_data)
print(valid_GPAC_data['label'].value_counts())


print('\n\n\nTesting DataFrame and class breakdown')
display(test_GPAC_data)
print(test_GPAC_data['label'].value_counts())




Training DataFrame and class breakdown


Unnamed: 0,fv,label
23,"[0.018301559404177783, 0.016191050183418685, 0...",0
25,"[0.01994675806197388, 0.01836840894162138, 0.0...",1
3,"[0.027993013047380923, 0.027283257408556676, 0...",3
57,"[0.062484502857957896, 0.06227094366153695, 0....",2
116,"[0.036048090198878564, 0.03346645338383887, 0....",0
...,...,...
301,"[0.04243789441092988, 0.031468892938120184, 0....",2
78,"[0.01867367673840801, 0.018222437391824706, 0....",3
111,"[0.030582777834395232, 0.027439392712002166, 0...",0
67,"[0.023998793777635818, 0.023887692881069233, 0...",2


1    185
0    185
2    175
3    168
Name: label, dtype: int64



Validation DataFrame and class breakdown


Unnamed: 0,fv,label
57,"[0.023210768120139995, 0.02397481439445296, 0....",0
391,"[0.012000751484808949, 0.014668486885496884, 0...",2
141,"[0.02297273660933318, 0.028571106234670976, 0....",3
659,"[0.018681566582652482, 0.02137612430195521, 0....",2
51,"[0.01742880927542176, 0.01727879289693374, 0.0...",0
...,...,...
294,"[0.019274734963773667, 0.01964530289860449, 0....",3
74,"[0.017314829640633116, 0.019576010425275958, 0...",0
30,"[0.021855765364977152, 0.02317689953382368, 0....",3
3,"[0.025761694022292014, 0.02667989210991661, 0....",0


3    47
1    38
2    36
0    32
Name: label, dtype: int64



Testing DataFrame and class breakdown


Unnamed: 0,fv,label
59,"[0.01873868774702581, 0.019221709357276837, 0....",1
362,"[0.011255097478693965, 0.010094310805970149, 0...",3
449,"[0.02641322268470771, 0.02416144737316132, 0.0...",2
107,"[0.015150718532073403, 0.019016164209349193, 0...",0
41,"[0.014017843437453499, 0.014647994612369902, 0...",1
...,...,...
13,"[0.02246842926240328, 0.020827275156348645, 0....",0
96,"[0.020721969920910452, 0.02032751075864317, 0....",3
478,"[0.03105682494659574, 0.030114205713642136, 0....",1
97,"[0.013754220693483293, 0.015193689500375317, 0...",3


2    44
3    40
0    37
1    32
Name: label, dtype: int64
