In [1]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
import numpy as np
import collections

In [2]:
# Single seed shared by every randomised step so the splits are reproducible.
seed = 2019

# Outer cross-validation: five shuffled folds that preserve class proportions.
kfold = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=seed,
)

In [3]:
# Directory holding the preprocessed arrays (TUH); the trailing slash matters
# because file names are appended by plain string concatenation.
pnt_path = '/mnt/data7_M2/Tennison_TUH_Reprocessed_STFT/stft_1s_64/'

# X: feature array, y: label array, loaded from the two .npy files in pnt_path.
X, y = (np.load(pnt_path + file_name) for file_name in ('data_x.npy', 'data_y.npy'))

In [16]:
def _class_distribution(labels):
    """Count the labels and compute each class's share of the total.

    Args:
        labels: Iterable of hashable class labels.

    Returns:
        A ``(counts, proportions)`` pair. ``counts`` is a
        ``collections.Counter`` keyed by label; ``proportions`` maps each
        label to its fraction of all labels, rounded to two decimals.
    """
    counts = collections.Counter(labels)
    total = sum(counts.values())
    proportions = {key: round(value / total, 2) for key, value in counts.items()}
    return counts, proportions


# For every outer fold: keep the fold's test indices aside, carve a stratified
# validation set out of the remaining samples, and print the class balance of
# each of the three resulting sets.
for fold, (train, test) in enumerate(kfold.split(X, y), start=1):
    # test_size=0.25 of the non-test samples; reuse the notebook-wide seed
    # instead of repeating the literal so both split levels stay in sync.
    X_train, X_val, y_train, y_val = train_test_split(
        X[train], y[train], test_size=0.25, stratify=y[train], random_state=seed)
    X_test, y_test = X[test], y[test]

    print('Distribution for fold {}'.format(fold))
    for split_name, split_labels in (('train', y_train), ('val', y_val), ('test', y_test)):
        counts, proportions = _class_distribution(split_labels)
        print('Distribution of classes on {} set: '.format(split_name))
        print(sorted(dict(counts).items()))
        print('Proportion of classes on {} set:'.format(split_name))
        print(sorted(proportions.items()))
    print('\n')

Distribution for fold 1
Distribution of classes on train set: 
[('ABSZ', 282), ('CPSZ', 14700), ('FNSZ', 23826), ('GNSZ', 12278), ('MYSZ', 775), ('SPSZ', 785), ('TCSZ', 1023), ('TNSZ', 244)]
Proportion of classes on train set:
[('ABSZ', 0.01), ('CPSZ', 0.27), ('FNSZ', 0.44), ('GNSZ', 0.23), ('MYSZ', 0.01), ('SPSZ', 0.01), ('TCSZ', 0.02), ('TNSZ', 0.0)]
Distribution of classes on val set: 
[('ABSZ', 94), ('CPSZ', 4900), ('FNSZ', 7942), ('GNSZ', 4093), ('MYSZ', 258), ('SPSZ', 262), ('TCSZ', 341), ('TNSZ', 82)]
Proportion of classes on val set:
[('ABSZ', 0.01), ('CPSZ', 0.27), ('FNSZ', 0.44), ('GNSZ', 0.23), ('MYSZ', 0.01), ('SPSZ', 0.01), ('TCSZ', 0.02), ('TNSZ', 0.0)]
Distribution of classes on test set: 
[('ABSZ', 95), ('CPSZ', 4900), ('FNSZ', 7942), ('GNSZ', 4093), ('MYSZ', 259), ('SPSZ', 262), ('TCSZ', 341), ('TNSZ', 82)]
Proportion of classes on test set:
[('ABSZ', 0.01), ('CPSZ', 0.27), ('FNSZ', 0.44), ('GNSZ', 0.23), ('MYSZ', 0.01), ('SPSZ', 0.01), ('TCSZ', 0.02), ('TNSZ', 0.0)]



In [36]:
# IPython-only help lookup (trailing "?" is not plain Python syntax).
# Development aid: it has no effect on the analysis and can be dropped from a final notebook.
train_test_split?

In [26]:
from utils.prep_data import train_val_test_nfold_split

# Iterate over the train/val/test splits produced by the project helper and
# print, per fold, the three feature-array shapes followed by the three
# label histograms.
n_folds = train_val_test_nfold_split(X, y)
for X_train, Y_train, X_val, Y_val, X_test, Y_test in n_folds:
    for features in (X_train, X_val, X_test):
        print(features.shape)
    for labels in (Y_train, Y_val, Y_test):
        print(collections.Counter(labels))

Indices after shuffling:
['CPSZ' 'CPSZ' 'TCSZ' 'CPSZ' 'GNSZ' 'FNSZ' 'FNSZ' 'FNSZ' 'FNSZ' 'CPSZ']
<class 'numpy.ndarray'>
(53913, 32, 9, 19)
(17971, 32, 9, 19)
(17971, 32, 9, 19)
Counter({'FNSZ': 23940, 'CPSZ': 14671, 'GNSZ': 12237, 'TCSZ': 986, 'SPSZ': 805, 'MYSZ': 772, 'ABSZ': 265, 'TNSZ': 237})
Counter({'FNSZ': 7892, 'CPSZ': 4920, 'GNSZ': 4113, 'TCSZ': 352, 'SPSZ': 267, 'MYSZ': 247, 'ABSZ': 103, 'TNSZ': 77})
Counter({'FNSZ': 7878, 'CPSZ': 4907, 'GNSZ': 4113, 'TCSZ': 367, 'MYSZ': 273, 'SPSZ': 236, 'ABSZ': 103, 'TNSZ': 94})
<class 'numpy.ndarray'>
(53913, 32, 9, 19)
(17971, 32, 9, 19)
(17971, 32, 9, 19)
Counter({'FNSZ': 23917, 'CPSZ': 14676, 'GNSZ': 12219, 'TCSZ': 1017, 'MYSZ': 775, 'SPSZ': 769, 'ABSZ': 279, 'TNSZ': 261})
Counter({'FNSZ': 7892, 'CPSZ': 4920, 'GNSZ': 4113, 'TCSZ': 352, 'SPSZ': 267, 'MYSZ': 247, 'ABSZ': 103, 'TNSZ': 77})
Counter({'FNSZ': 7901, 'CPSZ': 4902, 'GNSZ': 4131, 'TCSZ': 336, 'SPSZ': 272, 'MYSZ': 270, 'ABSZ': 89, 'TNSZ': 70})
<class 'numpy.ndarray'>
(53913, 32, 9

In [14]:
import numpy as np

# Scratch check: draw a (2, 2, 1) array of uniform samples from [0, 1).
# The global RNG is not seeded here, so the values differ on every run.
np.random.random_sample((2, 2, 1))

array([[[0.83022872],
        [0.63075085]],

       [[0.72343735],
        [0.04854371]]])

In [10]:
import tensorflow as tf

# Rank-4 constant of shape (4, 2, 1, 1), used to see which axes reduce_sum
# collapses and what shape is left behind.
x = tf.constant([[[[1]], [[2]]], [[[3]], [[4]]], [[[5]], [[6]]], [[[7]], [[8]]]])
print(x.shape)
a = tf.reduce_sum(x, 0)  # sum over axis 0 -> values 16, 20; shape (2, 1, 1)
b = tf.reduce_sum(x, 1)  # sum over axis 1 -> values 3, 7, 11, 15; shape (4, 1, 1)
c = tf.reduce_sum(x, [0, 1])  # sum over axes 0 and 1 -> value 36; shape (1, 1)

with tf.Session() as sess:
    # Evaluate the three reductions in a single run call.
    output_a, output_b, output_c = sess.run([a, b, c])
    for output in (output_a, output_b, output_c):
        print(output)
        print(output.shape)

(4, 2, 1, 1)
[[[16]]

 [[20]]]
(2, 1, 1)
[[[ 3]]

 [[ 7]]

 [[11]]

 [[15]]]
(4, 1, 1)
[[36]]
(1, 1)


In [5]:
# IPython-only help lookup (trailing "?" is not plain Python syntax).
# Development aid: it has no effect on the analysis and can be dropped from a final notebook.
tf.reduce_sum?