# First try of 10 fold cross-validation with a simple neural network

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

from keras import Sequential
from keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Dropout
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


In [3]:
#extracting data from csv files into numpy arrays
from numpy import genfromtxt
files_data=np.array(['fold_data_1.csv','fold_data_2.csv','fold_data_3.csv','fold_data_4.csv','fold_data_5.csv',
                'fold_data_6.csv','fold_data_7.csv','fold_data_8.csv','fold_data_9.csv','fold_data_10.csv'])
X_folds= np.array([genfromtxt(file,delimiter=',') for file in files_data])

files_label=(['fold_labels_1.csv','fold_labels_2.csv','fold_labels_3.csv','fold_labels_4.csv','fold_labels_5.csv',
                'fold_labels_6.csv','fold_labels_7.csv','fold_labels_8.csv','fold_labels_9.csv','fold_labels_10.csv'])
Y_folds= np.array([genfromtxt(file,delimiter=',') for file in files_label])

In [4]:
X_folds[0].shape

(873, 256)

In [39]:
# Reshaping Y_folds (Note: NOT for CNN)
for i in range(0, len(Y_folds)):
    Y_folds[i]=np.reshape(Y_folds[i],(Y_folds[i].shape[0], 1))

In [3]:
# Changing the labels into categorical (Note: ONLY for CNN)
from keras.utils.np_utils import to_categorical
Y_folds=([to_categorical(Y_folds, num_classes=10) for Y_folds in Y_folds])

In [40]:
# Reshaping folds
for i in range(0, len(X_folds)):
    X_folds[i]=np.reshape(X_folds[i],(X_folds[i].shape[0], 40, 6))

In [41]:
# 10 fold cross-validation
import pandas as pd
import os
import numpy as np
from sklearn import metrics
from scipy.stats import zscore
from sklearn.model_selection import KFold
    
accuracy_scores = []

fold = 0
for i in range (0, len(Y_folds)) :
    fold+=1
    print(f"Fold #{fold}")
    
    x_train = np.vstack([X_folds[j] for j in range(0, len(Y_folds)) if i!=j])
    y_train = np.vstack([Y_folds[j] for j in range(0, len(Y_folds)) if i!=j])
    x_test = np.vstack([X_folds[j] for j in range(0, len(Y_folds)) if i==j])
    y_test = np.vstack([Y_folds[j] for j in range(0, len(Y_folds)) if i==j])
    
    model = Sequential()
    model.add(Flatten(input_shape=[40, 6]))
    model.add(Dense(300, activation="relu"))   
    model.add(Dense(100, activation="relu"))
    model.add(Dense(10, activation="softmax"))
    model.compile(loss = "sparse_categorical_crossentropy", optimizer="sgd", metrics = ["accuracy"])
    model.fit(x_train,y_train,batch_size=50,epochs=30,validation_data=(x_test,y_test), verbose=0)
    model_evaluation = model.evaluate(x_test, y_test, verbose = 0)
    score = model_evaluation[1]
    accuracy_scores.append(score)
    print("Fold score (Accuracy): {score}".format(score = score))

# Build the oos prediction list and calculate the error.
final_score = np.mean(accuracy_scores)
print(f"Final, out of sample score (Accuracy): {final_score}")  

Fold #1
(7859, 40, 6)
(7859, 1)
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where








Fold score (Accuracy): 0.11454753743277656
Fold #2
(7844, 40, 6)
(7844, 1)
Fold score (Accuracy): 0.11261261261261261
Fold #3
(7807, 40, 6)
(7807, 1)
Fold score (Accuracy): 0.03891891891891892
Fold #4
(7742, 40, 6)
(7742, 1)
Fold score (Accuracy): 0.10101010119072115
Fold #5
(7796, 40, 6)
(7796, 1)
Fold score (Accuracy): 0.10683760683760683
Fold #6
(7909, 40, 6)
(7909, 1)
Fold score (Accuracy): 0.12150668289019009
Fold #7
(7894, 40, 6)
(7894, 1)
Fold score (Accuracy): 0.11933174224343675
Fold #8
(7926, 40, 6)
(7926, 1)
Fold score (Accuracy): 0.10794044665012408
Fold #9
(7916, 40, 6)
(7916, 1)
Fold score (Accuracy): 0.12254901960784313
Fold #10
(7895, 40, 6)
(7895, 1)
Fold score (Accuracy): 0.1111111111111111
Final, out of sample score (Accuracy): 0.10563657794953411


# 10 fold cross-validation for a Convolutional Neural Network

In [4]:
# Reshaping folds for CNN
for i in range(0, len(X_folds)):
    X_folds[i]=np.reshape(X_folds[i],(X_folds[i].shape[0], 40, 6, 1))

In [6]:
# 10 fold cross-validation
import pandas as pd
import os
import numpy as np
from sklearn import metrics
from scipy.stats import zscore
from sklearn.model_selection import KFold
    
accuracy_scores = []

fold = 0
for i in range (0, len(Y_folds)) :
    fold+=1
    print(f"Fold #{fold}")
    
    x_train = np.vstack([X_folds[j] for j in range(0, len(Y_folds)) if i!=j])
    y_train = np.vstack([Y_folds[j] for j in range(0, len(Y_folds)) if i!=j])
    x_test = np.vstack([X_folds[j] for j in range(0, len(Y_folds)) if i==j])
    y_test = np.vstack([Y_folds[j] for j in range(0, len(Y_folds)) if i==j])
    
    #forming model
    model=Sequential()

    model.add(Conv2D(64,kernel_size=5,strides=1,padding="Same",activation="relu",input_shape=(40,6,1)))
    model.add(MaxPooling2D(padding="same"))

    model.add(Conv2D(128,kernel_size=5,strides=1,padding="same",activation="relu"))
    model.add(MaxPooling2D(padding="same"))
    model.add(Dropout(0.3))

    model.add(Flatten())

    model.add(Dense(256,activation="relu"))
    model.add(Dropout(0.3))

    model.add(Dense(512,activation="relu"))
    model.add(Dropout(0.3))

    model.add(Dense(10,activation="softmax"))

    #compiling
    model.compile(optimizer="adam",loss="categorical_crossentropy",metrics=["accuracy"])

    #training the model
    model.fit(x_train,y_train,batch_size=50,epochs=30, verbose = 0,validation_data=(x_test,y_test))
    
    model_evaluation = model.evaluate(x_test, y_test, verbose = 0)
    score = model_evaluation[1]
    accuracy_scores.append(score)
    print("Fold score (Accuracy): {score}".format(score = score))

# Build the oos prediction list and calculate the error.
final_score = np.mean(accuracy_scores)
print(f"Final, out of sample score (Accuracy): {final_score}") 

Fold #1





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Train on 7859 samples, validate on 873 samples
Epoch 1/30





Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30

KeyboardInterrupt: 