In [1]:
import numpy as np

In [2]:
# THE FUNCTION RANDOMLY SHUFFLES THE TRAINING EXAMPLES AND LABELS.

def randomShuffle(X,y):
    # Input:  Matrix of examples X, one example per column.
    #         Matrix of labels y, one column per example (must be 2-D,
    #         with as many columns as X).
    # Output: X with its columns in a random order.
    #         y with its columns in that same order, so every example
    #         stays paired with its label.
    # Note:   Uses numpy's global random state; the inputs are not modified.

    num_examples = np.shape(X)[1]

    # A single permutation of the column indices, shared by both arrays.
    order = np.random.permutation(num_examples)
    X_shuffled, y_shuffled = X[:, order], y[:, order]

    return X_shuffled, y_shuffled

In [3]:
# THE FUNCTION NORMALIZES THE INPUT DATA X.

def normInput(X):
    # Input:  Matrix of examples X, one feature per row and one example
    #         per column.
    # Output: Matrix X_norm of the same shape in which every row has been
    #         shifted to zero mean and scaled to unit (population) standard
    #         deviation. A row whose values are all equal is returned as
    #         all zeros.

    mean = np.mean(X,axis=1,keepdims=True)
    std = np.std(X,axis=1,keepdims=True)

    # A constant feature has std == 0; dividing by it would fill the row
    # with NaN. Its centred values are already 0, so divide by 1 instead.
    safe_std = np.where(std == 0, 1.0, std)

    X_norm = (X - mean) / safe_std

    return X_norm

In [4]:
# THE FUNCTION SPLITS THE INPUT DATA INTO TRAINING, VALIDATION AND TEST DATA.

def splitData(X,y,trainProp):
    # Input:  Matrix of examples X, one example per column.
    #         Matrix of labels y, one column per example.
    #         Proportion trainProp (between 0 and 1) of the examples that
    #         goes to the training set.
    # Output: Matrix of training examples X_train and its labels y_train.
    #         Matrix of validation examples X_valid and its labels y_valid.
    #         Matrix of test examples X_test and its labels y_test.
    # Raises: ValueError if trainProp is not within [0, 1].
    # Note:   The columns are taken in their current order (no shuffling
    #         happens here). The examples left after the training set are
    #         halved; when their number is odd the test set gets the extra one.

    if not 0 <= trainProp <= 1:
        raise ValueError("trainProp must be between 0 and 1, got %r" % (trainProp,))

    m = X.shape[1]

    # numpy is used for the floor because it is the module this file imports.
    trainSize = int(np.floor(m*trainProp))
    X_train = X[:,:trainSize]
    y_train = y[:,:trainSize]

    # Integer floor division: half of the remaining examples, rounded down.
    validSize = (m - trainSize)//2
    validEnd = trainSize + validSize
    X_valid = X[:,trainSize:validEnd]
    y_valid = y[:,trainSize:validEnd]

    X_test = X[:,validEnd:]
    y_test = y[:,validEnd:]

    return X_train, y_train, X_valid, y_valid, X_test, y_test

In [1]:
# THE FUNCTION PRE-PROCESSES THE INPUT DATA.

def preProcessing(data,trainProp):
    # Input:  Input data: a pandas DataFrame holding a 'Gender' column, a
    #         'Dataset' label column and rows labelled 209, 241, 253 and 312.
    #         Every other column must be convertible to float.
    #         Proportion of the data for training.
    # Output: Matrix of training examples X_train and its labels y_train.
    #         Matrix of validation examples X_valid and its labels y_valid.
    #         Matrix of test examples X_test and its labels y_test.
    #         In all matrices each column is one example.
    # Note:   The caller's DataFrame is not modified, so the function can be
    #         called again on the same frame.

    # Rows excluded from the data set.
    # NOTE(review): presumably the rows with missing values - confirm.
    labels = [209,241,253,312]

    # drop() without inplace returns a new frame; all the re-coding below is
    # done on that frame instead of on the caller's data.
    cleaned = data.drop(labels,axis=0)

    # Encode gender as a number: Male -> 0, Female -> 1.
    cleaned.loc[cleaned['Gender']=='Male','Gender'] = 0
    cleaned.loc[cleaned['Gender']=='Female','Gender'] = 1

    # Re-code label 2 as 0; every other label value is kept as it is.
    cleaned.loc[cleaned['Dataset']==2,'Dataset'] = 0

    # Features are all columns except the label column.
    X = np.array(cleaned.drop('Dataset',axis=1),dtype=np.float64)
    y = np.array(cleaned['Dataset'],dtype=np.float64)

    # Switch to the column-per-example layout used by the helper functions:
    # X becomes (features, examples) and y becomes (1, examples).
    X, y = X.T, y.reshape(1,y.shape[0])
    X_norm = normInput(X)

    X_shuf, y_shuf = randomShuffle(X_norm,y)
    X_train, y_train, X_valid, y_valid, X_test, y_test = splitData(X_shuf,y_shuf,trainProp)

    return X_train, y_train, X_valid, y_valid, X_test, y_test