In [44]:
import numpy as np

# How many leading columns of each row hold feature values.
numFeatures = 6

# Comma-separated training file read by the loader below.
filename = "dataset/trainData.txt"

In [45]:
def loadDataSet(path, nFeatures=None):
    """Load a comma-separated data file and split it into features and labels.

    Parameters
    ----------
    path : str
        File to read. Every field is first read as text.
    nFeatures : int, optional
        Number of leading columns to treat as features. When omitted, the
        notebook-level ``numFeatures`` constant is used, which is what the
        one-argument call has always done.

    Returns
    -------
    features : numpy.ndarray of float64, shape (nFeatures, nSamples)
        One row per feature and one column per sample (note the transpose).
    labels : numpy.ndarray of str, shape (nSamples,)
        Last column of the file, returned as the raw text that was read
        (original note: 1->True, 0->False); no numeric conversion is done.
    """
    if nFeatures is None:
        nFeatures = numFeatures

    # A file holding a single sample comes back 1-D from genfromtxt; force two
    # dimensions so the column slicing below works for one row as well.
    rawData = np.atleast_2d(np.genfromtxt(path, delimiter=',', dtype="str"))

    # Leading columns are the features: convert to float and put samples on axis 1.
    features = np.array(rawData[:, 0:nFeatures], dtype=np.float64).T

    # The label is always the last column and is kept as a string.
    labels = rawData[:, -1]

    return features, labels



In [46]:
# Read the training file, then report both shapes as a quick sanity check.
features, labels = loadDataSet(filename)

shapeSummary = f"features shape: {features.shape}\nLabels shape: {labels.shape}"
print(shapeSummary)

features shape: (6, 6000)
Labels shape: (6000,)


`features` has shape (6, 6000): each row is one of the 6 features and each column is one sample.

In [47]:
# Preview: every feature row (0 to 5) for the first 10 samples.
features[:, 0:10]

array([[ 1.85284048, -0.21530775,  0.04653456, -0.69846824, -2.13449634,
        -0.18678977,  0.37954862,  0.25194121,  1.02611325,  0.12159774],
       [-0.16436898,  1.63665974, -1.16487257, -0.7313877 , -0.94679643,
        -0.35105166,  0.07035488, -0.536285  ,  0.84958757, -0.44862065],
       [ 1.09839078, -0.61831145,  0.04392191,  0.22682514,  0.7331265 ,
         0.25874009,  0.74192792,  0.98317697, -0.34210771, -0.26330183],
       [-0.93073689,  0.99002204, -0.41369086,  0.1487948 , -3.50433306,
         0.82430276,  0.21831212, -1.43683554,  0.84965311, -0.31580565],
       [-1.0193342 ,  0.38670252, -1.08847926, -1.44462156, -1.82338742,
        -1.24295012,  1.30277214,  0.74408647,  0.12493949,  1.42245348],
       [ 1.16696309,  1.15926273, -1.37988988,  1.17573859,  0.90464917,
        -0.09511962, -1.32719263,  1.43401058,  0.69336959, -1.00038176]])

## Feature normalization

In [36]:
# Per-feature mean, averaged over the samples (axis 1) -> shape (6,)
mu = features.mean(axis=1)
print(f"Mu before reshaping: {mu}\n")

# Make the mean a column vector, shape (6, 1), so that it broadcasts
# against the features when it is subtracted from them.
mu = mu.reshape(-1, 1)
print(f"Mu after reshaping:\n {mu}\n")

Mu before reshaping: [ 0.00170711  0.00503903 -0.00560753  0.00109537 -0.00700025  0.00910515]

Mu after reshaping:
 [[ 0.00170711]
 [ 0.00503903]
 [-0.00560753]
 [ 0.00109537]
 [-0.00700025]
 [ 0.00910515]]



In [37]:
# Per-feature standard deviation, computed over the samples (axis 1).
std = features.std(axis=1)

# Display the shape: it is needed to decide how std must be reshaped.
std.shape

(6,)

In [39]:
# Put the three shapes side by side before combining the arrays.
shapeCheck = f"Features shape: {features.shape}\nMu shape: {mu.shape}\nStd shape: {std.shape}"
print(shapeCheck)

Features shape: (6, 6000)
Mu shape: (6, 1)
Std shape: (6,)


In [41]:
# std must be reshaped too: turn it into a column vector like mu.
std = std.reshape(len(std), 1)

# Display the new shape to confirm the reshape.
std.shape

(6, 1)

In [50]:
#Normalization (z-score): subtract each feature's mean, then divide by its standard deviation.
# mu and std are column vectors, so they broadcast across all the sample columns.
# A feature that is constant over every sample has std == 0; dividing by it would
# fill that row with NaN, so such a row is divided by 1 instead and stays at zero.
normalizedFeatures = (features - mu) / np.where(std == 0, 1.0, std)

print(f"Shape of normaliized features: {normalizedFeatures.shape}\nShape of original features:{features.shape}")
print(f"Mean:\n {mu}\n Std:\n {std}")


#show all the 6 features for sample 0, before and after normalization
print(f"features, sample0 before norm: {features[:, 0]}\nfeature, sample0 after norm: {normalizedFeatures[:, 0]}")

Shape of normaliized features: (6, 6000)
Shape of original features:(6, 6000)
Mean:
 [[ 0.00170711]
 [ 0.00503903]
 [-0.00560753]
 [ 0.00109537]
 [-0.00700025]
 [ 0.00910515]]
 Std:
 [[1.00067129]
 [0.99917601]
 [1.00124013]
 [0.99513511]
 [1.00059856]
 [0.99861091]]
features, sample0 before norm: [ 1.85284048 -0.16436898  1.09839078 -0.93073689 -1.0193342   1.16696309]
feature, sample0 after norm: [ 1.84989155 -0.16954771  1.1026309  -0.93638768 -1.01172838  1.15946855]
