## Project :: Feature scaling of a randomized (1000 X 20) array using Mean Normalization 
   
   ### The equation for mean normalization 

$\text{Norm\_Col}_i = \frac{\text{Col}_i - \mu_i}{\sigma_i}$

Here,

i            = index of a column of the array (the i-th column)

${\mu_i}$    = mean of the values in the i-th column of the array

${\sigma_i}$ = standard deviation of the values in the i-th column of the array

x             = (1000 X 20) array

x_norm        = mean-normalized version of x (same shape as x)

avg_cols      = ${\mu_i}$

std_cols      = ${\sigma_i}$


In [1]:
import numpy as np

In [2]:
# Seed NumPy's global RNG so that "Restart & Run All" regenerates the same
# data (and the same row shuffle later in the notebook) on every run.
SEED = 42
np.random.seed(SEED)

# 1000 samples x 20 features of integers drawn uniformly from [0, 5000]
# (the upper bound 5001 is exclusive).
x = np.random.randint(0, 5001, size=(1000, 20))

In [3]:
# Confirm the generated array has the expected (rows, columns) layout.
print(x.shape)

(1000, 20)


In [4]:
# Mean of every column: reducing over axis 0 collapses the rows.
avg_cols = x.mean(axis=0)

In [5]:
# Display the per-column means.
avg_cols

array([2484.945, 2584.448, 2522.358, 2552.834, 2538.862, 2600.416,
       2468.53 , 2501.345, 2514.386, 2485.334, 2508.927, 2523.448,
       2517.889, 2457.328, 2436.208, 2468.781, 2428.239, 2453.013,
       2396.276, 2476.136])

In [6]:
# Standard deviation of every column (NumPy's default, ddof=0), reduced over
# the rows.
std_cols = x.std(axis=0)

In [7]:
# Display the per-column standard deviations.
std_cols

array([1449.21189685, 1413.26974329, 1456.32437796, 1439.0289123 ,
       1448.19535248, 1439.94563055, 1451.10079633, 1460.37155408,
       1411.61423236, 1421.34866604, 1412.61992683, 1422.84412333,
       1430.80538323, 1441.66728076, 1442.40264515, 1432.31601019,
       1448.86027134, 1461.97413412, 1435.32809135, 1418.59284628])

In [8]:
# One mean per column is expected.
avg_cols.shape

(20,)

In [9]:
# One standard deviation per column is expected.
std_cols.shape

(20,)

In [10]:
# Mean-normalize each column: subtract its mean, then divide by its standard
# deviation. The 1-D per-column statistics broadcast across every row of x.
x_norm = np.true_divide(x - avg_cols, std_cols)

In [12]:
# Sanity check: the mean over all normalized values should be ~0
# (up to floating-point round-off).
avg_x_norm = x_norm.mean()
print(avg_x_norm)

2.1316282072803004e-18


In [13]:
# Per-column minimum of the normalized data, one entry per column.
# (Despite the "avg" in the name, this holds minima, not averages.)
# Reducing along axis 0 works for any number of columns instead of a
# hard-coded 20, and runs inside NumPy rather than a Python-level loop.
# The result stays a list of NumPy floats so later cells that print or
# inspect it behave as before.
avg_min_x_norm_col = list(x_norm.min(axis=0))


In [14]:
# Print the minimum normalized value found in each column.
print(avg_min_x_norm_col)

[-1.712617047508871, -1.8237480935534591, -1.729942887814461, -1.7739977134402791, -1.7489781303714964, -1.803134746834709, -1.7011430262107206, -1.7128141074827556, -1.7797964503361414, -1.747167362477453, -1.772541185669695, -1.7735238587438273, -1.756974798575019, -1.6996487557843776, -1.6792869925359455, -1.721534202272496, -1.6759649277618103, -1.6744571212789217, -1.6674069255801762, -1.7370283562824778]


In [15]:
# Per-column maximum of the normalized data, one entry per column.
# (Despite the "avg" in the name, this holds maxima, not averages.)
# Reducing along axis 0 works for any number of columns instead of a
# hard-coded 20, and runs inside NumPy rather than a Python-level loop.
# The result stays a list of NumPy floats so later cells that print or
# inspect it behave as before.
avg_max_x_norm_col = list(x_norm.max(axis=0))

In [16]:
# Print the maximum normalized value found in each column.
print(avg_max_x_norm_col)

[1.734773917784203, 1.7049484087818356, 1.7012981705856793, 1.6984828998941859, 1.6987611483357057, 1.6622738728568955, 1.7355582784980992, 1.7109721104974827, 1.7530384316550565, 1.7671005433197045, 1.7620259722581468, 1.7405645210124179, 1.7326682084491851, 1.7553786742421271, 1.7767521493509972, 1.7672210475877563, 1.7701920956262165, 1.7407879801750559, 1.8091501278666806, 1.768558192427805]


In [17]:
# The list of minima should hold one value per column.
np.asarray(avg_min_x_norm_col).shape

(20,)

In [18]:
# The list of maxima should hold one value per column.
np.asarray(avg_max_x_norm_col).shape

(20,)

In [19]:
# Small 3x3 example: the integers 1..9 laid out row by row.
xx = np.reshape(np.arange(1, 10), (3, 3))

In [20]:
# Display the 3x3 example array.
xx

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [21]:
# Column means of the small example, to illustrate what axis=0 reduces over.
avg_cols2 = xx.mean(axis=0)

In [22]:
# Display the column means of the 3x3 example.
avg_cols2

array([4., 5., 6.])

# Project :: Data separation

In [24]:
# Number of rows (samples) in x.
x.shape[0]

1000

In [98]:
# Randomly shuffled row indices 0 .. n_rows-1, used to split the data
# without ordering bias.
row_indices = np.random.permutation(x.shape[0])

In [99]:
# Split the normalized rows into training / cross-validation / test sets
# (60% / 20% / 20%) by slicing the shuffled `row_indices`.
n_rows = np.shape(x)[0]

# Create a Training Set with 60% data
x_train_start_rows = 0
x_train_end_rows = x_train_start_rows + n_rows * 60 // 100
X_train = x_norm[row_indices[x_train_start_rows:x_train_end_rows], :]

# Create a Cross Validation Set with 20% data
x_crossVal_start_rows = x_train_end_rows
x_crossVal_end_rows = x_crossVal_start_rows + n_rows * 20 // 100
X_crossVal = x_norm[row_indices[x_crossVal_start_rows:x_crossVal_end_rows], :]

# Create a Test Set from every remaining row. This is 20% of the data when
# n_rows divides evenly; otherwise the rows that a truncated 20% would have
# silently dropped are kept in the test set.
x_test_start_rows = x_crossVal_end_rows
x_test_end_rows = n_rows
X_test = x_norm[row_indices[x_test_start_rows:x_test_end_rows], :]

# The three sets must partition the data: every row is used exactly once.
assert len(X_train) + len(X_crossVal) + len(X_test) == n_rows

In [100]:
# Shape of the training set.
X_train.shape

(600, 20)

In [101]:
# Shape of the cross-validation set.
X_crossVal.shape

(200, 20)

In [102]:
# Shape of the test set.
X_test.shape

(200, 20)

In [69]:
# Small 3x3 example matrix, written out row by row.
xx = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

In [90]:
# Example list of row indices: 1, 2, 3, 4.
i = list(range(1, 5))

In [91]:
# Display the 3x3 example array.
xx

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [96]:
# i[:2] is [1, 2]; indexing xx with that list selects rows 1 and 2.
# The split cell uses the same pattern with slices of row_indices.
xx[i[:2]]

array([[4, 5, 6],
       [7, 8, 9]])