# scikit-learn Basics

### Import all the required modules

In [9]:
import numpy as np
from sklearn import preprocessing 

### Binarization

In [10]:
# 4x3 sample matrix used to demonstrate thresholding.
input_data = np.array([
    [5.1, -2.9, 3.3],
    [-1.2, 7.8, -6.1],
    [3.9, 0.4, 2.1],
    [7.3, -9.9, -4.5],
])

# Entries strictly above the threshold become 1.0, everything else 0.0
# (note the 2.1 entry in the third row maps to 0 in the output).
binarizer = preprocessing.Binarizer(threshold=2.1)
data_binarized = binarizer.transform(input_data)
print("\nBinarized data:\n", data_binarized)


Binarized data:
 [[1. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]]


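`preprocessing.Binarizer(threshold=2.1).transform(input_data)` applies the following function element-wise to the array `input_data`: $$f(x) = \begin{cases} 1.0 & \text{if } x > 2.1 \\ 0.0 & \text{otherwise} \end{cases}$$ The comparison is strict: a value exactly equal to the threshold (such as the `2.1` in the third row) maps to `0`.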

### Normalization (standardization: zero mean, unit variance)

In [11]:
# 4x3 sample matrix; statistics below are taken per column (axis=0).
input_data = np.array([
    [5.1, -2.9, 3.3],
    [-1.2, 7.8, -6.1],
    [3.9, 0.4, 2.1],
    [7.3, -9.9, -4.5],
])

# Column-wise mean and standard deviation of the raw data.
mean_before = np.mean(input_data, axis=0)
std_before = np.std(input_data, axis=0)
print("\nBEFORE:")
print("Mean =", mean_before)
print("Std deviation =", std_before)

print("\n Before Normalizaion: \n", input_data)

# Standardize each column; the statistics printed afterwards show the result
# has (numerically) zero mean and unit standard deviation per column.
data_scaled = preprocessing.scale(input_data)
mean_after = np.mean(data_scaled, axis=0)
std_after = np.std(data_scaled, axis=0)
print("\nAFTER:")
print("Mean =", mean_after)
print("Std deviation =", std_after)

print("\nNormalized data: \n", data_scaled)


BEFORE:
Mean = [ 3.775 -1.15  -1.3  ]
Std deviation = [3.12039661 6.36651396 4.0620192 ]

 Before Normalizaion: 
 [[ 5.1 -2.9  3.3]
 [-1.2  7.8 -6.1]
 [ 3.9  0.4  2.1]
 [ 7.3 -9.9 -4.5]]

AFTER:
Mean = [1.11022302e-16 0.00000000e+00 2.77555756e-17]
Std deviation = [1. 1. 1.]

Normalized data: 
 [[ 0.42462551 -0.2748757   1.13244172]
 [-1.59434861  1.40579288 -1.18167831]
 [ 0.04005901  0.24346134  0.83702214]
 [ 1.12966409 -1.37437851 -0.78778554]]


In [12]:
## numpy equivalence
# Reproduce the scaling by hand: subtract each column's mean, then divide by
# that column's (population) standard deviation.
col_mean = input_data.mean(axis=0)
col_std = input_data.std(axis=0)

centered = np.subtract(input_data, col_mean)
processed = np.divide(centered, col_std)

print("Mean =", np.mean(processed, axis=0))
print("Std deviation =", np.std(processed, axis=0))
print("\nNormalized data: \n", processed)


Mean = [1.11022302e-16 0.00000000e+00 2.77555756e-17]
Std deviation = [1. 1. 1.]

Normalized data: 
 [[ 0.42462551 -0.2748757   1.13244172]
 [-1.59434861  1.40579288 -1.18167831]
 [ 0.04005901  0.24346134  0.83702214]
 [ 1.12966409 -1.37437851 -0.78778554]]
