In [72]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [2]:
# One hot encoding in python without using pandas and sklearn
def onehot_encoding(x):
    """One-hot encode a sized iterable of hashable, sortable labels.

    The distinct labels are sorted, and each one is assigned a column in that
    sorted order. Row ``r`` of the result has a single 1 in the column of the
    ``r``-th label of ``x`` and 0 everywhere else.

    Parameters
    ----------
    x : list, tuple, 1-D numpy array, pandas Series, ...
        Labels to encode. Values are read by iteration, not by ``x[i]``, so
        containers whose ``[]`` is label-based (for example a pandas Series
        with a non-default index) are handled as well.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(x), number_of_distinct_labels)``.
    """
    labels = list(x)
    # Sorted distinct labels -> column index.
    class_index = {label: col for col, label in enumerate(sorted(set(labels)))}
    one_hot = np.zeros((len(labels), len(class_index)), dtype='int')
    # Explicit int dtype keeps the fancy index valid when `labels` is empty.
    columns = np.array([class_index[label] for label in labels], dtype=int)
    one_hot[np.arange(len(labels)), columns] = 1
    return one_hot


In [3]:
def compute_Loss(y_arg,x_arg,function,matrix=False,weights=None):
    """Return the sum of squared errors between targets and predictions.

    Parameters
    ----------
    y_arg : array-like
        Target values.
    x_arg : array-like
        Inputs handed to ``function``.
    function : callable
        Model. Called as ``function(x_arg)`` when ``matrix`` is falsy and as
        ``function(x_arg, weights)`` otherwise.
    matrix : bool, optional
        Whether the model also takes a weight vector. Any falsy value
        (``False``, ``0``, ``numpy.bool_(False)``) selects the one-argument
        call.
    weights : array-like, optional
        Weights forwarded to ``function`` when ``matrix`` is truthy.

    Returns
    -------
    numpy scalar
        ``sum((y_arg - prediction) ** 2)``; the sum is not averaged.
    """
    # Truthiness test instead of `is False`, so numpy booleans and 0 also
    # select the one-argument branch.
    if matrix:
        predictions = function(x_arg, weights)
    else:
        predictions = function(x_arg)
    return np.sum(np.square(y_arg - predictions))

In [4]:
# Sample points given as (x, y) pairs.
data=[(2,2),(2,1),(5,9),(-3,-10),(0,-5),(3,3)]
# Split the pairs into an array of inputs and an array of targets.
all_x=np.array([point[0] for point in data])
all_y=np.array([point[1] for point in data])
# Candidate model y = 3x - 5. compute_Loss returns the sum of squared errors
# for it (the sum is not divided by the number of points).
def my_function(x):
    return x * 3 - 5
Loss=compute_Loss(all_y,all_x,my_function)
Loss

19

In [5]:
# Evaluate the loss for a feature matrix x, a weight vector w and targets y.
x=np.array([
    [2,0,2,2],
    [5,3,3,5],
    [0,3,1,2],
    [2,8,7,6],
    [7,1,3,2],
])
y=np.array([1,2,3,5,8])
# w[0] is the intercept w0, so a column of ones is prepended to x.
x_changed=np.concatenate([np.ones((len(x),1)),x],axis=1)

print(x_changed,end='\n\n')
w=np.array([-1,1,1,-1,1])
# Linear model: prediction = a . b
def my_function(a,b):
    return np.dot(a,b)

# Sum of squared errors of x_changed . w against y
Loss_matrix=compute_Loss(y,x_changed,my_function,matrix=True,weights=w)
Loss_matrix

[[1. 2. 0. 2. 2.]
 [1. 5. 3. 3. 5.]
 [1. 0. 3. 1. 2.]
 [1. 2. 8. 7. 6.]
 [1. 7. 1. 3. 2.]]


62.0

In [34]:
# Find the closed-form least-squares weights for matrix X and answers y.
X=np.array([1,9,6,3,3,1,9,6,6,3,1,9,9,6,3,1,1,0,1,0]).reshape((5,4))
# X is 5x4 and y has 5 entries, so a column of ones is prepended for the intercept.
# The row count is taken from X itself, so this cell does not depend on the
# unrelated lowercase `x` defined in an earlier cell.
X=np.hstack((np.ones((X.shape[0],1)),X))
y=np.array([2,1,3,-1,1])

# Normal equation: optimal weights = (X.T X)^-1 X.T y
optimal_weights=np.linalg.inv(X.T @ X) @ X.T @ y
# Print the weights rounded to 2 decimals
for i in optimal_weights:
    print(round(i,2), end=' ')

1.53 -0.32 0.1 -0.21 0.37 

In [57]:
# Same X and y as before, now with an L2 penalty and lambda = 1.
# The penalty term is |w|^2 = w1^2 + w2^2 + ...
lamb=1
# Closed form used here:
# regularized weights = (X.T X + lambda^2 E)^-1 X.T y
X_shape_x,X_shape_y=X.shape
# E is the identity matrix with one row and one column per column of X
E=np.eye(X_shape_y)
penalty=E*np.square(lamb)
regularized_weights=np.linalg.inv(X.T @ X + penalty) @ X.T @ y
# Print the weights rounded to 2 decimals
for i in regularized_weights:
    print(round(i,2),end=' ')

0.66 -0.25 0.13 -0.14 0.39 

In [69]:
# Here is a function which has the same functionality as a MinMaxScaler from sklearn library
def minmax_scale(matrix):
    """Scale every column (feature) of ``matrix`` to the range [0, 1].

    Each column is transformed as ``(value - column_min) / (column_max -
    column_min)``, i.e. statistics are taken along axis 0 so that rows are
    samples and columns are features. A column whose values are all equal
    has zero range; it is divided by 1 instead, so it comes out as all zeros
    rather than NaN.

    Parameters
    ----------
    matrix : array-like
        Numeric data, samples in rows and features in columns.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``matrix``.
    """
    matrix = np.asarray(matrix, dtype=float)
    min_values = matrix.min(axis=0, keepdims=True)
    value_range = matrix.max(axis=0, keepdims=True) - min_values
    # Constant columns: avoid 0/0 and map them to 0.
    value_range[value_range == 0] = 1.0
    return (matrix - min_values) / value_range

In [70]:
# Quick check on a 2x2 matrix. This input is symmetric, so taking min/max
# per row and per column yields the same scaled result for it.
X = np.array([(1, 2), (2, 1)])
scaled = minmax_scale(X)
print(scaled)

[[0. 1.]
 [1. 0.]]


In [82]:
# Reference result: scikit-learn's MinMaxScaler fitted on and applied to a 4x3 sample.
data = np.array([
    [-1, 3, 3],
    [-0.5, 2, 3],
    [0, 2, 3],
    [1, 2, 3],
])
scaler = MinMaxScaler().fit(data)
print(scaler.transform(data))

[[0.   1.   0.  ]
 [0.25 0.   0.  ]
 [0.5  0.   0.  ]
 [1.   0.   0.  ]]
