In [18]:
import numpy as np
from scipy.stats import norm
from scipy.stats import multivariate_normal
data = np.array([
    [1, 1, 0],
    [1, 1, 5],
    [0, 2, 4],
    [1, 2, 3],
    [2, 0, 7],
    [1, 1, 1],
    [2, 0, 2],
    [0, 2, 9],
    [2, 0, 0],
    [1, 2, 1]
])
target = np.array([1, 3, 2, 0, 6, 4, 5, 7, 2, 4])
data_train = data[:8,:]
data_test = data[8:,:]
target_train = target[:8]
target_test = target[8:]

#data = np.array([[41.9, 29.1],[43.4, 29.3],[43.9, 29.5],[44.5, 29.7],[47.3, 29.9],[47.5, 30.3],[47.9, 30.5],[50.2, 30.7],[52.8, 30.8],[53.2, 30.9],[56.7, 31.5],[57.0, 31.7],[63.5, 31.9],[65.3, 32.0],[71.1, 32.1],[77.0, 32.5],[77.8, 32.9])
#target = np.array([251.3, 251.3, 248.3, 267.5, 273.0, 276.5, 270.3, 274.9, 285.0, 290.0, 297.0, 302.5, 304.5, 309.3, 321.7, 330.7, 349.0])
#data_train = data
#target_train = target

# Polynomial Regression

## Basis Function

In [19]:
def basis_function(data,i,j):
    if j == 0: return 1
    #return data[i][j-1]
    return np.linalg.norm(data[i])**j

## Design Matrix

In [20]:
M = 3
n = len(data_train)
design_matrix = np.array([[1 for i in range(M+1)] for j in range(n)], dtype=np.float32)
for i in range(n):
    for j in range(M+1):
        design_matrix[i][j] = basis_function(data_train, i, j)    
print("Design Matrix\n",design_matrix)

Design Matrix
 [[  1.          1.4142135   2.          2.828427 ]
 [  1.          5.196152   27.        140.29611  ]
 [  1.          4.472136   20.         89.44272  ]
 [  1.          3.7416575  14.         52.3832   ]
 [  1.          7.28011    53.        385.84583  ]
 [  1.          1.7320508   3.          5.196152 ]
 [  1.          2.828427    8.         22.627417 ]
 [  1.          9.219544   85.        783.66125  ]]


## Weights

In [21]:
weights = np.array([1 for j in range(n)], dtype=np.float32)
#print(np.matmul(design_matrix.T,design_matrix))
weights = np.matmul(np.matmul(np.linalg.inv(np.matmul(design_matrix.T,design_matrix)),design_matrix.T),target_train)
print("w -> ", weights)

w ->  [ 4.58341172 -1.68710379  0.33773655 -0.01330492]


In [22]:
import math
RMSE = 0
for i in range(len(data_train)):
    func_xi = np.array([basis_function(data_train, i, j) for j in range(M+1)])
    #print(func_xi)
    #print(target_train[i], np.matmul(func_xi.T, weights))
    RMSE += (target_train[i] - np.matmul(func_xi.T, weights))**2
RMSE = math.sqrt(RMSE/n)
print("RMSE ->", RMSE)

RMSE -> 1.5699807121071832


# Decision Tree

In [23]:
split = np.median(data_train[:,2])
for i in range(len(data_train)):
    if target_train[i] < 4: target_train[i] = 0
    else: target_train[i] = 1
    if data_train[i,2] <= split: data_train[i,2] = 0
    else: data_train[i,2] = 1

In [40]:
from math import log2
from scipy.stats import entropy
p_c0 = np.count_nonzero(target_train == 0)/len(target_train)
p_c1 = np.count_nonzero(target_train == 1)/len(target_train)
#I_data_train = -p_c0*math.log2(p_c0)-p_c1*math.log2(p_c1)
print(entropy([p_c0,p_c1], base=2))
#print(I_data_train)

y10_filter = [data_train[:,0][i] == 0 for i in range(len(data_train))]
y11_filter = [data_train[:,0][i] == 1 for i in range(len(data_train))]
y12_filter = [data_train[:,0][i] == 2 for i in range(len(data_train))]
y20_filter = [data_train[:,1][i] == 0 for i in range(len(data_train))]
y21_filter = [data_train[:,1][i] == 1 for i in range(len(data_train))]
y22_filter = [data_train[:,1][i] == 2 for i in range(len(data_train))]
y30_filter = [data_train[:,2][i] == 0 for i in range(len(data_train))]
print(target_train[y11_filter])

p_y10 = np.count_nonzero(data_train[:,0] == 0)/len(data_train) 
p_y11 = np.count_nonzero(data_train[:,0] == 1)/len(data_train) 
p_y12 = np.count_nonzero(data_train[:,0] == 2)/len(data_train) 
p_y20 = np.count_nonzero(data_train[:,1] == 0)/len(data_train) 
p_y21 = np.count_nonzero(data_train[:,1] == 1)/len(data_train) 
p_y22 = np.count_nonzero(data_train[:,1] == 2)/len(data_train) 
p_y30 = np.count_nonzero(data_train[:,2] == 0)/len(data_train) 
p_y31 = np.count_nonzero(data_train[:,2] == 1)/len(data_train) 
p_c0_y10 = np.count_nonzero(target_train[y10_filter] == 0)/len(target_train[y10_filter])
p_c0_y11 = np.count_nonzero(target_train[y11_filter] == 0)/len(target_train[y11_filter])
p_c0_y12 = 1 - p_c0_y10 - p_c0_y11
print(p_c0_y12, p_c0_y11, p_c0_y10)
p_y10_c1 = np.count_nonzero(data_train[c1_filter][:,0] == 0)/len(data_train[c1_filter])
p_y11_c1 = np.count_nonzero(data_train[c1_filter][:,0] == 1)/len(data_train[c1_filter])
p_y12_c1 = np.count_nonzero(data_train[c1_filter][:,0] == 2)/len(data_train[c1_filter])
p_y20_c0 = np.count_nonzero(data_train[c0_filter][:,1] == 0)/len(data_train[c0_filter])
p_y21_c0 = np.count_nonzero(data_train[c0_filter][:,1] == 1)/len(data_train[c0_filter])
p_y22_c0 = np.count_nonzero(data_train[c0_filter][:,1] == 2)/len(data_train[c0_filter])
p_y20_c1 = np.count_nonzero(data_train[c1_filter][:,1] == 0)/len(data_train[c1_filter])
p_y21_c1 = np.count_nonzero(data_train[c1_filter][:,1] == 1)/len(data_train[c1_filter])
p_y22_c1 = np.count_nonzero(data_train[c1_filter][:,1] == 2)/len(data_train[c1_filter])
p_y20_c0 = np.count_nonzero(data_train[c0_filter][:,2] == 0)/len(data_train[c0_filter])
p_y21_c0 = np.count_nonzero(data_train[c0_filter][:,2] == 1)/len(data_train[c0_filter])
p_y20_c1 = np.count_nonzero(data_train[c1_filter][:,2] == 0)/len(data_train[c1_filter])
p_y21_c1 = np.count_nonzero(data_train[c1_filter][:,2] == 1)/len(data_train[c1_filter])



1.0
[0 0 0 1]
-0.25 0.75 0.5
