# NumPy Introduction 

In [1]:
import numpy as np

### Basics  

In [2]:
a = np.array([11,22,33])
print(a)

[11 22 33]


In [3]:
b = np.array([[2,5,11,7],[6,2,9,17]])
print(b)
print("\n")

c = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]],[[13,14,15],[16,17,18]]])
print(c)

[[ 2  5 11  7]
 [ 6  2  9 17]]


[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]

 [[13 14 15]
  [16 17 18]]]


In [4]:
# Get Dimension  
print(b.ndim)
print("\n")

# Get Shape 
(K,M) = b.shape
L,B,H = c.shape
print(K,M)
print(L,B,H)
print("\n")

# Get Type
print(b.dtype)
print("\n")

# Total no. of entries in the array
print(c.size)
print("\n")

# To Specify Datatype when building array: 
d = np.array([4.5,2.6,7.7], dtype = 'int32')
print(d.dtype)
print("\n")

2


2 4
3 2 3


int64


18


int32




In [184]:
# Create an array from 1 to 10
seq = np.arange(1, 11)
print(seq[4:])
print("\n")

# Create an array from 0 to 1 spaced by 0.1
seq_2 = np.arange(0, 1.1, 0.1)

[ 5  6  7  8  9 10]




### Accessing/ Changing specific elements, rows, coloumns, etc 

In [6]:
a = np.array([[1,2,3,4,5,6,7],[8,9,10,11,12,13,14]])
print(a)
print("\n")

# Get a specific element [row, col]
a[0,4]
a.item(0,4)

# Get a Specific row
a[1, :]

# Get a specific coloumn 
a[: , 4]

# Changing values
a[0,0] = 20
print(a)
print(np.put(a,[0,7],[90,90]))
print("\n")

# Changing rows or cols
a[ :, 3]= [89,89]
print(a)
print("\n")

a[1, :]= [5]
print(a)
print("\n")

a[1, :]= [1,3,4,5,6,7,7]
print(a)
print("\n")

# Flipping Array 
print(a[::-1])   #Works for any dimension
print("\n")

# All Evens Numbers
print(a[a%2==0])
print("\n")

# Unique Values 
print(np.unique(a))

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]


[[20  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]
None


[[90  2  3 89  5  6  7]
 [90  9 10 89 12 13 14]]


[[90  2  3 89  5  6  7]
 [ 5  5  5  5  5  5  5]]


[[90  2  3 89  5  6  7]
 [ 1  3  4  5  6  7  7]]


[[ 1  3  4  5  6  7  7]
 [90  2  3 89  5  6  7]]


[90  2  6  4  6]


[ 1  2  3  4  5  6  7 89 90]


In [7]:
# Get specific element in 3-d 
print(c)

# To get 3: 
print(c[0,0,2])

# To get 8: 
print(c[1,0,1])

# To get 16: 
print(c[2,1,0])

# Take columns 2, 3 of both rows
print(b)
print(b[:, [1, 2]])

#(work outside in : specify dimension, specify specify row and col)
# To get col in 3D
print(c[1,:, 0])

# To get row in 3D
print(c[0,1,:])

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]

 [[13 14 15]
  [16 17 18]]]
3
8
16
[[ 2  5 11  7]
 [ 6  2  9 17]]
[[ 5 11]
 [ 2  9]]
[ 7 10]
[4 5 6]


### Initializinig Different Types of Arrays

In [8]:
# All 0's matrix 
x = np.zeros((2,3))
x

array([[0., 0., 0.],
       [0., 0., 0.]])

In [9]:
# All 1's matrix
print(np.ones((4,5)))
print(np.ones((4,5,3)))

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
[[[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]]


In [10]:
# Any other number 
np.full((3,3), 99)

array([[99, 99, 99],
       [99, 99, 99],
       [99, 99, 99]])

In [11]:
# Random Decimal Numbers 
print(np.random.rand(4,2))

np.random.random_sample(a.shape)

[[0.68502291 0.47263226]
 [0.54429488 0.33037806]
 [0.36718096 0.26714637]
 [0.59823909 0.38088088]]


array([[0.68592502, 0.34064101, 0.27300982, 0.1727535 , 0.48130378,
        0.29393847, 0.51193759],
       [0.99489613, 0.02043721, 0.26575013, 0.17522839, 0.68687392,
        0.21579866, 0.37520731]])

In [12]:
# Random Integer Value 
np.random.randint(-2,10, size=(4,4))

array([[-1,  3,  0,  9],
       [ 1,  7,  2,  3],
       [ 8,  1,  5,  0],
       [ 3,  4, -2,  3]])

In [13]:
# Identity Matrix 
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [14]:
# Interesting Array Creation 
output = np.ones((5,5))
print(output)

z = np.zeros((3,3))
z[1,1] = 9
print(z)

output[1:4, 1:4] = z
print(output)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
[[0. 0. 0.]
 [0. 9. 0.]
 [0. 0. 0.]]
[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


####  Be careful about copying np arrays 

In [15]:
a = np.array([1,2,3])
b=a # b will point to the same location as a does, so: 
b[0] = 100
print(a)
print(b)

# Instead np.copy 
y = np.array([1,2,3])
k = np.copy(y)
k[0]=100
print(y)
print(k)

[100   2   3]
[100   2   3]
[1 2 3]
[100   2   3]


## Mathematics 

In [16]:
a = np.array([1,2,3,4])
print(a)

[1 2 3 4]


In [17]:
# Element-wise operation
print(a + 2)
print(a - 2)
print(a * 2)
print(a /2)

[3 4 5 6]
[-1  0  1  2]
[2 4 6 8]
[0.5 1.  1.5 2. ]


In [18]:
b = np.array([8,4,5,6])
print(a+b)
print(a*b)
print(a/b)
print(a**2)

# Linear Algebra : Matrix Multiplication
print(a.reshape(2,2))
print(b.reshape(2,2))
print(np.matmul(a.reshape(2,2),b.reshape(2,2)))

[ 9  6  8 10]
[ 8  8 15 24]
[0.125      0.5        0.6        0.66666667]
[ 1  4  9 16]
[[1 2]
 [3 4]]
[[8 4]
 [5 6]]
[[18 16]
 [44 36]]


#### Reorganizing Arrays 


In [19]:
# Reshaping Arrays
before = np.array([[1,2,3,4],[6,7,34,2]])
print(before)
print("\n")

after = before.reshape(8,1)
print(after)
print("\n")

# Transposing
print(before.T)
print("\n")

# Flatten  Array 
#by col
print(before.flatten('F'))

#by row 
print(before.flatten())
print("\n")


[[ 1  2  3  4]
 [ 6  7 34  2]]


[[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 6]
 [ 7]
 [34]
 [ 2]]


[[ 1  6]
 [ 2  7]
 [ 3 34]
 [ 4  2]]


[ 1  6  2  7  3 34  4  2]
[ 1  2  3  4  6  7 34  2]




In [20]:
# Vertically Stacking vectors 
v1 = np.array([1])
v2= np.array([5])

print(np.vstack([v1,v2]))
print(np.vstack([v1,v2,v1,v2,v2]))

[[1]
 [5]]
[[1]
 [5]
 [1]
 [5]
 [5]]


In [21]:
# Horizontal Stack 
h1 = np.array([1,2,3,4])
h2= np.array([5,6,7,8])

print(np.hstack([h1,h2]))
print(np.hstack([h1,h2,h1,h2,h2]))

[1 2 3 4 5 6 7 8]
[1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 5 6 7 8]


In [22]:
# Row-stack and Col-stack 
r = np.random.randint(4, size =(1,4))
s = np.random.randint(4, size =(1,4))
print(r)
print(s)
print(np.row_stack([r,s]))
np.column_stack([r,s])

[[1 3 0 1]]
[[0 2 0 2]]
[[1 3 0 1]
 [0 2 0 2]]


array([[1, 3, 0, 1, 0, 2, 0, 2]])

In [23]:
# Horizontal Splitting, Vertical Splitting. 
r = np.random.randint(333, size =(5,5))
print(r)
print("\n")
print(np.hsplit(r,5))
print("\n")
print(np.vsplit(r,5))
print("\n")
print(np.hsplit(r,(4,4)))

[[ 71 179  22   5 245]
 [ 78 140 311 290 180]
 [280 121 143 120  68]
 [130 149 215  43  41]
 [248 265 186 157 123]]


[array([[ 71],
       [ 78],
       [280],
       [130],
       [248]]), array([[179],
       [140],
       [121],
       [149],
       [265]]), array([[ 22],
       [311],
       [143],
       [215],
       [186]]), array([[  5],
       [290],
       [120],
       [ 43],
       [157]]), array([[245],
       [180],
       [ 68],
       [ 41],
       [123]])]


[array([[ 71, 179,  22,   5, 245]]), array([[ 78, 140, 311, 290, 180]]), array([[280, 121, 143, 120,  68]]), array([[130, 149, 215,  43,  41]]), array([[248, 265, 186, 157, 123]])]


[array([[ 71, 179,  22,   5],
       [ 78, 140, 311, 290],
       [280, 121, 143, 120],
       [130, 149, 215,  43],
       [248, 265, 186, 157]]), array([], shape=(5, 0), dtype=int64), array([[245],
       [180],
       [ 68],
       [ 41],
       [123]])]


In [24]:
# Repeating an array
arr = np.array([[1,2,3]])
print(arr)
r1 = np.repeat(arr,3, axis=0)
print(r1)

[[1 2 3]]
[[1 2 3]
 [1 2 3]
 [1 2 3]]


In [25]:
# Indexes that would sort an array
R = np.array([6,4,3,7,11])
sort_idx = np.argsort(R)
print(sort_idx)
print("\n")
print(R[sort_idx])
print("\n")

 # in 2D
R = np.array([[6,4,3,7,11],[11,33,42,0,7]])
sort_idx = np.argsort(R[:, 4])
print(sort_idx)
print("\n")
R[sort_idx]

[2 1 0 3 4]


[ 3  4  6  7 11]


[1 0]




array([[11, 33, 42,  0,  7],
       [ 6,  4,  3,  7, 11]])

In [26]:
# Generate Random Permutations 

# generating permutations from 1:N
w = np.random.permutation(7)
print(w)

# generating permutation of arrays
r1 = np.array([3,11,64,75,63])
r2 = np.array([[3,11,64,75,63],[5,23,54,22,66]])
r1_permute = np.random.permutation(r1)
r2_permute = np.random.permutation(r2)
print(r1_permute)
print(r2_permute)

[0 3 2 5 4 6 1]
[11 63 75 64  3]
[[ 3 11 64 75 63]
 [ 5 23 54 22 66]]


In [27]:
#Creating one hot vector. Let class be y.
print("ONE HOT ENCODING")
y = np.array([[2,1,1,2,0]])
print(y)
print("\n")
R,C = y.shape
print(R,C)
print("\n")
y = y.reshape(C)
print(y)
print("\n")

rows = np.arange(0,C)
print(rows)
print("\n")
one_hot_mat = np.zeros([C,np.max(y)+1])
print(one_hot_mat)
print("\n")
one_hot_mat[[rows],[y]]=1
print(one_hot_mat)
    

ONE HOT ENCODING
[[2 1 1 2 0]]


1 5


[2 1 1 2 0]


[0 1 2 3 4]


[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


[[0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]]


#### Loading Data From File 

In [28]:
filedata = np.genfromtxt('data.txt', delimiter=',')
print(filedata)
filedata = filedata.astype('int32')
print(filedata)

[[  1.  13.  21.  11. 196.  75.   4.   3.  34.   6.   7.   8.   0.   1.
    2.   3.   4.   5.]
 [  3.  42.  12.  33. 766.  75.   4.  55.   6.   4.   3.   4.   5.   6.
    7.   0.  11.  12.]
 [  1.  22.  33.  11. 999.  11.   2.   1.  78.   0.   1.   2.   9.   8.
    7.   1.  76.  88.]]
[[  1  13  21  11 196  75   4   3  34   6   7   8   0   1   2   3   4   5]
 [  3  42  12  33 766  75   4  55   6   4   3   4   5   6   7   0  11  12]
 [  1  22  33  11 999  11   2   1  78   0   1   2   9   8   7   1  76  88]]


#### Boolean Masking and Advanced Indexing

In [29]:
filedata > 10

array([[False,  True,  True,  True,  True,  True, False, False,  True,
        False, False, False, False, False, False, False, False, False],
       [False,  True,  True,  True,  True,  True, False,  True, False,
        False, False, False, False, False, False, False,  True,  True],
       [False,  True,  True,  True,  True,  True, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [30]:
filedata >= 1

array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True, False,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True, False,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
        False,  True,  True,  True,  True,  True,  True,  True,  True]])

In [31]:
filedata[filedata > 50]

array([196,  75, 766,  75,  55, 999,  78,  76,  88], dtype=int32)

In [32]:
# We can index with a list in NumPy
a = np.array([1,2,3,4,5,6,7,8,9])
a[[1,2,8]]

b = np.array([[1,2,3,4],[7,8,9,10],[11,12,13,14],[15,16,17,18]])
print(b)

# Suppose we want the diagonal elements 2,9,14
print(b[[0,1,2],[1,2,3]])
# first list for all the rows in which elements exist, second for all the columns in which the elements exist. 

# Suppose I want the matrix: [[3,4],[13,14],[17,18]]
print(b[[0,2,3], :])


[[ 1  2  3  4]
 [ 7  8  9 10]
 [11 12 13 14]
 [15 16 17 18]]
[ 2  9 14]
[[ 1  2  3  4]
 [11 12 13 14]
 [15 16 17 18]]


In [33]:
np.any(filedata >= 50, axis = 0) #if ANY value in the cols is greater than 50

array([False, False, False, False,  True,  True, False,  True,  True,
       False, False, False, False, False, False, False,  True,  True])

In [34]:
np.any(filedata > 50, axis = 0) #if ALL value in the cols is greater than 50

array([False, False, False, False,  True,  True, False,  True,  True,
       False, False, False, False, False, False, False,  True,  True])

In [35]:
((filedata > 50) & (filedata < 50))

array([[False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False]])

In [36]:
(~(filedata > 50) & (filedata < 50)) # NOT : ~

array([[ True,  True,  True,  True, False, False,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True, False, False,  True, False,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True, False,  True,  True,  True, False,
         True,  True,  True,  True,  True,  True,  True, False, False]])

In [37]:
# Entropy of Class Labels
counts = np.array([5,3])
categorical_dist_counts = counts/8   #Computes p_i
categorical_dist_counts = categorical_dist_counts[categorical_dist_counts > 0] #Removes p_i=0
log_counts = np.log2(categorical_dist_counts) #applies log 
entropy = (counts/8)*log_counts#finds entropy. 
entropy = -(np.sum(entropy))
print(entropy)

0.954434002924965


In [38]:
def entropy(y):
     # Get the number of data points per class.
        (counts, _) = np.histogram(y, bins=np.arange(3 + 1))
        # ====================================================
        # TODO: Implement your solution within the box
        # Set the entropy of the unnormalized categorical distribution counts
        # Make sure the case where p_i = 0 is handeled appropriately.
        N,K = y.shape
        print("===")
        print(counts)
        print("===")
        categorical_dist_counts = counts/N                                             #Computes p_i
        categorical_dist_counts = categorical_dist_counts[categorical_dist_counts != 0] #Removes p_i=0
        log_counts = np.log2(categorical_dist_counts)                                  #applies log 
        entropy = (categorical_dist_counts)*log_counts  
        entropy = -(np.sum(entropy))                                                   #finds entropy.
        # ====================================================
        return entropy

X = np.array([[33,34,345,4],[52,56,46,5],[44,687,886,5],[53,678,56,7]])
y = np.array([[2,1,2,0]])
y = y.reshape((4,1))
print(X,y)
print("\n")

split_dim = 3
sort_idx = np.argsort(X[:, split_dim])
X = X[sort_idx]
y = y[sort_idx]
print(X,y)
print("\n")

(unique_values, first_idxes) = np.unique(X[:, split_dim], return_index=True)
print(unique_values,first_idxes)
print(unique_values.shape[0]-1)

# TODO: Implement your solution within the box
 # Initialize variables
current_split_index = 0
current_split_value = X[:,0]
H_left = 0
H_right = 0
H_data=5
current_information_gain = 0
maximum_information_gain = 0 
        
# Iterate over possible split values and find optimal split that maximizes the information gain.
for ii in range(unique_values.shape[0] - 1):
    # Split data by split value and compute information gain.
    print("==================")
    current_split_value = unique_values[ii]
    current_split_index = first_idxes[ii]
    y_left = y[:ii+1,:]
    print(y_left)
    y_right = y[ii+1:,:]
    print(y_right)
    
    H_left= entropy(y_left)
    H_right= entropy(y_right)
    
    current_information_gain = H_data - (H_left + H_right)
    print("==================")

    if True:
        print(f"split (index, value): ({current_split_index}, {current_split_value}), H_data: {H_data}, H_left: {H_left}, H_right: {H_right}, Info Gain: {current_information_gain}")

    # Update maximum information gain when applicable
    if current_information_gain >= maximum_information_gain:
        maximum_information_gain = current_information_gain
    




[[ 33  34 345   4]
 [ 52  56  46   5]
 [ 44 687 886   5]
 [ 53 678  56   7]] [[2]
 [1]
 [2]
 [0]]


[[ 33  34 345   4]
 [ 52  56  46   5]
 [ 44 687 886   5]
 [ 53 678  56   7]] [[2]
 [1]
 [2]
 [0]]


[4 5 7] [0 1 3]
2
[[2]]
[[1]
 [2]
 [0]]
===
[0 0 1]
===
===
[1 1 1]
===
split (index, value): (0, 4), H_data: 5, H_left: -0.0, H_right: 1.584962500721156, Info Gain: 3.415037499278844
[[2]
 [1]]
[[2]
 [0]]
===
[0 1 1]
===
===
[1 0 1]
===
split (index, value): (1, 5), H_data: 5, H_left: 1.0, H_right: 1.0, Info Gain: 3.0


In [39]:
X = np.array([[33,44,28,64],[52,72,73,53],[44,13,21,8],[2,4,5,7]])
y = np.array([[2,1,2,0]])
y = y.reshape((4,1))
print(X,y)
print("\n")


num_data_per_tree = 2
num_features_per_tree = 2

N,D = X.shape

Xy = np.hstack([X,y])
print(Xy)
Xy = np.random.permutation(Xy)
print(Xy)
X_sub = Xy[:num_data_per_tree,:num_features_per_tree]
y_sub = Xy[:num_data_per_tree,D]

print(X_sub,y_sub)



[[33 44 28 64]
 [52 72 73 53]
 [44 13 21  8]
 [ 2  4  5  7]] [[2]
 [1]
 [2]
 [0]]


[[33 44 28 64  2]
 [52 72 73 53  1]
 [44 13 21  8  2]
 [ 2  4  5  7  0]]
[[33 44 28 64  2]
 [ 2  4  5  7  0]
 [52 72 73 53  1]
 [44 13 21  8  2]]
[[33 44]
 [ 2  4]] [2 0]


In [40]:
y = np.array([[1,2,3,4],[3,4,52,2],[21,45,632,55]])
print(y.shape[0])
print(y)
print(y[:,:2])
np.flip(y)
    

3
[[  1   2   3   4]
 [  3   4  52   2]
 [ 21  45 632  55]]
[[ 1  2]
 [ 3  4]
 [21 45]]


array([[ 55, 632,  45,  21],
       [  2,  52,   4,   3],
       [  4,   3,   2,   1]])

In [41]:
print(len([[2],[1],[0]]))

3


In [90]:
X = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
print(X, X.shape)

A = np.empty((3,2))
print(A)

ones = np.ones((X.shape[0],1))

for i in range(1,3):
    ai = X**i
    print(ai)
    ai = np.sum(ai, axis=1)
    ai = ai.reshape((3,1))
    A = np.hstack((A,ai))
    print(A)


A[:,2:]



[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]] (3, 4)
[[ 10.  30.]
 [ 26. 174.]
 [ 42. 446.]]
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[[ 10.  30.  10.]
 [ 26. 174.  26.]
 [ 42. 446.  42.]]
[[  1   4   9  16]
 [ 25  36  49  64]
 [ 81 100 121 144]]
[[ 10.  30.  10.  30.]
 [ 26. 174.  26. 174.]
 [ 42. 446.  42. 446.]]


array([[ 10.,  30.],
       [ 26., 174.],
       [ 42., 446.]])

In [None]:
sdfkn sdfklm

In [158]:
from sklearn.datasets import make_blobs

X, y = make_blobs(n_samples=20, centers=[(0,0), (5,5), (-5, 5)], random_state=0)

for i in range(3):
    (np.array([X[np.where(y==i)].mean(axis=0)])
          
    

SyntaxError: unexpected EOF while parsing (<ipython-input-158-f54d52cdb4aa>, line 8)

In [98]:
x1 = np.array([[23,34,11],[33,44,53],[5,5,15]])
x2 = np.array([[0.5,0.3,0.6],[0.1,0.5,.9],[1,2,4]])
print(x1)
print(x2)

print(np.cov(x1,x2).shape)

print(np.dot(x1.T,x1).shape)







[[23 34 11]
 [33 44 53]
 [ 5  5 15]]
[[0.5 0.3 0.6]
 [0.1 0.5 0.9]
 [1.  2.  4. ]]
(6, 6)
(3, 3)


In [148]:
c = np.array([[1,2,3,1,1,2,2,2]]).T
print(c.shape)
y = np.array(c.flatten())
t = np.bincount(y)
t = t.reshape((4,1))
t/len(y)

(8, 1)


array([[0.   ],
       [0.375],
       [0.5  ],
       [0.125]])

In [188]:
train_X = np.array([[23,34,11],[33,44,53],[5,5,15],[0.5,0.3,0.6],[0.1,0.5,.9],[1,2,4] ])
train_y = np.array([[1,2,3,2,1,2]]).T
train_y.shape
print(train_X[train_y.flatten()==2,:])
np.mean(train_X[train_y.flatten()==2,:],axis=0,keepdims=True)

print(np.unique(train_y))


[[33.  44.  53. ]
 [ 0.5  0.3  0.6]
 [ 1.   2.   4. ]]
[1 2 3]


In [167]:
from sklearn.datasets import make_blobs

X, y = make_blobs(n_samples=20, centers=[(0,0), (5,5), (-5, 5)], random_state=0)
a = np.ones((3,2))

for i in range(3):
    a[i,:]=np.array([X[y==i,:].std(axis=0)])
    print(np.diagflat(a[i,:].flatten()))

[[0.6853714 0.       ]
 [0.        0.9789976]]
[[1.40218915 0.        ]
 [0.         0.67078568]]
[[0.88192625 0.        ]
 [0.         1.12879666]]


In [205]:
train_X = np.array([[-0.44215767,  0.12654082],
 [-0.69049587,  0.70863265],
 [-0.40942631, -0.3090424 ],
 [-0.69955566, -0.04786335],
 [-1.03264067,  0.12984265],
 [ 0.04555058, -0.54011834],
 [ 0.24066126, -0.96152298],
 [ 0.14036188, -0.89448291],
 [ 0.47246929, -1.06487674],
 [ 0.0990007,  -1.27008879]])

train_y = np.array([[0],
 [0],
 [0],
 [0],
 [0],
 [1],
 [1],
 [1],
 [1],
 [1]])


means = np.array([[-0.65485524,  0.12162207],
 [ 0.19960874, -0.94621795]])

covariances= np.tile(np.eye(2), reps=(2, 1, 1))
print(covariances)

for j in range(2):
    cov_mat = train_X[train_y.flatten()==j] - means[j,:]
    print(cov_mat)
    cov_k = np.matmul(cov_mat.T,cov_mat)
    print(cov_k)
    print(cov_mat.shape[0])
    covariances[j,:,:] = cov_k/4

print(0.25146583/0.06286646)

covariances



[[[1. 0.]
  [0. 1.]]

 [[1. 0.]
  [0. 1.]]]
[[ 0.21269757  0.00491875]
 [-0.03564063  0.58701058]
 [ 0.24542893 -0.43066447]
 [-0.04470042 -0.16948542]
 [-0.37778543  0.00822058]]
[[ 0.25146583 -0.12110229]
 [-0.12110229  0.55887039]]
5
[[-0.15405816  0.40609961]
 [ 0.04105252 -0.01530503]
 [-0.05924686  0.05173504]
 [ 0.27286055 -0.11865879]
 [-0.10060804 -0.32387084]]
[[ 0.11350427 -0.0660497 ]
 [-0.0660497   0.28679988]]
5
3.999999840932669


array([[[ 0.06286646, -0.03027557],
        [-0.03027557,  0.1397176 ]],

       [[ 0.02837607, -0.01651242],
        [-0.01651242,  0.07169997]]])

In [178]:
num_validation_data = min(10 - 1, round(10 * 0.3))
num_validation_data

3

In [180]:
train_X = np.array([[ 0.04555058, -0.54011834],
 [ 0.24066126, -0.96152298],
 [ 0.14036188, -0.89448291],
 [ 0.47246929, -1.06487674],
 [ 0.0990007,  -1.27008879]])

        

           
            
            
        

array([[-0.69955566, -0.04786335],
       [-1.03264067,  0.12984265],
       [ 0.04555058, -0.54011834],
       [ 0.24066126, -0.96152298],
       [ 0.14036188, -0.89448291],
       [ 0.47246929, -1.06487674],
       [ 0.0990007 , -1.27008879]])

In [185]:
t = [1,2,3,4,5,0,3]
np.argmin(t)

5