# Random

In [36]:
import numpy as np
import sklearn.preprocessing as skp

In [37]:
from numpy.random import rand, randint, randn, normal
print ("Random int",randint(1,10))
print ("Random uniform",rand(4))
print ("Random standard normal",randn(4))
mean=50
stdev = 10
shape=[3,3]
print ("Random gaussian\n",normal(mean,stdev,shape))

Random int 4
Random uniform [0.61052555 0.41452011 0.95327869 0.52771592]
Random standard normal [1.05553197 1.18901639 1.31887056 0.19188414]
Random gaussian
 [[42.69617882 66.36229435 31.96753908]
 [37.89732808 48.95597181 22.98852073]
 [69.65967293 41.08342816 45.98301434]]


# Norms
https://hadrienj.github.io/posts/Deep-Learning-Book-Series-2.5-Norms/

Norms are zero for zero-vector but otherwise positive.  
Norms obey triangle inequality ||a||+||b||<=||c||.  
Norms scale linearly: ||k * vec|| == ||k|| * ||vec||.  
L0 norm (not a norm) = number of non-zero elements.  
L1 norm = sum of abs of elements. Discriminates small values.  
L2 norm = Euclidian norm = sqrt of sum of square of elements. Discriminates large values.  
Linfinity = Lmax = abs of max of elements.   
Squared Euclidian norm = L2 squared. Efficient: avoids the sqrt.  
Frobenias norm of matrix = L2 norm of matrix flattened to a vector.  

In [54]:
feature1=normal(40,10,100)
print("My L2",np.sqrt((feature1**2).sum()))
print("np L2",np.linalg.norm(feature1))

My L2 404.7102094050317
np L2 404.7102094050318


In [57]:
print("Squared Euclidian,",(feature1**2).sum())
print("Squared Euclidian,",(feature1.T.dot(feature1)))
print("Squared Euclidian,",np.linalg.norm(feature1)**2)

Squared Euclidian, 163790.35359666465
Squared Euclidian, 163790.35359666467
Squared Euclidian, 163790.35359666467


# Normalization

In [61]:
def standardize(vec):
    center = vec - np.mean(vec)
    unit = center/np.std(vec)
    return unit
def show_stats(desc,vec):
    print(desc)
    print("Max %.2f Min %.2f Mean %.2f Std %.2f"%
          (max(vec),min(vec),np.mean(vec),np.std(vec)))

In [62]:
feature1=normal(40,10,100)
show_stats("Before",feature1)
standard1=standardize(feature1)
show_stats("Centered, unit variance",standard1)

Before
Max 65.36 Min 9.15 Mean 39.68 Std 11.30
Centered, unit variance
Max 2.27 Min -2.70 Mean -0.00 Std 1.00


# Covariance

In [63]:
feature1=normal(40,10,100)
feature2=normal(60,10,100)
print("Covariance\n",np.cov(feature1,feature2))

Covariance
 [[102.02464872   1.40235715]
 [  1.40235715  98.74407787]]


In [64]:
standard1=standardize(feature1)
standard2=standardize(feature2)
print("Covariance\n",np.cov(standard1,standard2))

Covariance
 [[1.01010101 0.01411287]
 [0.01411287 1.01010101]]


In [94]:
def my_cov(vecs,normalize=False):
    I=len(vecs)
    J=len(vecs[0])
    means=np.mean(vecs,axis=1)
    print(means)
    C=np.zeros([I,J])
    for i in range(I):
        for j in range(J):
            ihat = means[i]
            jhat = means[j]
            sum = 0.
            for k in range(J):
                ielem = vecs[i][k]
                jelem = vecs[j][k]
                sum = sum + (ihat-ielem)*(jhat-jelem)
            C[i,j] = sum
            if normalize:
                sum = sum /(I+1)
    return C
mydata=[[3, 5], [8, 6]]
print(my_cov(mydata))
print(np.cov(mydata))

[4. 7.]
[[ 2. -2.]
 [-2.  2.]]
[[ 2. -2.]
 [-2.  2.]]


In [None]:
for i in 0,1:
    for j in 0,1:
        C[i][j]=3
print(C)

# Correlation
