# Random

In [1]:
import numpy as np
import sklearn.preprocessing as skp

In [2]:
from numpy.random import rand, randint, randn, normal
# Parameters of the Gaussian sampled at the end of this cell.
mean = 50
stdev = 10
shape = [3, 3]

# One integer drawn from the half-open range [1, 10).
print("Random int", randint(low=1, high=10))
# Four draws from the uniform distribution on [0, 1).
print("Random uniform", rand(4))
# Four draws from the standard normal distribution (mean 0, std 1).
print("Random standard normal", randn(4))
# A 3x3 array of draws from a Gaussian with the mean and std set above.
print("Random gaussian\n", normal(loc=mean, scale=stdev, size=shape))

Random int 3
Random uniform [0.61455673 0.2315741  0.26068267 0.13416399]
Random standard normal [-0.7104818  -1.4674275  -2.38241677  0.08425739]
Random gaussian
 [[50.40696103 33.46130135 64.46145959]
 [44.85938278 51.25848846 60.19007053]
 [43.70914483 62.47507999 41.28414566]]


# Norms
https://hadrienj.github.io/posts/Deep-Learning-Book-Series-2.5-Norms/

Norms are zero for zero-vector but otherwise positive.  
Norms obey the triangle inequality: ||a+b|| <= ||a|| + ||b||.  
Norms scale linearly: ||k * vec|| == |k| * ||vec|| for a scalar k.  
L0 norm (not a norm) = number of non-zero elements.  
L1 norm = sum of abs of elements. Discriminates small values.  
L2 norm = Euclidean norm = sqrt of sum of squares of elements. Discriminates large values.  
Linfinity = Lmax = max of abs of elements.  
Squared Euclidean norm = L2 squared. Efficient: avoids the sqrt.  
Frobenius norm of a matrix = L2 norm of the matrix flattened to a vector.  

In [3]:
# 100 draws from a Gaussian with mean 40 and std 10; measured two ways below.
feature1 = normal(loc=40, scale=10, size=100)
# Hand-rolled L2 norm: square root of the sum of the squared elements.
print("My L2", np.sqrt(np.sum(np.square(feature1))))
# Library L2 norm for comparison.
print("np L2", np.linalg.norm(feature1))

My L2 417.72479705090973
np L2 417.7247970509098


In [4]:
# Three equivalent routes to the squared L2 norm of feature1 (a 1-D array).
# 1) Sum of the squared elements.
print("Squared Euclidian,", np.sum(np.square(feature1)))
# 2) Dot product of the vector with itself.
print("Squared Euclidian,", np.dot(feature1, feature1))
# 3) Square of the library L2 norm.
print("Squared Euclidian,", np.linalg.norm(feature1) ** 2)

Squared Euclidian, 174494.00607122373
Squared Euclidian, 174494.0060712238
Squared Euclidian, 174494.00607122376


# Normalization

In [5]:
def standardize(vec, ddof=0):
    """Center vec on zero and scale it to unit standard deviation.

    vec:  array-like of numbers.
    ddof: delta degrees of freedom passed to np.std. The default 0 divides
          by N (population std), which is the original behavior. Pass
          ddof=1 to divide by N-1 so the result has unit variance under
          np.cov, whose default normalization is also N-1.

    Returns the standardized values, (vec - mean) / std.
    """
    center = vec - np.mean(vec)
    # The std is taken from the original vector; centering does not change it.
    unit = center / np.std(vec, ddof=ddof)
    return unit
def show_stats(desc,vec):
    """Print a label, then one line of summary statistics for vec.

    desc: text printed on its own line before the statistics.
    vec:  iterable of numbers. Max and min use the Python builtins;
          mean and std use numpy (np.std default, i.e. population std).
    """
    print(desc)
    highest, lowest = max(vec), min(vec)
    average, spread = np.mean(vec), np.std(vec)
    summary = "Max %.2f Min %.2f Mean %.2f Std %.2f" % (
        highest, lowest, average, spread)
    print(summary)

In [6]:
# Fresh sample of 100 draws (mean 40, std 10): report its statistics
# before and after standardization.
feature1 = normal(loc=40, scale=10, size=100)
show_stats("Before", feature1)

standard1 = standardize(feature1)
show_stats("Centered, unit variance", standard1)

Before
Max 63.48 Min 20.05 Mean 40.90 Std 9.21
Centered, unit variance
Max 2.45 Min -2.26 Mean -0.00 Std 1.00


# Covariance

In [7]:
# Two small samples of 5 draws each, with means 40 and 60 and std 10.
feature1 = normal(loc=40, scale=10, size=5)
feature2 = normal(loc=60, scale=10, size=5)
# Each argument is treated as one variable, giving a 2x2 covariance matrix.
print("Covariance\n", np.cov(feature1, feature2))

Covariance
 [[143.61142578  72.56409859]
 [ 72.56409859  63.15370311]]


In [8]:
# Covariance of the standardized features. standardize() divides by
# np.std's default population std (N denominator) while np.cov defaults to
# an N-1 denominator, so the diagonal comes out as N/(N-1) rather than 1.
standard1, standard2 = standardize(feature1), standardize(feature2)
print("Covariance\n", np.cov(standard1, standard2))

Covariance
 [[1.25       0.95244062]
 [0.95244062 1.25      ]]


In [46]:
def my_cov(vecs):
    """Sample covariance matrix computed with explicit loops.

    vecs: indexable collection where vecs[i][j] is observation j of
          variable i. The number of observations is taken from vecs[0].

    Returns an I x I numpy array (I = number of variables) whose [a, b]
    entry is the sum over observations of the product of the two
    variables' deviations from their means, divided by (J - 1).
    """
    n_vars = len(vecs)
    n_obs = len(vecs[0])

    # Arithmetic mean of each variable.
    means = np.zeros(n_vars)
    for var in range(n_vars):
        total = 0.
        for obs in range(n_obs):
            total = total + vecs[var][obs]
        means[var] = total / n_obs

    # Every ordered pair (a, b) of variables, visited row by row.
    cov = np.zeros([n_vars, n_vars])
    for a, b in np.ndindex(n_vars, n_vars):
        acc = 0.
        for obs in range(n_obs):
            dev_a = means[a] - vecs[a][obs]
            dev_b = means[b] - vecs[b][obs]
            acc = acc + dev_a * dev_b
        # N-1 denominator: the sample (unbiased) covariance.
        cov[a, b] = acc / (n_obs - 1)
    return cov

# Small hand-checkable data set: each list is one variable and each
# position is one observation.
A = [45,37,42,35,39]
B = [38,31,26,28,33]
C = [10,15,17,21,12]

# Other inputs that can be substituted here:
#   np.array([A,B])                -- two variables only
#   np.array([feature1,feature2])  -- the random features drawn earlier
mydata = np.array([A,B,C])

# Compare the loop-based covariance with numpy's on the same data.
print("data",type(mydata),type(mydata[0]))
print("data",mydata)
print("My cov()\n",my_cov(mydata))
print("np.cov()\n",np.cov(mydata))

data <class 'numpy.ndarray'> <class 'numpy.ndarray'>
data [[45 37 42 35 39]
 [38 31 26 28 33]
 [10 15 17 21 12]]
My cov()
 [[ 15.8    9.6  -12.  ]
 [  9.6   21.7  -17.25]
 [-12.   -17.25  18.5 ]]
np.cov()
 [[ 15.8    9.6  -12.  ]
 [  9.6   21.7  -17.25]
 [-12.   -17.25  18.5 ]]


# Correlation

In [10]:
def my_cor(vecs):
    """Pearson correlation matrix of the variables in vecs.

    vecs: same layout my_cov takes -- vecs[i][j] is observation j of
          variable i.

    Returns an I x I array whose [i, j] entry is
    cov(i, j) / (std(i) * std(j)), so the diagonal is 1.
    A variable with zero variance makes its row and column nan (0/0).
    """
    # my_cov accepts only the data argument; passing a second positional
    # argument raises TypeError.
    covariance = my_cov(vecs)
    # The diagonal holds the variances; their square roots are the std devs.
    stdevs = np.sqrt(np.diag(covariance))
    # The outer product gives std(i) * std(j) for every pair (i, j).
    return covariance / np.outer(stdevs, stdevs)
