In [1]:
"""Errors might remain if Kernel is not restarted"""
"""Changes to local modules might not update if Kernel is not restarted"""

'Changes to local modules might not update if Kernel is not restarted'

In [2]:
import numpy as np

# Correspondence analysis (CA) of a small contingency table.
# In the case of CA, the coordinate representation of the points
# seems to be the right scaling.
Data = np.array([
    [21, 143, 15, 31],
    [10, 3, 156, 34],
    [250, 10, 14, 1],
    [120, 20, 1, 370],
    [5, 3, 12, 145],
])
M, N = Data.shape  # number of rows / number of columns of the table

# Joint relative frequencies and their row / column marginals.
joint_xy = Data / Data.sum()
marj_x = joint_xy.sum(axis=1)
marj_y = joint_xy.sum(axis=0)

# Deviation of the joint frequencies from the product of the marginals.
Dev_mat = joint_xy - marj_x[:, np.newaxis] * marj_y

# Diagonal matrices holding the inverse square roots of the marginals,
# used to standardize the deviation matrix.
Dr = np.diag(1 / np.sqrt(marj_x))
Dc = np.diag(1 / np.sqrt(marj_y))

# Standardized residuals: Dr * Dev_mat * Dc.
D = Dr @ (Dev_mat @ Dc)

# CA coordinates, one point per row in both X and Y.
X = Dr @ D
Y = (D @ Dc).T  # transposed so that the column points lie on the rows

In [3]:
# Entry (m, n) of B is the Pearson ratio of (x_m, y_n) minus 1.
# The between-set function exp(-B) has worked pretty well in our previous projects.
B = Dr @ (D @ Dc)
fXY = np.exp(-B)

In [4]:
import scipy as sp
# Import the submodules explicitly: a bare `import scipy` does not guarantee that
# `sp.spatial` (used here) and `sp.linalg` (used further down for `eigh`) are
# available as attributes on older SciPy releases.
import scipy.linalg
import scipy.spatial.distance

# Distances/proximities within each set of points (must be Euclidean distance
# matrices). `pdist` returns the condensed pairwise distances (Euclidean metric by
# default) and `squareform` expands them into a full square matrix.
DX = sp.spatial.distance.squareform(sp.spatial.distance.pdist(X))
DY = sp.spatial.distance.squareform(sp.spatial.distance.pdist(Y))

In [5]:
# Euclidean norm of every row of X and of Y, i.e. directly the norms of the points.
UX, UY = (np.linalg.norm(points, axis=1) for points in (X, Y))

In [6]:
# Parameters (c1, c2, c3), passed on as a dictionary and chosen using conditions
# (i), (ii), and (iii) of Theorem 3.1 as a guideline.
c1 = 1/2
c2 = 1  # or c2 = 2
n_total = M + N  # total number of points in the two sets
a = 1. - 1./n_total
b = 2.*c2/n_total
c3 = (2.*c1 + c2 - b)/a
c = dict(c1=c1, c2=c2, c3=c3)
print(c1, c2, c3)

0.5 1 2.0


In [7]:
# Build the cosine law matrix, then take its eigendecomposition.
import Methods.CosLM as CosLM
COS_MAT, c1, c2, c3, zeta_f = CosLM.CosLM(DX, DY, UX, UY, fXY, c)  # w1 = x1
print("Is the cosine law matrix is symmetric:", np.all(np.isclose(COS_MAT, COS_MAT.T)))
sigma, U = sp.linalg.eigh(COS_MAT)
# If COS_MAT is symmetric, imaginary parts are supposed to be zero (or numerical
# zeros), so only the real part is kept.
sigma = sigma.real
# Snap numerically-zero eigenvalues to exactly 0.
sigma[np.isclose(sigma, 0.0)] = 0
# Display the eigenvalues from largest to smallest.
print(np.sort(sigma)[::-1])

Is the cosine law matrix is symmetric: True
[38.40708368 18.60563925 17.56353003 14.45702441 14.15110768 12.30342337
 11.79638296 10.65062321 10.18650912  2.25931586  0.        ]


In [8]:
# A symmetric matrix is positive semidefinite when none of its eigenvalues is negative.
is_psd = np.all(sigma >= 0)
print("Is COS_MAT PSD:", is_psd)
if not is_psd:
    print("Check the conditions in Theorem 3.1")

Is COS_MAT PSD: True


In [9]:
### Check distances and proximities in the Euclidean embedding ###
# Embedding coordinates: eigenvectors scaled by the square roots of the eigenvalues.
W = U @ np.diag(np.sqrt(sigma))
# Row layout used below: row 0 is left out of the distance checks, rows 1..M are
# taken as the X points, row M+1 as the origin `o`, and the remaining rows as the
# Y points.
EmbX = W[1:M+1, :]
o = W[M+1, :]
EmbY = W[M+2:, :]
# Pairwise distances between all rows of W except row 0. Inside DMat the X points
# sit at indices 0..M-1, the origin at index M, and the Y points from M+1 onwards.
DMat = sp.spatial.distance.squareform(sp.spatial.distance.pdist(W[1:, :]))


def _expected_within(dist):
    """Return sqrt(dist**2 + c3*zeta_f) with sqrt(c3*zeta_f) subtracted on the diagonal."""
    # Diagonal elements of distance matrices are always 0s, so the shift is removed there.
    # NOTE(review): presumably zeta_f is a scalar -- confirm against CosLM.
    return np.sqrt(dist**2 + c3*zeta_f) - np.sqrt(np.diag(c3*zeta_f*np.ones(dist.shape[0])))


# Within sets
DXe = DMat[:M, :M]
DYe = DMat[M+1:, M+1:]

print("Check theory on distances for the set X", np.all(np.isclose(DXe, _expected_within(DX))))
print("Check theory on distances for the set Y", np.all(np.isclose(DYe, _expected_within(DY))))
print("Check theory on distance to orgin for the set X:", np.all(np.isclose(DMat[M, :M], np.sqrt(UX**2 + c3*zeta_f))))
print("Check theory on distance to orgin for the set Y:", np.all(np.isclose(DMat[M, M+1:], np.sqrt(UY**2 + c3*zeta_f))))

# Between sets
fXYe = DMat[:M, M+1:]
print("Check theory on distance between points of sets:", np.all(np.isclose(fXYe, np.sqrt(fXY**2 + c3*zeta_f))))


Check theory on distances for the set X True
Check theory on distances for the set Y True
Check theory on distance to orgin for the set X: True
Check theory on distance to orgin for the set Y: True
Check theory on distance between points of sets: True
