In [1]:
!pip install --user graphistry

Collecting graphistry
  Downloading graphistry-0.20.4-py3-none-any.whl (89 kB)
Collecting pyarrow>=0.15.0
  Downloading pyarrow-6.0.1-cp39-cp39-win_amd64.whl (15.5 MB)
Installing collected packages: pyarrow, graphistry
Successfully installed graphistry-0.20.4 pyarrow-6.0.1




In [6]:
import os

import graphistry
import pandas as pd

# Credentials are read from the environment so they never live in the notebook
# source or its version history. Export GRAPHISTRY_USERNAME and
# GRAPHISTRY_PASSWORD before starting the kernel; a missing variable raises
# KeyError here instead of silently attempting an unauthenticated call.
# NOTE(review): the password that used to be hardcoded in this cell has been
# exposed and should be rotated.
graphistry.register(
    api=3,
    username=os.environ['GRAPHISTRY_USERNAME'],
    password=os.environ['GRAPHISTRY_PASSWORD'],
)  # Free: hub.graphistry.com
rows = pd.read_csv('../dataset/input_features.csv')
# Build the hypergraph from the columns at positions 1..3 and render it.
graphistry.hypergraph(rows.iloc[:, 1:4])['graph'].plot()

# links 781803
# events 260601
# attrib entities 13040


In [11]:
import numpy as np
import pandas as pd

# Make zero mean for the dataframe
def demean_data(X_df):
    '''
    Centre the data by subtracting the mean taken along axis 0.
    For a DataFrame this removes each column's mean from that column;
    for a Series it removes the overall mean.
    @param X_df: Pandas DataFrame or Series
    @return: object of the same kind as X_df holding the centred values
    '''
    column_means = X_df.mean(axis=0)
    centred = X_df - column_means
    return centred

# returns transformed x, prin components, var explained
def principal_components_analysis(data):
    '''
    Principal Components Analysis conducted on Data by:
        1. demeaning the Data
        2. Symmetrisation of Input Matrix (z.T . z of the demeaned data)
        3. Calculating Eigen Vectors of that symmetric matrix
        4. Transforming to PCA Space by multiplying by Eigen Vectors
        5. Calculating Explained Variance
        6. Ordering the Results by Explained Variance
    @param data: pd.DataFrame Data consisting of original data
                 (rows are observations, columns are features)
    @return: tuple(transformed_matrix, pc, explained_variance_ratio)
        transformed_matrix: demeaned data projected onto the eigenvectors,
                            columns ordered by decreasing explained variance
        pc: eigenvectors as rows, in the same order as the columns above
        explained_variance_ratio: per-component share of the total sample
                            variance, in decreasing order. The division yields
                            NaN when the total variance is zero.
    '''
    # get the original dimensions of a matrix
    dimensions = data.shape[1]
    # make zero mean of matrix
    z = demean_data(data)
    # make a matrix symmetric, invertible
    symmetric_matrix = make_a_matrix_symmetric_invertible(z)
    # The matrix is symmetric by construction (z.T . z), so use eigh rather
    # than eig: it returns real eigenvalues and orthonormal eigenvectors even
    # when eigenvalues repeat, whereas eig may yield complex values from
    # round-off and non-orthogonal vectors. The eigenvalues themselves are not
    # needed because the variance is measured on the projected data below.
    # The sign of each eigenvector is arbitrary.
    _, eigenvectors = np.linalg.eigh(symmetric_matrix)
    # returns transformed matrix
    transformed_matrix = pca_transformed(z, eigenvectors, dimensions)
    # find the principal components (one eigenvector per row)
    pc = eigenvectors.T
    # find explained variances
    explained_variance = np.var(transformed_matrix, axis=0, ddof=1)  # col sample var
    # total variance across all components
    sum_of_variances = np.sum(explained_variance)
    # normalise the variances (take the ratio)
    explained_variance_ratio = explained_variance / sum_of_variances
    # order everything based on explained variance ratio, largest first
    ordering = np.argsort(explained_variance_ratio)[::-1]
    # apply the same ordering to all three results so they stay aligned
    transformed_matrix = transformed_matrix[:, ordering]
    pc = pc[ordering, :]
    explained_variance_ratio = explained_variance_ratio[ordering]
    return transformed_matrix, pc, explained_variance_ratio

# this code will make a non-square matrix a square matrix, a symmetric matrix as well as an invertible matrix if the determinant is non-zero
def make_a_matrix_symmetric_invertible(z):
    '''
    Form the product of the transpose of z with z itself.
    For a 2-D input with n columns the result is an n x n symmetric matrix.
    @param z: Input Data
    @return: np.array
    '''
    z_transposed = z.T
    gram = np.dot(z_transposed, z)
    return gram

# get the transformed matrix space
def pca_transformed(z, eigenvectors, dimensions):
    '''
    Project the Input Data onto the leading eigenvector columns
    @param z: Input Data
    @param eigenvectors: Eigen vectors of Input data, one per column
    @param dimensions: number of leading eigenvector columns to project onto
    @return: np.array
    '''
    projection_basis = eigenvectors[:, :dimensions]
    projected = np.dot(z, projection_basis)
    return projected

# Quick check on a tiny 2x2 input; the returned tuple is shown as the cell output.
sample_frame = pd.DataFrame(np.array([[1,0],[0,1]]))
principal_components_analysis(sample_frame)

(array([[ 7.07106781e-01, -1.11022302e-16],
        [-7.07106781e-01,  1.11022302e-16]]),
 array([[ 0.70710678, -0.70710678],
        [ 0.70710678,  0.70710678]]),
 array([1.00000000e+00, 2.46519033e-32]))