# Treelets Python Implementation

Python implementation of the treelets algorithm to cement my understanding. Working functions will be wrapped in a module and used to compare the treelets decomposition to PCA. 

## Setup

In [61]:
import os 
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

## Data

In [3]:
X = np.array([np.random.normal(0,1,100),
              np.random.normal(0,1,100),
              np.random.normal(0,1,100),
              np.random.normal(0,1,100)])

In [4]:
C = np.cov(X)
cc = np.corrcoef(X)

## Functions

In [115]:
def cov2cor(C): 
    """
    Converts covraiance matrix (numpy) to correlation matrix. 
    Args: 
        - C: variance-covaraince matrix
    """
    
    d = np.power(np.diag(C), -0.5)
    D = np.diag(d)
    CC = np.matmul(np.matmul(D,C),D)
    
    return CC

In [19]:
def jacobi_rotation(C,a,b):
    """
    Finds a 2x2 Jacobi rotation matrix that decocorrelates two variables. 
    Args: 
        - C: variance-covariance matrix
        - a: index of first variable
        - b: index of second variable
    """
    
    p = len(C)
    C_aa = C[a,a]
    C_bb = C[b,b]
    C_ab = C[a,b]
    
    theta = 0.5*np.arctan(2*C_ab/(C_aa - C_bb))
    cos_theta = np.cos(theta)
    sine_theta = np.sin(theta)
    
    rotation = np.array([[cos_theta, -sine_theta],
                  [sine_theta, cos_theta]])
    
    return rotation

In [None]:
def rotate_data(X, a, b, J): 
    """
    De-correlates two variables given their index and a Jacobi rotation matrix. 
    Args: 
        - J: 2x2 Jacobi rotation matrix
        - X: nxp array of observations 
        - a: index of first variable
        - b: index of second variabl
    """
    
    Z = X[[a,b]]
    
    return True 

In [130]:
def rotate_covariance_matrix(C, a, b, J):
    """
    De-correlates two variables given their index and a Jacobi rotation matrix. 
    Args: 
        - J: 2x2 Jacobi rotation matrix
        - C: variance-covaraince matrix 
        - a: index of first variable
        - b: index of second variable
    """
    
    A = np.array([[C[a,a],C[a,b]],
                  [C[b,a],C[b,b]]])
    B = np.matmul(np.matmul(J.transpose(),A),J)
    C[a,a], C[a,b], C[b,a], C[b,b] = B.flatten()
    
    return C

In [158]:
def treelet_decomposition(X, depth): 
    """
    Performs treelet decomposition to a specified depth. 
    Args:
        - X: nxp array of observations 
        - depth: treelet depth ∈ {1,...,p-1} 
        
    Returns a nested dictionary for the treelet decomposition at each level. 
        - C: estimated correlation matrix at level
        - J: rotation performed at level
        - pair: variables were merged
        - info: {0 = sum variable, 1 = difference varaible}
    """
    
    treelet = {}
    treelet[0] = {C: np.cov(X), 
                  J: None,
                  p: (None, None)}
    
    
    return treelet

In [159]:
treelet_decomposition(X,0)

NameError: name 'p' is not defined

## Tests...