# Tensor Methods for the Perturbation Paper

In [1]:
import tensorly

# Tensor Class

## Class

In [9]:
# requirements
import numpy as np
import pandas as pd

In [7]:
class TensorData:
    """A numpy tensor bundled with one pandas DataFrame of metadata per mode.

    ``frames[m]`` holds the metadata for mode ``m`` of ``X``. Indexing,
    transposing and unfolding return new ``TensorData`` objects whose frames
    are subsetted, reordered or merged to follow the tensor.
    """

    def __init__(self, X: np.ndarray, frames: list):
        """
        Initialize the TensorData object.

        Args:
        X (np.ndarray): A tensor of arbitrary size.
        frames (list): One entry per mode of the tensor. Each entry is a Pandas
                       DataFrame, a plain list (wrapped into a single-column
                       DataFrame named 'index'), or None (replaced by a DataFrame
                       whose 'index' column counts 0..size-1 for that mode).

        Raises:
        ValueError: If the number of frames differs from the number of modes.
        """
        self.X = X
        self.modes = X.ndim  # Number of modes of the tensor
        # Work on a copy so the conversions below never overwrite entries of
        # the list object the caller passed in.
        self.frames = list(frames)

        # Validate the count first: filling in defaults reads X.shape[framei],
        # which would raise IndexError if there were more frames than modes.
        if len(self.frames) != self.modes:
            raise ValueError(f"Expected {self.modes} dataframes, but got {len(frames)}.")

        # Process frames: If a frame is None or a list, convert it to a DataFrame
        for framei, frame in enumerate(self.frames):
            if isinstance(frame, list):
                self.frames[framei] = pd.DataFrame({'index': frame})
            elif frame is None:
                self.frames[framei] = pd.DataFrame({'index': list(range(self.X.shape[framei]))})

    def __repr__(self):
        return f"TensorData object with tensor of shape {self.X.shape} and {len(self.frames)} frames."

    def __getitem__(self, indices):
        """
        Index the tensor and associated dataframes.

        Args:
        indices (tuple): A tuple of indices, one for each mode of the tensor. These
                         can be integers, slices, or lists of indices. A single
                         non-tuple index is accepted for a one-mode tensor.

        Returns:
        TensorData: A new TensorData object with the subsetted tensor and dataframes.
                    Modes indexed with an integer are dropped together with their frame.

        Raises:
        IndexError: If the number of indices differs from the number of modes.
        """
        # `obj[i]` / `obj[a:b]` arrive as a bare index rather than a tuple
        if not isinstance(indices, tuple):
            indices = (indices,)

        # Ensure the correct number of indices are provided
        if len(indices) != self.modes:
            raise IndexError(f"Expected {self.modes} indices, but got {len(indices)}.")

        # Subset the tensor using the provided indices
        subset_X = self.X[indices]

        # Keep a frame only for modes that survive the indexing. numpy integer
        # scalars reduce a dimension exactly like Python ints, so both are skipped.
        subset_frames = [
            self.frames[mode].iloc[idx]
            for mode, idx in enumerate(indices)
            if not isinstance(idx, (int, np.integer))
        ]

        # Handle the case when the result is reduced to zero modes (a scalar)
        if subset_X.ndim == 0:
            subset_frames = []

        # Return a new TensorData object with the subsetted tensor and dataframes
        return TensorData(X=subset_X, frames=subset_frames)

    def transpose(self, new_order):
        """
        Transpose the modes of the tensor and reorder the associated dataframes.

        Args:
        new_order (tuple or list): The desired order of the modes after transposition.
                                   It should be a permutation of the current mode indices.

        Returns:
        TensorData: A new TensorData object with transposed tensor and reordered dataframes.

        Raises:
        ValueError: If new_order does not contain one entry per mode.
        """
        if len(new_order) != self.modes:
            raise ValueError(f"Expected a permutation of {self.modes} modes, but got {len(new_order)}.")

        # Transpose the tensor according to the new mode order
        transposed_X = self.X.transpose(new_order)

        # Reorder the dataframes according to the new mode order
        transposed_frames = [self.frames[i] for i in new_order]

        return TensorData(X=transposed_X, frames=transposed_frames)

    def unfold(self, new_shape):
        """
        Reshape the tensor by merging runs of consecutive modes.

        Every dimension of new_shape must equal the product of one run of
        consecutive modes of the current tensor, e.g. (2, 3, 4) -> (2, 12) or
        (2, 3, 4) -> (6, 4). The frames of merged modes are combined with a
        cross join, left frame varying slowest, which matches the row-major
        order used by the reshape.

        Args:
        new_shape (tuple): The target shape; must hold the same number of elements.

        Returns:
        TensorData: A new TensorData object with the reshaped tensor and one
                    (possibly merged) dataframe per new mode.

        Raises:
        ValueError: If the element count changes, or if new_shape cannot be
                    produced by merging consecutive modes.
        """
        if np.prod(self.X.shape) != np.prod(new_shape):
            raise ValueError("Reshape/unfold operation must maintain the same number of elements.")

        # Reshape the tensor
        reshaped_X = self.X.reshape(new_shape)

        old_shape = self.X.shape
        n_old = len(old_shape)
        mismatch = (
            f"Shape {tuple(reshaped_X.shape)} cannot be obtained by merging "
            f"consecutive modes of a tensor of shape {tuple(old_shape)}."
        )

        # Assign to each new mode the run of original modes it is built from.
        groups = []
        mode = 0
        for target in reshaped_X.shape:
            if mode >= n_old:
                raise ValueError(mismatch)
            group = [mode]
            size = old_shape[mode]
            mode += 1
            # Absorb following modes until the run is as large as the new mode
            while size < target and mode < n_old:
                group.append(mode)
                size *= old_shape[mode]
                mode += 1
            if size != target:
                raise ValueError(mismatch)
            groups.append(group)

        # Original modes left over after every new mode is matched are folded
        # into the last new mode.
        if mode < n_old:
            if not groups:
                raise ValueError(mismatch)
            groups[-1].extend(range(mode, n_old))

        new_frames = []
        for group in groups:
            merged_df = self.frames[group[0]]
            for other in group[1:]:
                merged_df = pd.merge(merged_df, self.frames[other], how='cross')
            new_frames.append(merged_df)
        return TensorData(X=reshaped_X, frames=new_frames)


## Tests

In [8]:
def test_unfold_1():
    """Check that unfolding (2, 3, 4) -> (2, 12) keeps single-column frames aligned.

    Every tensor entry is located both in the original and in the unfolded
    tensor, and the metadata rows found at those positions must agree.
    """
    n_entries = 2 * 3 * 4
    tensor = np.arange(n_entries).reshape((2, 3, 4))

    # One single-column DataFrame per mode
    id_frame = pd.DataFrame({'ID': [f'ID{n}' for n in range(2)]})  # Mode 0
    feature_frame = pd.DataFrame({'Feature': [f'Feature{n}' for n in range(3)]})  # Mode 1
    condition_frame = pd.DataFrame({'Condition': [f'Condition{n}' for n in range(4)]})  # Mode 2

    tdata = TensorData(tensor, [id_frame, feature_frame, condition_frame])

    # Merge modes 1 and 2: (2, 3, 4) -> (2, 12)
    unfolded_tdata = tdata.unfold((2, 12))

    # Each value occurs exactly once, so it identifies one position per tensor
    for val in range(n_entries):
        # Position of the value in the 3-mode tensor
        i, j, k = np.argwhere(tdata.X == val)[0]

        # Position of the same value in the unfolded 2-mode tensor
        x, y = np.argwhere(unfolded_tdata.X == val)[0]

        # Metadata attached to the original position
        original_df1_value = tdata.frames[0].iloc[i].values[0]
        original_df2_value = tdata.frames[1].iloc[j].values[0]
        original_df3_value = tdata.frames[2].iloc[k].values[0]

        # Metadata attached to the unfolded position
        unfolded_df1_value = unfolded_tdata.frames[0].iloc[x].values[0]
        unfolded_df2_values = unfolded_tdata.frames[1].iloc[y].values

        # The merged frame lists the mode-1 column first, then the mode-2 column
        assert original_df1_value == unfolded_df1_value, \
            f"Mismatch in mode 0: {original_df1_value} != {unfolded_df1_value}"
        assert original_df2_value == unfolded_df2_values[0], \
            f"Mismatch in mode 1: {original_df2_value} != {unfolded_df2_values[0]}"
        assert original_df3_value == unfolded_df2_values[1], \
            f"Mismatch in mode 2: {original_df3_value} != {unfolded_df2_values[1]}"

    print("test_unfold_1() passed for unfolding operation!")

def test_unfold_2():
    """This test verifies unfolding with dataframes containing multiple columns.

    This test puts multiple columns in the 3rd mode, so the merged frame is
    expected to hold the mode-1 column followed by both mode-2 columns.
    """
    tensor = np.arange(2 * 3 * 4).reshape((2, 3, 4))

    # Create corresponding DataFrames for each mode
    df1 = pd.DataFrame({'ID': [f'ID{i}' for i in range(2)]})  # Mode 0
    df2 = pd.DataFrame({'Feature': [f'Feature{i}' for i in range(3)]})  # Mode 1
    df3 = pd.DataFrame({'Condition': [f'Condition{i}' for i in range(4)], 'Label': [f'Label{i}' for i in range(4)]})  # Mode 2

    # Initialize TensorData object
    tdata = TensorData(tensor, [df1, df2, df3])

    # Unfold the tensor from shape (2, 3, 4) to (2, 12) by merging modes 1 and 2
    new_shape = (2, 12)
    unfolded_tdata = tdata.unfold(new_shape)

    # Loop through every value of the original tensor (each occurs exactly once)
    for val in range(tensor.size):
        # Get indices into the 3 mode tensor: i,j,k
        i, j, k = np.where(tdata.X == val)
        i, j, k = i[0], j[0], k[0]

        # Get indices into the unfolded 2 mode tensor: x,y
        x, y = np.where(unfolded_tdata.X == val)
        x, y = x[0], y[0]

        # Fetch values from original dataframes
        original_df1_value = tdata.frames[0].iloc[i].values[0]
        original_df2_value = tdata.frames[1].iloc[j].values[0]
        original_df3_value = tdata.frames[2].iloc[k].values

        # Fetch values from unfolded dataframes
        unfolded_df1_value = unfolded_tdata.frames[0].iloc[x].values[0]
        unfolded_df2_values = unfolded_tdata.frames[1].iloc[y].values

        # Assert values are consistent after unfolding; each message reports
        # exactly the two values being compared.
        assert original_df1_value == unfolded_df1_value, \
            f"Mismatch in mode 0: {original_df1_value} != {unfolded_df1_value}"
        assert original_df2_value == unfolded_df2_values[0], \
            f"Mismatch in mode 1: {original_df2_value} != {unfolded_df2_values[0]}"
        assert original_df3_value[0] == unfolded_df2_values[1], \
            f"Mismatch in mode 2 (column 0): {original_df3_value[0]} != {unfolded_df2_values[1]}"
        assert original_df3_value[1] == unfolded_df2_values[2], \
            f"Mismatch in mode 2 (column 1): {original_df3_value[1]} != {unfolded_df2_values[2]}"

    print("test_unfold_2() passed for unfolding operation!")

def test_unfold_3():
    """This test verifies unfolding with dataframes containing multiple columns.

    This test puts multiple columns in the 2nd mode, so the merged frame is
    expected to hold both mode-1 columns followed by the mode-2 column.
    """
    tensor = np.arange(2 * 3 * 4).reshape((2, 3, 4))

    # Create corresponding DataFrames for each mode
    df1 = pd.DataFrame({'ID': [f'ID{i}' for i in range(2)]})  # Mode 0
    df2 = pd.DataFrame({'Feature': [f'Feature{i}' for i in range(3)], 'Label': [f'Label{i}' for i in range(3)]})  # Mode 1
    df3 = pd.DataFrame({'Condition': [f'Condition{i}' for i in range(4)]})  # Mode 2

    # Initialize TensorData object
    tdata = TensorData(tensor, [df1, df2, df3])

    # Unfold the tensor from shape (2, 3, 4) to (2, 12) by merging modes 1 and 2
    new_shape = (2, 12)
    unfolded_tdata = tdata.unfold(new_shape)

    # Loop through every value of the original tensor (each occurs exactly once)
    for val in range(tensor.size):
        # Get indices into the 3 mode tensor: i,j,k
        i, j, k = np.where(tdata.X == val)
        i, j, k = i[0], j[0], k[0]

        # Get indices into the unfolded 2 mode tensor: x,y
        x, y = np.where(unfolded_tdata.X == val)
        x, y = x[0], y[0]

        # Fetch values from original dataframes
        original_df1_value = tdata.frames[0].iloc[i].values[0]
        original_df2_value = tdata.frames[1].iloc[j].values
        original_df3_value = tdata.frames[2].iloc[k].values[0]

        # Fetch values from unfolded dataframes
        unfolded_df1_value = unfolded_tdata.frames[0].iloc[x].values[0]
        unfolded_df2_values = unfolded_tdata.frames[1].iloc[y].values

        # Assert values are consistent after unfolding; each message reports
        # exactly the two values being compared.
        assert original_df1_value == unfolded_df1_value, \
            f"Mismatch in mode 0: {original_df1_value} != {unfolded_df1_value}"
        assert original_df2_value[0] == unfolded_df2_values[0], \
            f"Mismatch in mode 1 (column 0): {original_df2_value[0]} != {unfolded_df2_values[0]}"
        assert original_df2_value[1] == unfolded_df2_values[1], \
            f"Mismatch in mode 1 (column 1): {original_df2_value[1]} != {unfolded_df2_values[1]}"
        assert original_df3_value == unfolded_df2_values[2], \
            f"Mismatch in mode 2: {original_df3_value} != {unfolded_df2_values[2]}"

    print("test_unfold_3() passed for unfolding operation!")

def test_transpose_1():
    """This test verifies transposing modes in a tensor with single-column dataframes"""
    # Build the tensor locally so the test does not depend on a global `tensor`
    tensor = np.arange(2 * 3 * 4).reshape((2, 3, 4))

    # Create corresponding DataFrames for each mode
    df1 = pd.DataFrame({'ID': [f'ID{i}' for i in range(2)]})  # Mode 0
    df2 = pd.DataFrame({'Feature': [f'Feature{i}' for i in range(3)]})  # Mode 1
    df3 = pd.DataFrame({'Condition': [f'Condition{i}' for i in range(4)]})  # Mode 2

    # Initialize TensorData object
    tdata = TensorData(tensor, [df1, df2, df3])

    # Perform transpose (swap mode 0 and mode 2)
    transposed_tdata = tdata.transpose((2, 1, 0))

    # Loop through every value of the original tensor (each occurs exactly once)
    for val in range(tensor.size):
        # Get indices into the original tensor: i, j, k
        i, j, k = np.where(tdata.X == val)
        i, j, k = i[0], j[0], k[0]

        # Get indices into the transposed tensor
        i_t, j_t, k_t = np.where(transposed_tdata.X == val)
        i_t, j_t, k_t = i_t[0], j_t[0], k_t[0]

        # Fetch values from original dataframes
        original_df1_value = tdata.frames[0].iloc[i].values[0]
        original_df2_value = tdata.frames[1].iloc[j].values[0]
        original_df3_value = tdata.frames[2].iloc[k].values[0]

        # Fetch values from transposed dataframes (note the mode transpositions)
        transposed_df1_value = transposed_tdata.frames[0].iloc[i_t].values[0]  # Mode 2 -> Mode 0
        transposed_df2_value = transposed_tdata.frames[1].iloc[j_t].values[0]  # Mode 1 -> Mode 1
        transposed_df3_value = transposed_tdata.frames[2].iloc[k_t].values[0]  # Mode 0 -> Mode 2

        # Assert values are consistent after transposition
        assert original_df1_value == transposed_df3_value, \
            f"Mismatch in transposed mode 0: {original_df1_value} != {transposed_df3_value}"
        assert original_df2_value == transposed_df2_value, \
            f"Mismatch in mode 1: {original_df2_value} != {transposed_df2_value}"
        assert original_df3_value == transposed_df1_value, \
            f"Mismatch in transposed mode 2: {original_df3_value} != {transposed_df1_value}"

    print("test_transpose_1() passed for transpose operation!")


# Run every test in definition order; the first failing assertion stops the run.
for run_test in (test_unfold_1, test_unfold_2, test_unfold_3, test_transpose_1):
    run_test()

NameError: name 'pd' is not defined