In [1]:
import numpy as np
import torch

In [88]:
# Toy data: 10 observations (rows) of 5 integer variables (columns), values in [0, 5).
X = np.random.randint(0, 5, size=(10, 5), dtype='int')

# Reference correlation between columns (rowvar=False: each column is a variable).
corr = np.corrcoef(X, rowvar=False)
print(corr)

[[ 1.         -0.01850662  0.28519936 -0.55503634  0.57330977]
 [-0.01850662  1.          0.17137994  0.1977558  -0.38519575]
 [ 0.28519936  0.17137994  1.         -0.09097159 -0.09541414]
 [-0.55503634  0.1977558  -0.09097159  1.         -0.68680584]
 [ 0.57330977 -0.38519575 -0.09541414 -0.68680584  1.        ]]


In [89]:
# Manual Pearson correlation between the columns of X, for comparison with corr.
# Centre every column on its own mean.
X_ = X - X.mean(axis=0)

# Population covariance between columns (normalised by N, not N-1).
covX = (X_.T @ X_) / X.shape[0]

# Per-column population standard deviation, kept as a column vector.
stdX = X.std(axis=0)[:, np.newaxis]

# Entry (i, j) is std_i * std_j.
std_matrix = stdX @ stdX.T

# Normalise the covariance to obtain the correlation matrix.
corr2 = covX / std_matrix

In [90]:
# Reference correlation matrix computed by np.corrcoef.
corr

array([[ 1.        , -0.01850662,  0.28519936, -0.55503634,  0.57330977],
       [-0.01850662,  1.        ,  0.17137994,  0.1977558 , -0.38519575],
       [ 0.28519936,  0.17137994,  1.        , -0.09097159, -0.09541414],
       [-0.55503634,  0.1977558 , -0.09097159,  1.        , -0.68680584],
       [ 0.57330977, -0.38519575, -0.09541414, -0.68680584,  1.        ]])

In [91]:
# Manually computed correlation matrix; expected to match corr.
corr2

array([[ 1.        , -0.01850662,  0.28519936, -0.55503634,  0.57330977],
       [-0.01850662,  1.        ,  0.17137994,  0.1977558 , -0.38519575],
       [ 0.28519936,  0.17137994,  1.        , -0.09097159, -0.09541414],
       [-0.55503634,  0.1977558 , -0.09097159,  1.        , -0.68680584],
       [ 0.57330977, -0.38519575, -0.09541414, -0.68680584,  1.        ]])

In [119]:
def get_corr_matrix(X):
    '''
    Pearson correlation matrix between the columns of X
    (same convention as np.corrcoef(X, rowvar=False)).

    Input:
        X: num_obj,D tensor (rows are observations, columns are variables).
           Integer/bool tensors are cast to float32 first, because
           torch.mean and torch.std do not accept integer dtypes.

    Output:
        corr_matrix: D,D tensor; entry (i, j) is the correlation between
                     columns i and j of X.

    Note:
        A column with zero variance gives a zero in the normalizer, so the
        corresponding row and column of the result are NaN (0/0).
    '''
    # torch.mean raises on integer tensors; accept them by casting to float
    if not (torch.is_floating_point(X) or torch.is_complex(X)):
        X = X.float()

    # make X zero mean (per column)
    X_ = X - torch.mean(X,dim=0)

    # compute population covariance matrix between columns, shape D,D
    cov_matrix = torch.matmul(X_.transpose(0,1),X_)/X.shape[0]

    # population standard deviation of each column, shape D,1
    # (unbiased=False to match the division by N above)
    std_devs = torch.std(X,dim=0,unbiased=False).unsqueeze(dim=1)

    # normalizing matrix: entry (i, j) is std_i * std_j
    std_matrix = torch.matmul(std_devs,std_devs.transpose(0,1))

    # compute correlation matrix
    corr_matrix = torch.div(cov_matrix,std_matrix)

    return corr_matrix

In [120]:
# Run the torch implementation on the same X used for the numpy results.
# X was created with an integer dtype, so it is cast to float32 before the call.
a = get_corr_matrix(torch.from_numpy(X).float())
a

tensor([[ 2.2900, -0.0400,  0.6700, -1.1300,  1.2300],
        [-0.0400,  2.0400,  0.3800,  0.3800, -0.7800],
        [ 0.6700,  0.3800,  2.4100, -0.1900, -0.2100],
        [-1.1300,  0.3800, -0.1900,  1.8100, -1.3100],
        [ 1.2300, -0.7800, -0.2100, -1.3100,  2.0100]])
tensor([[1.5133],
        [1.4283],
        [1.5524],
        [1.3454],
        [1.4177]])


tensor([[ 1.0000, -0.0185,  0.2852, -0.5550,  0.5733],
        [-0.0185,  1.0000,  0.1714,  0.1978, -0.3852],
        [ 0.2852,  0.1714,  1.0000, -0.0910, -0.0954],
        [-0.5550,  0.1978, -0.0910,  1.0000, -0.6868],
        [ 0.5733, -0.3852, -0.0954, -0.6868,  1.0000]])

In [7]:
def get_train_val_split_indices(total_num_imgs=1960,val_ratio=0.2,num_obj=49):
    '''
    Randomly split image indices into training and validation sets, taking
    the same number of validation images from every object.

    Indices [k*n, (k+1)*n) are treated as belonging to object k, where
    n = total_num_imgs // num_obj is the number of images per object.

    Inputs:
        total_num_imgs: number of images in the dataset
        val_ratio: fraction of each object's images to be used in the validation set
        num_obj: number of objects in the dataset (default 49)

    Outputs:
        train_mask: numpy array of training indices, in ascending order
        val_mask: (num_obj*int(n*val_ratio),) numpy array of validation indices,
                  grouped by object and in random order within each object

    Notes:
        - If total_num_imgs is not a multiple of num_obj, the trailing
          leftover indices are never sampled and therefore end up in train_mask.
        - Sampling uses the global np.random state; seed it for reproducible splits.

    Raises:
        ValueError: if num_obj is not positive.
    '''
    if num_obj <= 0:
        raise ValueError('num_obj must be a positive integer')

    # number of images per object
    num_imgs_per_obj = int(total_num_imgs//num_obj)

    # number of validation images for each object according to val_ratio
    num_val_imgs_per_obj = int(num_imgs_per_obj*val_ratio)

    # validation indices, one array per object
    val_mask = []

    for obj_count in range(num_obj):
        # sample within the object (without replacement), then shift into
        # that object's block of global indices
        choose = np.random.choice(num_imgs_per_obj,num_val_imgs_per_obj,replace=False)
        val_mask.append(choose + num_imgs_per_obj*obj_count)

    # concatenate validation indices for each object
    val_mask = np.concatenate(val_mask)

    # train indices are everything not chosen for validation; a boolean mask
    # avoids a linear scan of val_mask for every image index
    is_val = np.zeros(total_num_imgs,dtype=bool)
    is_val[val_mask] = True
    train_mask = np.flatnonzero(~is_val)

    return train_mask,val_mask

In [8]:
# Split using the function defaults: 1960 images, 20% of each object held out.
train_mask,val_mask = get_train_val_split_indices()

In [9]:
# Number of training indices (with the defaults: 49 objects x 32 images each).
train_mask.shape

(1568,)

In [10]:
# Number of validation indices (with the defaults: 49 objects x 8 images each).
val_mask.shape

(392,)

In [11]:
# Validation indices: grouped by object, in random order within each object's block.
val_mask

array([  37,   19,   11,   32,    6,   15,   12,   23,   79,   42,   44,
         45,   54,   68,   71,   41,   84,   99,   83,   86,   82,  112,
         89,  118,  131,  134,  158,  157,  126,  153,  150,  127,  164,
        195,  166,  179,  191,  176,  182,  161,  212,  224,  235,  231,
        208,  238,  218,  230,  243,  276,  271,  268,  242,  278,  253,
        267,  318,  297,  301,  293,  292,  281,  304,  280,  322,  356,
        338,  357,  342,  333,  323,  337,  382,  387,  380,  391,  383,
        376,  381,  366,  404,  406,  402,  410,  425,  417,  437,  421,
        448,  440,  451,  441,  453,  477,  443,  446,  513,  517,  518,
        500,  491,  486,  509,  501,  532,  522,  538,  526,  545,  543,
        556,  555,  569,  567,  594,  595,  585,  560,  565,  566,  639,
        601,  619,  630,  608,  602,  620,  626,  640,  658,  656,  675,
        662,  668,  665,  678,  704,  698,  709,  693,  711,  688,  707,
        692,  757,  733,  724,  748,  727,  750,  7

In [12]:
# Training indices, in ascending order.
train_mask

array([   0,    1,    2, ..., 1956, 1958, 1959])

In [16]:
# Scratch check: a random ordering of the integers 0..4.
a = np.random.permutation(5)
a

array([4, 0, 3, 1, 2])