In [1]:
import torch
import numpy as np
import scipy.io as sio
import os
import time
from scipy import stats

import gc

gc.collect()

0

In [2]:
class EnsemblePursuitPyTorch():
    
    def calculate_cost_delta(self):
        cost_delta=torch.clamp(torch.matmul(self.current_v,self.X),min=0,max=None)**2/torch.matmul(self.current_v,self.current_v)-self.lambd
        #print('mean cost_delta',torch.mean(cost_delta))
        return cost_delta
    
    def fit_one_assembly(self):
        with torch.cuda.device(0) as device:
            top_corr_neuron=self.select_top_k_corr_neuron()
            #print('corr',top_corr_neuron)
            #print('chosen neuron',top_corr_neuron)
            #choose_neuron_idx=np.random.randint(0,self.sz[1],1)[0]
            choose_neuron_idx=top_corr_neuron
            #print(choose_neuron_idx)
            self.selected_neurons=torch.zeros([self.sz[1]]).cuda()
            self.selected_neurons[choose_neuron_idx]=1
            self.current_v=self.X[:,choose_neuron_idx]
            max_delta_cost=1000
            self.i=0
            while max_delta_cost>0:
                cost_delta=self.calculate_cost_delta()
                #print(cost_delta.size())
                #print(self.current_u.size())
                mask=self.selected_neurons.clone()
                mask[self.selected_neurons==0]=1
                mask[self.selected_neurons!=0]=0
                masked_cost_delta=mask*cost_delta
                #print(masked_cost_delta.type())
                #print(masked_cost_delta)
                values,sorted_neurons=masked_cost_delta.sort()
                max_delta_neuron=sorted_neurons[-1]
                #print(max_delta_neuron.item())
                #print(values)
                max_delta_cost=values[-1]
                #print('max delta',max_delta_cost)
                self.current_u=torch.clamp(torch.matmul(self.current_v,self.X),min=0,max=None)/torch.matmul(self.current_v,self.current_v)
                #print(self.current_u[max_delta_neuron])
                if max_delta_cost>0:
                    self.current_v=(self.current_v+self.X[:,max_delta_neuron.item()])/2
                    self.selected_neurons[max_delta_neuron.item()]=1
                self.i+=1
            #print(i)
            self.current_u=torch.clamp(torch.matmul(self.current_v,self.X),min=0,max=None)/torch.matmul(self.current_v,self.current_v)
            self.U=torch.cat((self.U,self.current_u.view(self.X.size(1),1)),1)
            self.V=torch.cat((self.V,self.current_v.view(1,self.X.size(0))),0)
    
    def corrcoef(self,x):
        """
        Mimics `np.corrcoef`

        Arguments
        ---------
        x : 2D torch.Tensor

        Returns
        -------
        c : torch.Tensor
            if x.size() = (5, 100), then return val will be of size (5,5)

        Numpy docs ref:
            https://docs.scipy.org/doc/numpy/reference/generated/numpy.corrcoef.html
        Numpy code ref: 
            https://github.com/numpy/numpy/blob/v1.12.0/numpy/lib/function_base.py#L2933-L3013

        Example:
            >>> x = np.random.randn(5,120)
            # result is a (5,5) matrix of correlations between rows
            >>> np_corr = np.corrcoef(x)
            >>> th_corr = corrcoef(torch.from_numpy(x))
            >>> np.allclose(np_corr, th_corr.numpy())
            # [out]: True
        """
        # calculate covariance matrix of rows
        mean_x = torch.mean(x,0)
        #print(mean_x.size())
        #print(mean_x.expand_as(x))
        xm = torch.sub(x,mean_x)
        c = xm.mm(xm.t())
        c = c / (x.size(1) - 1)

        # normalize covariance matrix
        d = torch.diag(c)
        stddev = torch.pow(d, 0.5)
        c = c.div(stddev.expand_as(c))
        c = c.div(stddev.expand_as(c).t())

        # clamp between -1 and 1
        # probably not necessary but numpy does it
        c = torch.clamp(c, -1.0, 1.0)

        return c
    
    def select_top_k_corr_neuron(self):
        corr=self.corrcoef(self.X.t())
        vals,ix=corr.sort(dim=1)
        top_vals=vals[:,:-1][:,self.sz[1]-6:]
        #print(top_vals)
        av=torch.mean(top_vals,dim=1)
        vals,top_neurons=torch.sort(av)
        top_neuron=top_neurons[self.sz[1]-101:]
        top_val=vals[self.sz[1]-101:]
        idx=torch.randint(0,100,size=(1,))
        print(idx)
        print('top n', top_neuron[idx[0]], top_val[idx[0]])
        return top_neuron[idx[0]].item()
    
    
    def fit_transform(self,X,lambd,n_ensembles=None):
        with torch.cuda.device(0) as device:
            self.lambd=lambd
            print(X)
            self.X=stats.zscore(X,axis=0)
            self.X=np.nan_to_num(self.X)
            print(np.mean(self.X,axis=0))
            self.X=torch.cuda.FloatTensor(self.X) 
            print(self.X)
            self.sz=self.X.size()
            print(self.sz[0],self.sz[1])
            self.U=torch.zeros((self.X.size(1),1)).cuda()
            self.V=torch.zeros([1,self.X.size(0)]).cuda()
            self.nr_of_neurons=[]
            #self.current_u=torch.zeros([self.X.size(1)]).cuda()
            cost_lst=[]
            for iteration in range(0,n_ensembles):
                self.fit_one_assembly()
                self.nr_of_neurons.append(self.i)
                U_V=torch.mm(self.current_u.view(self.sz[1],1),self.current_v.view(1,self.sz[0]))
                #print(U_V.size())
                res=(self.X-U_V.t())
                self.X=res
                #print('norm',torch.norm(self.X))
                self.cost=torch.mean(torch.mul(res,res))
                #print('cost',self.cost)
                cost_lst.append(self.cost.item())
                #Reset u for new iteration
            self.U=self.U[:,1:]
            self.V=self.V[1:,:]
            print(self.X.size())
            print(self.U.size())
            print(self.V.size())
            return torch.matmul(self.U,self.V).t().cpu(), self.nr_of_neurons, self.U.cpu(), self.V.cpu(), cost_lst

In [3]:
X=sio.loadmat('/home/maria/Documents/EnsemblePursuit/data/natimg2800_M170717_MP034_2017-09-11.mat')['stim']['resp'][0][0]
X[X<0]=0
print(X.shape)

(5880, 10103)


In [4]:
np.random.seed(7)
ep=EnsemblePursuitPyTorch()
s=time.time()
U_V,nr_of_neurons,U,V, cost_lst=ep.fit_transform(X,300,100)
e=time.time()
print(e-s)
print(nr_of_neurons)
print(cost_lst)

[[31.466291   11.154725    0.         ... 35.939972    0.
   0.        ]
 [41.705284    0.          0.         ...  0.         44.25718
  34.889084  ]
 [ 0.          0.          0.         ... 92.24999    22.162407
  21.241     ]
 ...
 [12.287675   18.75502     0.         ... 15.54476    55.489014
  21.571573  ]
 [14.505278   27.549797    0.         ...  3.5592616   0.
  26.330444  ]
 [ 0.65212256 31.231289    0.         ...  0.         14.375819
  14.927368  ]]


  ret = um.sqrt(ret, out=ret)
  return (a - mns) / sstd
  return (a - mns) / sstd


[-2.46528060e-08 -7.78509630e-08 -5.44956755e-08 ...  1.29751605e-08
 -1.94627403e-09  5.19006438e-09]
tensor([[ 1.1550, -0.1022, -0.5535,  ...,  0.5873, -0.8588, -0.7753],
        [ 1.7695, -0.8956, -0.5535,  ..., -0.8096,  0.8385,  0.5909],
        [-0.7333, -0.8956, -0.5535,  ...,  2.7760, -0.0089,  0.0565],
        ...,
        [ 0.0041,  0.4383, -0.5535,  ..., -0.2054,  1.2692,  0.0694],
        [ 0.1372,  1.0639, -0.5535,  ..., -0.6713, -0.8588,  0.2557],
        [-0.6942,  1.3257, -0.5535,  ..., -0.8096, -0.3075, -0.1908]],
       device='cuda:0')
5880 10103
tensor([66])
top n tensor(2398, device='cuda:0') tensor(0.4354, device='cuda:0')
tensor([93])
top n tensor(3183, device='cuda:0') tensor(0.4809, device='cuda:0')
tensor([33])
top n tensor(1458, device='cuda:0') tensor(0.4139, device='cuda:0')
tensor([93])
top n tensor(3183, device='cuda:0') tensor(0.4805, device='cuda:0')
tensor([61])
top n tensor(3214, device='cuda:0') tensor(0.4313, device='cuda:0')
tensor([80])
top n tens

tensor([80])
top n tensor(9391, device='cuda:0') tensor(0.4048, device='cuda:0')
tensor([12])
top n tensor(5209, device='cuda:0') tensor(0.3558, device='cuda:0')
tensor([93])
top n tensor(1616, device='cuda:0') tensor(0.4252, device='cuda:0')
tensor([60])
top n tensor(325, device='cuda:0') tensor(0.3858, device='cuda:0')
tensor([45])
top n tensor(7003, device='cuda:0') tensor(0.3675, device='cuda:0')
torch.Size([5880, 10103])
torch.Size([10103, 100])
torch.Size([100, 5880])
194.77644181251526
[249, 230, 491, 792, 269, 355, 81, 272, 109, 187, 175, 442, 324, 420, 480, 195, 428, 6, 227, 52, 355, 190, 80, 471, 184, 101, 4, 396, 178, 235, 315, 91, 262, 154, 243, 82, 262, 202, 247, 315, 143, 35, 228, 216, 221, 310, 209, 44, 223, 6, 91, 327, 248, 186, 364, 324, 395, 172, 192, 66, 124, 6, 4, 249, 152, 147, 86, 171, 40, 76, 92, 56, 114, 84, 85, 26, 159, 110, 206, 105, 157, 238, 152, 143, 209, 168, 72, 80, 124, 72, 117, 84, 98, 197, 7, 121, 161, 92, 7, 168]
[0.998864471912384, 0.9972290396690369

In [5]:
def test_train_split(data,stim):
    unique, counts = np.unique(stim.flatten(), return_counts=True)
    count_dict=dict(zip(unique, counts))

    keys_with_enough_data=[]
    for key in count_dict.keys():
        if count_dict[key]==2:
            keys_with_enough_data.append(key)

    filtered_stims=np.isin(stim.flatten(),keys_with_enough_data)

    #Arrange data so that responses with the same stimulus are adjacent
    z=stim.flatten()[np.where(filtered_stims)[0]]
    sortd=np.argsort(z)
    istim=np.sort(z)
    X=data[filtered_stims,:]
    out=X[sortd,:].copy()

    x_train=out[::2,:]
    y_train=istim[::2]
    x_test=out[1::2,:]
    y_test=istim[1::2]
    
    return x_train, x_test, y_train, y_test

def evaluate_model(x_train,x_test):
    corr_mat=np.zeros((x_train.shape[0],x_train.shape[0]))
    for j in range(0,x_train.shape[0]):
        for i in range(0,x_test.shape[0]):
            corr_mat[j,i]=np.corrcoef(x_train[j,:],x_test[i,:])[0,1]
    print(np.mean(np.argmax(corr_mat, axis=0) == np.arange(0,x_train.shape[0],1,int)))
    
stim=sio.loadmat('/home/maria/Documents/EnsemblePursuit/data/natimg2800_M170717_MP034_2017-09-11.mat')['stim']['istim'][0][0]
x_train, x_test, y_train, y_test=test_train_split(np.array(V.t()),stim)
evaluate_model(x_train,x_test)

0.05964285714285714
