<a href="https://colab.research.google.com/github/Enterprise-D/sc_multimodal/blob/main/multimodal_sample_run.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Evaluation

In [148]:
import random
import joblib
import numpy as np
import pandas as pd
from sklearn import decomposition, model_selection, preprocessing

import os
import gc
import scipy.sparse as sps
from tqdm import tqdm as tqdm

Fix random seeds:

In [149]:
# Single seed value reused by every random number generator in this notebook.
random_seed = 42

# Seed NumPy's global generator and Python's built-in `random` module.
np.random.seed(random_seed)
random.seed(random_seed)

tSVD on original data:

In [150]:
# Sparse validation inputs stored in SciPy's .npz format.
valid_input = sps.load_npz('sample_input.npz')

# Pre-fitted decomposition object restored from disk; only its `transform`
# method is used here.
# NOTE(review): joblib.load unpickles the file — only load artifacts you trust.
tsvd_input = joblib.load('tsvd_input.pkl')

# Project the raw inputs with the restored transformer.
U_valid_input = tsvd_input.transform(valid_input)

tSVD on binarized data:

In [151]:
# Binarize the inputs: every entry that is currently non-zero becomes 1.
# Note that `valid_input` itself is overwritten in place by this step.
nonzero_rows, nonzero_cols = valid_input.nonzero()
valid_input[nonzero_rows, nonzero_cols] = 1

# Pre-fitted decomposition object for the binarized data, restored from disk.
# NOTE(review): joblib.load unpickles the file — only load artifacts you trust.
tsvd_input_binary = joblib.load('tsvd_input_binary.pkl')

# Project the binarized inputs with the restored transformer.
U_valid_input_bin = tsvd_input_binary.transform(valid_input)

In [152]:
import torch
import torch.nn as nn
import torch.utils.data as Data

# Seed PyTorch's global RNG with the same value already used for `random`
# and NumPy. The call returns the generator object, which the notebook echoes
# as this cell's output.
torch.manual_seed(random_seed)

<torch._C.Generator at 0x7fd91165ebd0>

Prepare files:

In [153]:
# Column identifiers stored as an object-dtype array (hence allow_pickle=True).
# NOTE(review): allow_pickle=True unpickles file contents — only load trusted files.
hvg_index = np.load('hvg_index.npy', allow_pickle=True)

# Sparse validation targets stored in SciPy's .npz format.
valid_target = sps.load_npz('sample_target.npz')

# Show the loaded index as the cell output.
hvg_index

array(['2', '13', '18', ..., '23316', '23377', '23401'], dtype=object)

Normalize inputs:

In [154]:
# Per-row standard deviation, kept as a column vector so it broadcasts
# against the rows of U_valid_input.
U_valid_input_std = np.std(U_valid_input, axis=1, keepdims=True)

# Scale every row by its own standard deviation.
U_valid_input_norm = U_valid_input / U_valid_input_std

In [155]:
# Densify the targets exactly once. The isinstance-style guard keeps this cell
# re-runnable: on a second run `valid_target` is already a dense array.
valid_target_dense = (valid_target.toarray() if sps.issparse(valid_target)
                      else np.asarray(valid_target))

# Per-row mean over the non-zero entries only (zeros are masked out).
valid_target_mean = np.ma.mean(np.ma.masked_equal(valid_target_dense, 0), axis=1).data

# Divide as plain dense arrays. Dividing the sparse matrix directly by a dense
# column vector returns a different container type depending on the SciPy
# release (dense matrix vs. sparse matrix), and a sparse result cannot be fed
# to torch.from_numpy later on.
valid_target = valid_target_dense / valid_target_mean.reshape(-1, 1)

# Release the unscaled dense copy; only the normalized array is needed.
del valid_target_dense

Define dataset and dataloader:

In [156]:
class ValidDatasetFINAL(Data.Dataset):
    """Dataset over the validation inputs and targets prepared in this notebook.

    Reads the notebook-level arrays ``U_valid_input_norm``,
    ``U_valid_input_bin`` and ``valid_target`` at construction time. Both
    input tensors are moved to the GPU immediately; the targets stay where
    ``torch.from_numpy`` puts them and are moved to the GPU per item.
    """

    def __init__(self):
        def as_float_tensor(array):
            # Cast to float32 and wrap as a tensor.
            return torch.from_numpy(array.astype('float32'))

        self.inputs_data = as_float_tensor(U_valid_input_norm).cuda()
        self.inputs_data_bin = as_float_tensor(U_valid_input_bin).cuda()
        # Large target data: transferred on demand in __getitem__.
        self.targets_data_ori = as_float_tensor(valid_target)

    def __len__(self):
        """Number of samples, i.e. rows of the input tensor."""
        n_samples = self.inputs_data.shape[0]
        return n_samples

    def __getitem__(self, idx):
        """Return ``(inputs, binarized inputs, targets)`` for index ``idx``.

        Each slice is squeezed; the target slice is moved to the GPU here.
        """
        value_row = self.inputs_data[idx, ...].squeeze()
        binary_row = self.inputs_data_bin[idx, ...].squeeze()
        target_row = self.targets_data_ori[idx, ...].squeeze().cuda()
        return value_row, binary_row, target_row

# Batches of 256 samples drawn in shuffled order from the validation dataset.
valid_dataloader_final = Data.DataLoader(
    dataset=ValidDatasetFINAL(),
    shuffle=True,
    batch_size=256,
)

Define deep models:

In [157]:
class DenseModel(nn.Module):
    """Fully connected network fed with two concatenated feature tensors.

    Args:
        input_size: Width of *each* of the two inputs; the first layer takes
            ``input_size * 2`` features. Defaults to the number of columns
            of ``U_valid_input``.
        output_size: Width of the final linear layer. Defaults to the number
            of columns of ``valid_target``.

    Layout: ``dense1`` (Linear + ReLU), ``dense2``..``dense5`` (Linear +
    BatchNorm1d + ReLU + Dropout(0.5), all 2048 wide), ``dense6`` (Linear).
    """

    def __init__(self,
                 input_size=U_valid_input.shape[1],
                 output_size=valid_target.shape[1]):
        super().__init__()

        hidden_width = 2048

        def hidden_stage():
            # One 2048 -> 2048 stage with batch norm, ReLU and dropout.
            return nn.Sequential(
                nn.Linear(hidden_width, hidden_width),
                nn.BatchNorm1d(hidden_width),
                nn.ReLU(),
                nn.Dropout(0.5))

        # Attribute names and creation order are kept so that saved
        # state dicts keep matching.
        self.dense1 = nn.Sequential(
            nn.Linear(input_size * 2, hidden_width),
            nn.ReLU())
        self.dense2 = hidden_stage()
        self.dense3 = hidden_stage()
        self.dense4 = hidden_stage()
        self.dense5 = hidden_stage()
        self.dense6 = nn.Sequential(
            nn.Linear(hidden_width, output_size))

    def forward(self, input_val, input_bin):
        """Concatenate both inputs along dim 1 and run them through all stages."""
        hidden = torch.cat([input_val, input_bin], dim=1)
        for stage in (self.dense1, self.dense2, self.dense3,
                      self.dense4, self.dense5, self.dense6):
            hidden = stage(hidden)
        return hidden

In [158]:
class DenseModelHVG(nn.Module):
    """Fully connected network with one output per entry of ``hvg_index``.

    Args:
        input_size: Width of *each* of the two inputs; the first layer takes
            ``input_size * 2`` features. Defaults to the number of columns
            of ``U_valid_input``.
        output_size: Width of the final linear layer. Defaults to the number
            of entries in ``hvg_index``.

    Layout: ``dense1`` (Linear + ReLU), ``dense2``..``dense5`` (Linear +
    BatchNorm1d + ReLU + Dropout(0.5), all 2048 wide), ``dense6`` (Linear).
    """

    def __init__(self,
                 input_size=U_valid_input.shape[1],
                 output_size=hvg_index.shape[0]):
        super().__init__()

        width = 2048
        drop_prob = 0.5

        # Input stage: both feature sets side by side, hence the factor 2.
        self.dense1 = nn.Sequential(nn.Linear(input_size * 2, width), nn.ReLU())

        # Four identical hidden stages, built and registered in order so the
        # attribute names (dense2..dense5) match previously saved state dicts.
        hidden_stages = [
            nn.Sequential(
                nn.Linear(width, width),
                nn.BatchNorm1d(width),
                nn.ReLU(),
                nn.Dropout(drop_prob))
            for _ in range(4)
        ]
        self.dense2, self.dense3, self.dense4, self.dense5 = hidden_stages

        # Output stage.
        self.dense6 = nn.Sequential(nn.Linear(width, output_size))

    def forward(self, input_val, input_bin):
        """Concatenate both inputs along dim 1 and run them through all stages."""
        joined = torch.cat([input_val, input_bin], dim=1)
        stage1 = self.dense1(joined)
        stage2 = self.dense2(stage1)
        stage3 = self.dense3(stage2)
        stage4 = self.dense4(stage3)
        stage5 = self.dense5(stage4)
        return self.dense6(stage5)

In [159]:
# Build both networks with their default layer sizes and move them to the GPU.
# NOTE(review): both names are bound again to fresh instances right before the
# saved weights are loaded further down, so these two objects look unused —
# confirm before removing this cell.
dense_model = DenseModel().cuda()
dense_model_hvg = DenseModelHVG().cuda()

Load model parameters:

In [160]:
# Fresh full-output network on the GPU.
dense_model = DenseModel().cuda()

# Read the saved parameters from disk, then copy them into the network.
# load_state_dict reports whether all keys matched; it is the cell output.
dense_model_state = torch.load('dense_model.pt')
dense_model.load_state_dict(dense_model_state)

<All keys matched successfully>

In [161]:
# Fresh HVG network on the GPU.
dense_model_hvg = DenseModelHVG().cuda()

# Read the saved parameters from disk, then copy them into the network.
# load_state_dict reports whether all keys matched; it is the cell output.
dense_model_hvg_state = torch.load('dense_model_hvg.pt')
dense_model_hvg.load_state_dict(dense_model_hvg_state)

<All keys matched successfully>

In [162]:
def pcorr_vec(X, Y):
    """Mean row-wise Pearson correlation between two 2-D tensors.

    Row ``k`` of ``X`` is correlated with row ``k`` of ``Y``; the per-row
    coefficients are then averaged.

    Args:
        X: 2-D tensor of shape (rows, features).
        Y: 2-D tensor with the same shape as ``X``.

    Returns:
        0-dim tensor holding the average correlation. A row with zero
        variance in ``X`` or ``Y`` yields a 0/0 division and therefore NaN.
    """
    # Center every row.
    Xnorm = X - X.mean(dim=1, keepdim=True)
    Ynorm = Y - Y.mean(dim=1, keepdim=True)

    # Row-wise dot products. These are exactly the diagonals of
    # Xnorm @ Ynorm.T etc., computed without materializing the full
    # (rows x rows) matrices.
    cov = (Xnorm * Ynorm).sum(dim=1)
    Xsigma2 = (Xnorm * Xnorm).sum(dim=1)
    Ysigma2 = (Ynorm * Ynorm).sum(dim=1)

    sigma = torch.sqrt(Xsigma2 * Ysigma2)
    return torch.mean(cov / sigma)

In [163]:
# Inference only: switch both networks to eval mode (affects the
# BatchNorm1d and Dropout layers they contain).
dense_model_hvg.eval()
dense_model.eval()

n_batches = len(valid_dataloader_final)
pcorr_avg_true = np.zeros(shape=n_batches)   # mean correlation per batch
weights = np.zeros(shape=n_batches)          # batch sizes, used as weights

# Loop-invariant mapping: target column in `outputs` -> source column in
# `outputs_hvg`. Built once instead of once per batch. Using a dict keeps
# the last source for a repeated target column, which matches the result of
# writing the columns one after another.
hvg_source_by_column = {int(column): j for j, column in enumerate(hvg_index)}
eval_device = next(dense_model.parameters()).device
hvg_columns = torch.tensor(list(hvg_source_by_column.keys()),
                           dtype=torch.long, device=eval_device)
hvg_sources = torch.tensor(list(hvg_source_by_column.values()),
                           dtype=torch.long, device=eval_device)

with torch.no_grad():
    for i, (inputs_val, inputs_bin, targets_ori) in enumerate(valid_dataloader_final):

        outputs = dense_model(inputs_val, inputs_bin)
        outputs_hvg = dense_model_hvg(inputs_val, inputs_bin)

        # Overwrite the HVG columns of the full prediction with the HVG
        # model's predictions in a single vectorized write.
        outputs[:, hvg_columns] = outputs_hvg[:, hvg_sources]

        weights[i] = targets_ori.shape[0]
        # .item() extracts a Python float explicitly instead of relying on an
        # implicit conversion of a 0-dim device tensor into the NumPy array.
        pcorr_avg_true[i] = pcorr_vec(targets_ori, outputs).item()

# Batch means weighted by batch size, rounded to 6 decimals.
score_true = round(np.average(pcorr_avg_true, weights=weights), 6)

score_true

0.669148