In [1]:
import sys
sys.path.append('..')
from data.data_reader import *

In [2]:
import tqdm
import scanpy as sc

import pandas as pd

In [3]:
download_file('https://plus.figshare.com/ndownloader/files/35775512','35775512.h5ad')
adata_orig = sc.read_h5ad("35775512.h5ad")
adata_orig.X[adata_orig.X == float("inf")]=0

File downloaded successfully to 35775512.h5ad


In [4]:
adata_orig.obs['gene_name']=list(pd.Series(adata_orig.obs.index).apply(lambda x:x.split("_")[1]))
adata_orig.obs['id']=range(adata_orig.obs.shape[0])

In [5]:
import tqdm
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [6]:
def cosine_similarity(A):
  AAt=np.matmul(A,A.transpose())
  n_A=np.sqrt((A**2).sum(axis=1)).reshape(-1,1)
  n_A=np.matmul(n_A,n_A.transpose())
  return AAt/(n_A)

In [7]:
class X_dataset(Dataset):
    def __init__(self,data):
        self.data=data
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        return {'x':torch.tensor(self.data.X[idx]),'c':torch.tensor(self.data.obs.iloc[idx]['core_control'])}


In [8]:
dataset=X_dataset(adata_orig)
train_loader=DataLoader(dataset,batch_size=32,shuffle=True)

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class Encoder(nn.Module):
    def __init__(self, latent_dim=10,kl_coef=0.000001):
        super(Encoder, self).__init__()
        self.latent_dim=latent_dim
        self.kl_coef=kl_coef
        self.dense1=nn.Linear(dataset[0]['x'].shape[0],1024)
        self.bn1=nn.BatchNorm1d(1024)
        self.dense2=nn.Linear(1024,256)
        self.bn2=nn.BatchNorm1d(256)
        self.mu=nn.Linear(256, latent_dim)
        self.logvar=nn.Linear(256, latent_dim)
        self.kl = 0
    def reparameterize(self, mu , logvar):
        std = torch.exp(logvar*0.5)
        eps = torch.randn_like(std).to(device)
        z = mu + eps * std
        return z
    def forward(self, x):
        bn=x.size(0)
        x=F.relu(self.bn1(self.dense1(x)))
        x=F.relu(self.bn2(self.dense2(x)))
        mu =  self.mu(x)
        logvar = self.logvar(x)
        z=self.reparameterize(mu , logvar)
        self.kl = 0.5*(logvar.exp() + mu**2 - logvar - 1).sum()*self.kl_coef
        return z


class Decoder(nn.Module):
    def __init__(self, latent_dim=8):
        super(Decoder, self).__init__()
        self.dense1=nn.Linear(latent_dim,256)
        self.bn1=nn.BatchNorm1d(256)
        self.dense2=nn.Linear(256,1024)
        self.bn2=nn.BatchNorm1d(1024)
        self.out=nn.Linear(1024,dataset[0]['x'].shape[0])

    def forward(self, z):
        z = F.relu(self.bn1(self.dense1(z)))
        z = F.relu(self.bn2(self.dense2(z)))
        z = self.out(z)
        return z

class VariationalAutoencoder(nn.Module):
    def __init__(self, latent_dims=10,kl_coef=0.000001):
        super(VariationalAutoencoder, self).__init__()
        self.encoder = Encoder(latent_dims,kl_coef).to(device)
        self.decoder = Decoder(latent_dims).to(device)

    def forward(self, x):
        z = self.encoder(x)
        return self.decoder(z)


In [10]:
# autoencoder=VariationalAutoencoder(20,0.1)
autoencoder=VariationalAutoencoder(20,1e-9)
opt = torch.optim.Adam(autoencoder.parameters(),lr=0.001)
loss_fn=torch.nn.MSELoss()


  return {'x':torch.tensor(self.data.X[idx]),'c':torch.tensor(self.data.obs.iloc[idx]['core_control'])}


In [11]:
autoencoder.train()
for epoch in range(100):
    train_loss1=0
    train_loss2=0
    autoencoder.train()
    for batch in tqdm.tqdm(train_loader):
        x = batch['x'].to(device) # GPU
        c = batch['c'].to(device) # GPU
        opt.zero_grad()
        x_hat = autoencoder(x)
        loss1=loss_fn(x_hat,x)
        loss2=autoencoder.encoder.kl
        loss = loss1 + loss2
        train_loss1+=loss1.detach().cpu().numpy()
        train_loss2+=loss2.detach().cpu().numpy()
        loss.backward()
        opt.step()
    print(f"TRAIN: EPOCH {epoch}: MSE: {train_loss1/len(train_loader)}, KL_LOSS: {train_loss2/len(train_loader)}")



  return {'x':torch.tensor(self.data.X[idx]),'c':torch.tensor(self.data.obs.iloc[idx]['core_control'])}
100%|██████████| 84/84 [00:01<00:00, 56.53it/s]


TRAIN: EPOCH 0: MSE: 0.07599483567866541, KL_LOSS: 1.2950634110421247e-05


100%|██████████| 84/84 [00:00<00:00, 99.57it/s] 


TRAIN: EPOCH 1: MSE: 0.0530475355862152, KL_LOSS: 1.1833086121203537e-05


100%|██████████| 84/84 [00:00<00:00, 93.47it/s] 


TRAIN: EPOCH 2: MSE: 0.05021551954338238, KL_LOSS: 5.3096080082901784e-06


100%|██████████| 84/84 [00:00<00:00, 97.62it/s] 


TRAIN: EPOCH 3: MSE: 0.04816164717166906, KL_LOSS: 7.607900126263953e-06


100%|██████████| 84/84 [00:00<00:00, 99.79it/s] 


TRAIN: EPOCH 4: MSE: 0.04639153293378297, KL_LOSS: 2.6944666390569354e-06


100%|██████████| 84/84 [00:00<00:00, 102.91it/s]


TRAIN: EPOCH 5: MSE: 0.045034820147390874, KL_LOSS: 2.764525152629392e-06


100%|██████████| 84/84 [00:00<00:00, 94.96it/s] 


TRAIN: EPOCH 6: MSE: 0.04314912817928763, KL_LOSS: 2.9201094445765886e-06


100%|██████████| 84/84 [00:00<00:00, 94.53it/s]


TRAIN: EPOCH 7: MSE: 0.042327215163303275, KL_LOSS: 3.1695516733114867e-06


100%|██████████| 84/84 [00:01<00:00, 79.75it/s]


TRAIN: EPOCH 8: MSE: 0.042004525306678954, KL_LOSS: 3.4079827227764154e-06


100%|██████████| 84/84 [00:00<00:00, 94.13it/s] 


TRAIN: EPOCH 9: MSE: 0.0409293006662102, KL_LOSS: 3.3242593341734996e-06


100%|██████████| 84/84 [00:00<00:00, 89.31it/s]


TRAIN: EPOCH 10: MSE: 0.039465027439984535, KL_LOSS: 3.4390792967271753e-06


100%|██████████| 84/84 [00:00<00:00, 95.78it/s]


TRAIN: EPOCH 11: MSE: 0.03964578067617757, KL_LOSS: 3.4630598406693025e-06


100%|██████████| 84/84 [00:00<00:00, 93.82it/s]


TRAIN: EPOCH 12: MSE: 0.03773038752288336, KL_LOSS: 3.577371701556417e-06


100%|██████████| 84/84 [00:00<00:00, 97.25it/s] 


TRAIN: EPOCH 13: MSE: 0.03934165591462737, KL_LOSS: 3.74820151212892e-06


100%|██████████| 84/84 [00:00<00:00, 94.17it/s] 


TRAIN: EPOCH 14: MSE: 0.03920798861820783, KL_LOSS: 4.03948489845009e-06


100%|██████████| 84/84 [00:00<00:00, 86.72it/s]


TRAIN: EPOCH 15: MSE: 0.03711988741443271, KL_LOSS: 3.864219452441917e-06


100%|██████████| 84/84 [00:00<00:00, 95.94it/s] 


TRAIN: EPOCH 16: MSE: 0.036783952465546985, KL_LOSS: 3.932616114828609e-06


100%|██████████| 84/84 [00:00<00:00, 98.96it/s] 


TRAIN: EPOCH 17: MSE: 0.036082794512843804, KL_LOSS: 3.835091609700958e-06


100%|██████████| 84/84 [00:00<00:00, 95.00it/s] 


TRAIN: EPOCH 18: MSE: 0.03599977262672924, KL_LOSS: 3.829588016352707e-06


100%|██████████| 84/84 [00:00<00:00, 95.65it/s]


TRAIN: EPOCH 19: MSE: 0.035131082515276614, KL_LOSS: 3.830599084830326e-06


100%|██████████| 84/84 [00:00<00:00, 96.53it/s] 


TRAIN: EPOCH 20: MSE: 0.03480945030848185, KL_LOSS: 3.896504974168888e-06


100%|██████████| 84/84 [00:00<00:00, 91.95it/s]


TRAIN: EPOCH 21: MSE: 0.035288261560102306, KL_LOSS: 3.955649755685694e-06


100%|██████████| 84/84 [00:00<00:00, 95.95it/s] 


TRAIN: EPOCH 22: MSE: 0.034001547089290055, KL_LOSS: 3.940499403770878e-06


100%|██████████| 84/84 [00:00<00:00, 94.82it/s]


TRAIN: EPOCH 23: MSE: 0.03344839771411249, KL_LOSS: 3.8875749380297616e-06


100%|██████████| 84/84 [00:00<00:00, 96.22it/s]


TRAIN: EPOCH 24: MSE: 0.03371377573126838, KL_LOSS: 4.0248253028385945e-06


100%|██████████| 84/84 [00:00<00:00, 94.75it/s]


TRAIN: EPOCH 25: MSE: 0.0333868336109888, KL_LOSS: 4.031150643597703e-06


100%|██████████| 84/84 [00:00<00:00, 95.08it/s]


TRAIN: EPOCH 26: MSE: 0.033119641177888424, KL_LOSS: 4.100250120722021e-06


100%|██████████| 84/84 [00:00<00:00, 95.35it/s]


TRAIN: EPOCH 27: MSE: 0.032804881710381734, KL_LOSS: 4.153943066176864e-06


100%|██████████| 84/84 [00:00<00:00, 92.14it/s]


TRAIN: EPOCH 28: MSE: 0.031988104716652914, KL_LOSS: 4.1703624907270186e-06


100%|██████████| 84/84 [00:00<00:00, 99.22it/s] 


TRAIN: EPOCH 29: MSE: 0.032451020110221135, KL_LOSS: 4.16757832984634e-06


100%|██████████| 84/84 [00:00<00:00, 95.36it/s]


TRAIN: EPOCH 30: MSE: 0.031427642640968166, KL_LOSS: 4.183146359082457e-06


100%|██████████| 84/84 [00:00<00:00, 99.80it/s] 


TRAIN: EPOCH 31: MSE: 0.032462878130553735, KL_LOSS: 4.43093742996141e-06


100%|██████████| 84/84 [00:00<00:00, 97.61it/s] 


TRAIN: EPOCH 32: MSE: 0.031166721889305683, KL_LOSS: 4.333155253616064e-06


100%|██████████| 84/84 [00:00<00:00, 98.78it/s] 


TRAIN: EPOCH 33: MSE: 0.030837562910857656, KL_LOSS: 4.389222402507931e-06


100%|██████████| 84/84 [00:00<00:00, 94.48it/s]


TRAIN: EPOCH 34: MSE: 0.03066210885576549, KL_LOSS: 4.436154835643503e-06


100%|██████████| 84/84 [00:00<00:00, 97.63it/s] 


TRAIN: EPOCH 35: MSE: 0.030205131353189547, KL_LOSS: 4.407532888503088e-06


100%|██████████| 84/84 [00:00<00:00, 97.11it/s]


TRAIN: EPOCH 36: MSE: 0.03012133072618218, KL_LOSS: 4.45226449874965e-06


100%|██████████| 84/84 [00:00<00:00, 90.94it/s]


TRAIN: EPOCH 37: MSE: 0.029973993321792, KL_LOSS: 4.5250002277238565e-06


100%|██████████| 84/84 [00:00<00:00, 93.76it/s]


TRAIN: EPOCH 38: MSE: 0.029883691553203834, KL_LOSS: 4.512393911140645e-06


100%|██████████| 84/84 [00:00<00:00, 93.34it/s]


TRAIN: EPOCH 39: MSE: 0.029291744787423385, KL_LOSS: 4.532272387656294e-06


100%|██████████| 84/84 [00:00<00:00, 95.09it/s]


TRAIN: EPOCH 40: MSE: 0.030080470500425213, KL_LOSS: 4.656254870518578e-06


100%|██████████| 84/84 [00:00<00:00, 88.82it/s]


TRAIN: EPOCH 41: MSE: 0.029204563947305792, KL_LOSS: 4.654600220852617e-06


100%|██████████| 84/84 [00:00<00:00, 86.62it/s]


TRAIN: EPOCH 42: MSE: 0.029012578628247694, KL_LOSS: 4.5851486934874885e-06


100%|██████████| 84/84 [00:00<00:00, 90.78it/s]


TRAIN: EPOCH 43: MSE: 0.028238427359610796, KL_LOSS: 4.604516052928548e-06


100%|██████████| 84/84 [00:00<00:00, 91.66it/s]


TRAIN: EPOCH 44: MSE: 0.028729276310297706, KL_LOSS: 4.688118308205871e-06


100%|██████████| 84/84 [00:00<00:00, 92.27it/s]


TRAIN: EPOCH 45: MSE: 0.02823957104590677, KL_LOSS: 4.690481023858627e-06


100%|██████████| 84/84 [00:00<00:00, 93.32it/s]


TRAIN: EPOCH 46: MSE: 0.027948697008902117, KL_LOSS: 4.676884337922751e-06


100%|██████████| 84/84 [00:00<00:00, 94.91it/s]


TRAIN: EPOCH 47: MSE: 0.027195735479749385, KL_LOSS: 4.692308972867717e-06


100%|██████████| 84/84 [00:00<00:00, 90.84it/s]


TRAIN: EPOCH 48: MSE: 0.028735430622916846, KL_LOSS: 4.908642985074471e-06


100%|██████████| 84/84 [00:00<00:00, 92.00it/s]


TRAIN: EPOCH 49: MSE: 0.027945596746922957, KL_LOSS: 4.7492534836887245e-06


100%|██████████| 84/84 [00:00<00:00, 94.06it/s]


TRAIN: EPOCH 50: MSE: 0.028170684308168433, KL_LOSS: 4.80940239066556e-06


100%|██████████| 84/84 [00:00<00:00, 92.01it/s]


TRAIN: EPOCH 51: MSE: 0.02823577710382995, KL_LOSS: 4.829300729895858e-06


100%|██████████| 84/84 [00:00<00:00, 89.02it/s]


TRAIN: EPOCH 52: MSE: 0.027584667450615337, KL_LOSS: 4.880251080543357e-06


100%|██████████| 84/84 [00:00<00:00, 94.91it/s] 


TRAIN: EPOCH 53: MSE: 0.02736465952226094, KL_LOSS: 4.918039551393165e-06


100%|██████████| 84/84 [00:01<00:00, 83.88it/s]


TRAIN: EPOCH 54: MSE: 0.02737184897774742, KL_LOSS: 4.9868401349937945e-06


100%|██████████| 84/84 [00:00<00:00, 90.43it/s]


TRAIN: EPOCH 55: MSE: 0.027762185870891527, KL_LOSS: 5.027743194811289e-06


100%|██████████| 84/84 [00:00<00:00, 94.97it/s]


TRAIN: EPOCH 56: MSE: 0.027389774254212778, KL_LOSS: 5.023120281965107e-06


100%|██████████| 84/84 [00:00<00:00, 92.08it/s]


TRAIN: EPOCH 57: MSE: 0.026029419752636125, KL_LOSS: 5.060797472348592e-06


100%|██████████| 84/84 [00:00<00:00, 88.76it/s]


TRAIN: EPOCH 58: MSE: 0.02537402430815356, KL_LOSS: 5.042009761490633e-06


100%|██████████| 84/84 [00:00<00:00, 91.43it/s]


TRAIN: EPOCH 59: MSE: 0.025519836583130416, KL_LOSS: 5.093905610378681e-06


100%|██████████| 84/84 [00:01<00:00, 78.17it/s] 


TRAIN: EPOCH 60: MSE: 0.02572247463588913, KL_LOSS: 5.11300240597328e-06


100%|██████████| 84/84 [00:00<00:00, 95.95it/s] 


TRAIN: EPOCH 61: MSE: 0.02536081491659085, KL_LOSS: 5.162732285108567e-06


100%|██████████| 84/84 [00:00<00:00, 95.56it/s] 


TRAIN: EPOCH 62: MSE: 0.025480463989965972, KL_LOSS: 5.1525129563179645e-06


100%|██████████| 84/84 [00:00<00:00, 89.33it/s] 


TRAIN: EPOCH 63: MSE: 0.025141757003785598, KL_LOSS: 5.206757334365518e-06


100%|██████████| 84/84 [00:00<00:00, 88.65it/s]


TRAIN: EPOCH 64: MSE: 0.025050673811208634, KL_LOSS: 5.300580948555546e-06


100%|██████████| 84/84 [00:00<00:00, 87.91it/s]


TRAIN: EPOCH 65: MSE: 0.025251849221863916, KL_LOSS: 5.3168791748162e-06


100%|██████████| 84/84 [00:00<00:00, 87.06it/s]


TRAIN: EPOCH 66: MSE: 0.02454369424265765, KL_LOSS: 5.261780916585156e-06


100%|██████████| 84/84 [00:00<00:00, 89.55it/s] 


TRAIN: EPOCH 67: MSE: 0.024536439018057927, KL_LOSS: 5.292060926703493e-06


100%|██████████| 84/84 [00:00<00:00, 90.93it/s]


TRAIN: EPOCH 68: MSE: 0.02436819909850047, KL_LOSS: 5.324255339257347e-06


100%|██████████| 84/84 [00:00<00:00, 103.20it/s]


TRAIN: EPOCH 69: MSE: 0.024530097465252594, KL_LOSS: 5.360809398423173e-06


100%|██████████| 84/84 [00:00<00:00, 104.23it/s]


TRAIN: EPOCH 70: MSE: 0.02414474536531738, KL_LOSS: 5.295600138716898e-06


100%|██████████| 84/84 [00:00<00:00, 104.79it/s]


TRAIN: EPOCH 71: MSE: 0.024458983380879675, KL_LOSS: 5.4361909070350865e-06


100%|██████████| 84/84 [00:00<00:00, 98.90it/s] 


TRAIN: EPOCH 72: MSE: 0.02420745665828387, KL_LOSS: 5.4327247740037e-06


100%|██████████| 84/84 [00:00<00:00, 104.26it/s]


TRAIN: EPOCH 73: MSE: 0.02420889905520848, KL_LOSS: 5.484465483667529e-06


100%|██████████| 84/84 [00:00<00:00, 101.22it/s]


TRAIN: EPOCH 74: MSE: 0.024082305575055734, KL_LOSS: 5.492600718883166e-06


100%|██████████| 84/84 [00:00<00:00, 103.71it/s]


TRAIN: EPOCH 75: MSE: 0.023616079773221697, KL_LOSS: 5.5104570124274775e-06


100%|██████████| 84/84 [00:00<00:00, 100.46it/s]


TRAIN: EPOCH 76: MSE: 0.02373820554376358, KL_LOSS: 5.6661794068142105e-06


100%|██████████| 84/84 [00:00<00:00, 93.56it/s] 


TRAIN: EPOCH 77: MSE: 0.023728694788934218, KL_LOSS: 5.5828421907444e-06


100%|██████████| 84/84 [00:00<00:00, 100.40it/s]


TRAIN: EPOCH 78: MSE: 0.02304815747109907, KL_LOSS: 5.6051310853685465e-06


100%|██████████| 84/84 [00:00<00:00, 87.16it/s]


TRAIN: EPOCH 79: MSE: 0.022621760189178445, KL_LOSS: 5.56946694511377e-06


100%|██████████| 84/84 [00:00<00:00, 88.76it/s]


TRAIN: EPOCH 80: MSE: 0.022874529517832257, KL_LOSS: 5.5560936267457215e-06


100%|██████████| 84/84 [00:00<00:00, 96.47it/s] 


TRAIN: EPOCH 81: MSE: 0.02307527709663624, KL_LOSS: 5.600390891015108e-06


100%|██████████| 84/84 [00:00<00:00, 95.18it/s]


TRAIN: EPOCH 82: MSE: 0.023351280617394617, KL_LOSS: 5.610278397701525e-06


100%|██████████| 84/84 [00:00<00:00, 97.52it/s] 


TRAIN: EPOCH 83: MSE: 0.022471198967347544, KL_LOSS: 5.518442511250517e-06


100%|██████████| 84/84 [00:01<00:00, 81.02it/s]


TRAIN: EPOCH 84: MSE: 0.02217579384644826, KL_LOSS: 5.555184868301564e-06


100%|██████████| 84/84 [00:00<00:00, 87.57it/s]


TRAIN: EPOCH 85: MSE: 0.022516784657325064, KL_LOSS: 5.602805480347819e-06


100%|██████████| 84/84 [00:00<00:00, 89.17it/s] 


TRAIN: EPOCH 86: MSE: 0.022547257066305195, KL_LOSS: 5.631432161732976e-06


100%|██████████| 84/84 [00:00<00:00, 92.67it/s]


TRAIN: EPOCH 87: MSE: 0.02215177361809072, KL_LOSS: 5.62511966217287e-06


100%|██████████| 84/84 [00:00<00:00, 95.86it/s]


TRAIN: EPOCH 88: MSE: 0.022108280587764012, KL_LOSS: 5.647954583851277e-06


100%|██████████| 84/84 [00:00<00:00, 105.15it/s]


TRAIN: EPOCH 89: MSE: 0.021912939181285247, KL_LOSS: 5.663390890644916e-06


100%|██████████| 84/84 [00:00<00:00, 97.22it/s] 


TRAIN: EPOCH 90: MSE: 0.021684546999278523, KL_LOSS: 5.664510451554504e-06


100%|██████████| 84/84 [00:01<00:00, 75.54it/s]


TRAIN: EPOCH 91: MSE: 0.021637359412298315, KL_LOSS: 5.6580305616600005e-06


100%|██████████| 84/84 [00:00<00:00, 94.63it/s]


TRAIN: EPOCH 92: MSE: 0.0215937710101051, KL_LOSS: 5.688579866121775e-06


100%|██████████| 84/84 [00:01<00:00, 77.73it/s] 


TRAIN: EPOCH 93: MSE: 0.02153025514313153, KL_LOSS: 5.705296519495494e-06


100%|██████████| 84/84 [00:01<00:00, 71.79it/s]


TRAIN: EPOCH 94: MSE: 0.021371551589774235, KL_LOSS: 5.679728663684121e-06


100%|██████████| 84/84 [00:00<00:00, 86.70it/s]


TRAIN: EPOCH 95: MSE: 0.021308498767515022, KL_LOSS: 5.715194404166098e-06


100%|██████████| 84/84 [00:00<00:00, 88.09it/s] 


TRAIN: EPOCH 96: MSE: 0.021468809827984797, KL_LOSS: 5.741109226792538e-06


100%|██████████| 84/84 [00:01<00:00, 83.13it/s]


TRAIN: EPOCH 97: MSE: 0.022693345611471506, KL_LOSS: 5.882818716903178e-06


100%|██████████| 84/84 [00:00<00:00, 95.93it/s]


TRAIN: EPOCH 98: MSE: 0.02128689396860344, KL_LOSS: 5.8265818822855225e-06


100%|██████████| 84/84 [00:00<00:00, 95.55it/s] 

TRAIN: EPOCH 99: MSE: 0.02129365716661726, KL_LOSS: 5.842922730283005e-06





In [12]:
autoencoder.eval()
encoded_x=[]
cs=[]
for rec in tqdm.tqdm(dataset):
    x = rec['x'].reshape(1,-1).to(device) # GPU
    c = rec['c'].reshape(1,).to(device) # GPU
    encoded_x.append(autoencoder.encoder(x).cpu().detach().numpy())
    cs.append(c.cpu().detach().numpy())
encoded_x=np.concatenate(encoded_x,axis=0)
encoded_x=(encoded_x-encoded_x.mean(axis=0,keepdims=True))/encoded_x.std(axis=0,keepdims=True)
cs=np.concatenate(cs,axis=0)
df_to_be_shown=pd.DataFrame(encoded_x,columns=[f'f{i}' for i in range(encoded_x.shape[1])])
df_to_be_shown['control']=cs

  return {'x':torch.tensor(self.data.X[idx]),'c':torch.tensor(self.data.obs.iloc[idx]['core_control'])}
100%|██████████| 2679/2679 [00:02<00:00, 1066.78it/s]


In [13]:
cos_sim_f=cosine_similarity(np.array(df_to_be_shown.drop(['control'], axis=1)))

In [14]:
similarity_matrix=np.zeros(cos_sim_f.shape)
similarity_db=hu_data_loader()

for gene_name in tqdm.tqdm(adata_orig.obs.gene_name.unique()):
    query=query_hu_data(similarity_db,gene_name)
    for q in query:
        if q in adata_orig.obs.gene_name.values:
            y_indices=adata_orig.obs[adata_orig.obs.gene_name==q].id
            x_indices=adata_orig.obs[adata_orig.obs.gene_name==gene_name].id
            for x_id in x_indices:
                for y_id in y_indices:
                    similarity_matrix[y_id,x_id]=1
                    similarity_matrix[x_id,y_id]=1

cos_sim_f_flatten=cos_sim_f.reshape(-1,)
similarity_matrix_flatten=similarity_matrix.reshape(-1,)
cos_sim_f_flatten1=cos_sim_f_flatten[similarity_matrix_flatten==1]
cos_sim_f_flatten0=cos_sim_f_flatten[similarity_matrix_flatten==0]

File downloaded successfully to humap2_complexes_20200809.txt


100%|██████████| 2394/2394 [00:37<00:00, 64.04it/s] 


In [15]:
choice=np.random.choice(cos_sim_f_flatten1.shape[0], 2048)
cos_sim_f_flatten1=cos_sim_f_flatten1[choice]
cos_sim_f_flatten1=pd.DataFrame(cos_sim_f_flatten1,columns=['correlations'])
fig=px.violin(cos_sim_f_flatten1, y='correlations',width=500, height=400,title="SIMILARS")
fig.show()


choice=np.random.choice(cos_sim_f_flatten0.shape[0], 2048)
cos_sim_f_flatten0=cos_sim_f_flatten0[choice]
cos_sim_f_flatten0=pd.DataFrame(cos_sim_f_flatten0,columns=['correlations'])
fig=px.violin(cos_sim_f_flatten0, y='correlations',width=500, height=400,title="Not SIMILARS")
fig.show()

In [16]:
print("Not SIMILARS MEAN:",cos_sim_f_flatten0.mean())
print("SIMILARS MEAN:",cos_sim_f_flatten1.mean())

Not SIMILARS MEAN: correlations    0.031357
dtype: float32
SIMILARS MEAN: correlations    0.415202
dtype: float32


In [32]:
fig=px.scatter(df_to_be_shown,x='f0',y='f1',color='control',width=500, height=400)
fig.show()
fig=px.scatter(df_to_be_shown,x='f2',y='f3',color='control',width=500, height=400)
fig.show()
fig=px.scatter(df_to_be_shown,x='f4',y='f5',color='control',width=500, height=400)
fig.show()











