In [None]:
# Fetch the LincSpectr sources into ./LincSpectr.
# NOTE(review): the later `import utils` / `import modules` / `import workflow`
# only resolve if the repository directory is the working directory or is on
# sys.path — confirm whether a `%cd LincSpectr` is needed after cloning.
!git clone https://github.com/Kei0501/LincSpectr

# Installation

In [None]:
!pip install scanpy==1.9.6 ssqueezepy==0.6.4 pynwb==2.5.0

# Import packages

In [None]:
import numpy as np
import random
import torch
import umap
import seaborn as sns
%matplotlib inline
from matplotlib import pyplot as plt

import utils
import modules

# Load data and preprocessing
LincSpectr requires single-cell transcriptome data and electrophysiological data transformed with the continuous wavelet transform (CWT). The transcriptome data are processed in `workflow.py`.
The CWT-transformed electrophysiological data are saved in `.npy` format.

In [None]:
# Prepare electrophysiological data.
# 1. Collect the entries matching the pattern "./000008/*".
# 2. Derive the list of cell names from those file names.
# 3. Transform the recordings, passing "./data_for_VAE/" as the target location.
#    (presumably this writes the CWT-transformed .npy files there; a file from
#    that directory is loaded later in the notebook — confirm in utils.py)
file_names = utils.collect_filename("./000008/*")
cell_list = utils.collect_cellname(file_names)
utils.transform_efeatures(file_names, "./data_for_VAE/",cell_list)

# Training model
LincSpectr trains three models and estimates the electrophysiological features from transcriptomic features.

In [None]:
# NOTE(review): the cells below read the models, the dataset and the AnnData
# object as attributes of this module, so importing it presumably runs the
# preprocessing and model training at import time — confirm in workflow.py.
import workflow

In [None]:
# Bring the three model objects exposed by the workflow module into the
# notebook namespace under short names.
t_vae, e_vae, linkz_model = (
    workflow.t_vae,
    workflow.e_vae,
    workflow.linkz_model,
)

# Visualization of latent space

In [None]:
# Gather the paired inputs for every sample. Each dataset item is fetched once
# and split into the part fed to t_vae (index 0) and the part fed to e_vae
# (index 1).
t_test, e_test = [], []
for i in range(len(workflow.dataset)):
    sample = workflow.dataset[i]
    t_test.append(sample[0])
    e_test.append(sample[1])
test_x = torch.stack(t_test, dim=0).to(workflow.device)
# NOTE: despite its name, test_xcell_id holds the stacked index-1 items (the
# input of e_vae in the next cell), not cell identifiers.
test_xcell_id = torch.stack(e_test, dim=0).to(workflow.device)

# Run the transcriptome model in evaluation mode without tracking gradients.
t_vae.to(workflow.device)
t_vae.eval()
with torch.no_grad():
    tz, qz, xld = t_vae(test_x)

# Project the first model output (tz) to 2-D. random_state is fixed so that
# re-running the notebook reproduces the same embedding.
reducer = umap.UMAP(n_neighbors=15, min_dist=0.01, random_state=0)
embedding = reducer.fit_transform(tz.cpu().detach().numpy())

# Colour every point by the 'RNA family' annotation of the AnnData object.
ax = sns.scatterplot(x=embedding[:, 0], y=embedding[:, 1], hue=workflow.adata.obs['RNA family'])
ax.set(xlabel="UMAP 1", ylabel="UMAP 2", title="t_vae latent space")
# Place the legend outside the axes so it does not cover the points.
ax.legend(loc='upper left', bbox_to_anchor=(1.0, 1.0));

In [None]:
# Run the electrophysiology model in evaluation mode without tracking
# gradients. Every sample is flattened to a vector of
# set_timeax * set_freqax values before being passed to the model.
e_vae.to(workflow.device)
e_vae.eval()
with torch.no_grad():
    ez, qz, ld_img = e_vae(test_xcell_id.view(-1, workflow.set_timeax * workflow.set_freqax))

# Project the first model output (ez) to 2-D. random_state is fixed so that
# re-running the notebook reproduces the same embedding.
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, random_state=0)
embedding = reducer.fit_transform(ez.cpu().detach().numpy())

# Colour every point by the 'RNA family' annotation of the AnnData object.
ax = sns.scatterplot(x=embedding[:, 0], y=embedding[:, 1], hue=workflow.adata.obs['RNA family'])
ax.set(xlabel="UMAP 1", ylabel="UMAP 2", title="e_vae latent space")
# Place the legend outside the axes so it does not cover the points.
ax.legend(loc='upper left', bbox_to_anchor=(1.0, 1.0));

# Estimate e-features from t-features

In [None]:
# Short alias for modules.LincSpectr; it is handed to utils.show_prediction and
# utils.inverse_analysis in the cells below.
LincSpectr = modules.LincSpectr

In [None]:
# Draw two entries of workflow.valid_list uniformly at random. The draws are
# independent, so both names may refer to the same cell.
n_valid = len(workflow.valid_list)
cell_name1 = workflow.valid_list[random.randrange(n_valid)]
cell_name2 = workflow.valid_list[random.randrange(n_valid)]

# Stored sample array; the same path is reused later to recover the image shape.
sample_data = "./data_for_VAE/20171204_sample_2.npy"

utils.show_prediction(
    sample_data, cell_name1, cell_name2, workflow.adata,
    LincSpectr, t_vae, e_vae, linkz_model,
    workflow.val_ez_train, workflow.val_ez,
)

# Inverse analysis of the model

In [None]:
# Inverse analysis of Vip cells.
# The image shape handed to the analysis is (time axis, frequency axis) as
# configured in the workflow module.
image_shape = (workflow.set_timeax, workflow.set_freqax)
avr_express = utils.average_expression(workflow.adata, workflow.count_mat, "Vip")
u_pick, top_genes, top_expression = utils.inverse_analysis(
    avr_express, workflow.adata, LincSpectr, t_vae, e_vae, linkz_model,
    workflow.val_ez_train, workflow.val_ez,
    N=10, image_shape=image_shape,
)

In [None]:
# Give u_pick the same shape as the stored sample array, then move it to host
# memory as an independent NumPy copy for plotting.
target_shape = np.load(sample_data).shape
u_pick = u_pick.reshape(target_shape)
upick_image = u_pick.detach().cpu().numpy().copy()

# Render as an image; the colour scale is clamped at 0 from below.
plt.figure(figsize=(10, 8))
plt.imshow(upick_image, aspect='auto', cmap='turbo', vmin=0)

In [None]:
# Bar chart of the values returned alongside the top genes, one bar per gene,
# with the tick labels rotated so the gene names stay readable.
fig, ax = plt.subplots()
ax.bar(top_genes, top_expression)
ax.tick_params(axis="x", labelrotation=45)
plt.show()

In [None]:
# List the top genes, one per line (nothing is printed when there are none).
if len(top_genes):
    print(*top_genes, sep="\n")