In [102]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import scanpy as sc
import os as os
from scipy.stats import spearmanr
from matplotlib import pyplot as plt

# Load models and selected genes

In [103]:
# Load one saved Keras model per cluster id (0-6); list position == cluster id.
# NOTE(review): the spelling 'clsuter' is part of the path as written --
# presumably it matches the files on disk, so do not "correct" it without
# renaming the files too.
array_of_models = [
    tf.keras.models.load_model(
        'Best_performance_model_for_clsuter_' + str(cluster_id) + '_final.h5'
    )
    for cluster_id in range(7)
]

In [104]:
# Load the saved gene-index array of every cluster id (0-6); list position ==
# cluster id. PredictWithSelection uses entry k to index the sample vector
# before feeding it to model k.
selected_gene_index = [
    np.load('Selected_genes/selected_gene' + str(cluster_id) + '.npy')
    for cluster_id in range(7)
]

In [105]:
# Gene names used as the default `gene_names` argument of Preprocessing.
# SECURITY: allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only load this file from a trusted source.
# NOTE(review): presumably the order of these names is the order the arrays in
# selected_gene_index refer to -- confirm.
gene_names = np.load('Selected_genes/Gene_names.npy', allow_pickle=True)

In [106]:
# Labels written to the 'Cell type' column by PredictWithSelection.
# Position k labels the prediction produced by model k.
cluster_names = [
    'CD4+ T cells',   # 0
    'Monocytes',      # 1
    'NK cells',       # 2
    'B cells',        # 3
    'CD8+ T cells',   # 4
    'Platelets',      # 5
    'DC cells',       # 6
]

# Preprocessing and prediction

In [107]:
def Preprocessing(countmatrix,colname_of_gene_name, colname_of_counts, gene_names=gene_names):
    """Turn one sample's count table into a scaled expression vector.

    For every entry of ``gene_names`` (in order), the count of the first row of
    ``countmatrix`` whose gene-name column equals that entry is
    ``log1p``-transformed. Genes that do not occur in the table contribute 0
    and a warning line is printed for each. The resulting vector is then
    min-max scaled over its own values with ``MinMaxScaler``.

    Parameters
    ----------
    countmatrix : pandas.DataFrame
        Table of one sample holding a gene-name column and a count column.
    colname_of_gene_name : column label
        Column of ``countmatrix`` containing the gene names.
    colname_of_counts : column label
        Column of ``countmatrix`` containing the counts.
    gene_names : iterable of str, optional
        Genes to extract, in output order. Defaults to the module-level
        ``gene_names`` as it was when this function was defined.

    Returns
    -------
    numpy.ndarray
        1-D array with one scaled value per entry of ``gene_names``.
    """
    # Index the table once instead of scanning the whole gene-name column (and
    # building a boolean mask) for every requested gene. setdefault keeps the
    # FIRST occurrence of a duplicated gene name, matching the `[0]` pick of a
    # row-filter based lookup.
    first_count_by_gene = {}
    for name, count in zip(countmatrix[colname_of_gene_name].values,
                           countmatrix[colname_of_counts].values):
        first_count_by_gene.setdefault(name, count)

    X = []
    for gene in gene_names:
        if gene in first_count_by_gene:
            X.append(np.log1p(first_count_by_gene[gene]))
        else:
            print('Warning! Missing an expression! '+gene)
            X.append(0)

    # The scaler is fitted on this sample alone: the vector is reshaped to a
    # single column so min/max are taken across the genes of the sample.
    scaler = MinMaxScaler()
    X = scaler.fit_transform(np.array(X).reshape(-1,1)).flatten()
    return X

In [108]:
def PredictWithSelection(Xs, selected_gene_index=selected_gene_index,model_list=array_of_models):
    """Predict one proportion per cell type for a single preprocessed sample.

    For each cluster id 0-6 the sample is indexed with that cluster's entry of
    ``selected_gene_index``, wrapped into a batch of one, and passed to the
    matching model's ``predict``; the first value of the first output row is
    kept. The value for cluster 6 is divided by 10.

    Parameters
    ----------
    Xs : numpy.ndarray
        Sample vector; it is indexed with each array in ``selected_gene_index``.
        (Presumably the output of ``Preprocessing`` -- confirm.)
    selected_gene_index : sequence of index arrays, optional
        One index array per cluster id.
    model_list : sequence of models exposing ``predict``, optional
        One model per cluster id.

    Returns
    -------
    pandas.DataFrame
        Columns 'Cell type' (from the module-level ``cluster_names``) and
        'Proportion', one row per cluster.
    """
    # Exactly one sample is handled per call, so the reported id is always 0.
    sample_id = 0
    per_cluster_outputs = []
    for cluster in range(7):
        print('Deconvoluting cluster '+str(cluster))
        features = Xs[selected_gene_index[cluster]]
        print('Deconvoluting sample '+str(sample_id))
        first_row = model_list[cluster].predict(np.array([features]))[0]
        proportion = first_row.flatten()[0]
        if cluster == 6:
            # NOTE(review): only the last cluster ('DC cells' in cluster_names)
            # is rescaled; presumably its model was trained on targets
            # multiplied by 10 -- confirm.
            proportion = proportion/10
        per_cluster_outputs.append(np.array([proportion]))

    results_df = pd.DataFrame()
    results_df['Cell type'] = cluster_names
    results_df['Proportion'] = np.array(per_cluster_outputs).flatten()
    return results_df

In [109]:
'''
1. Run the Preprocessing function. It processes one sample at a time;
to process a batch of samples, call it in a loop.
2. Run the PredictWithSelection function on the preprocessed sample to get a
DataFrame with the predicted cell-type proportions.
'''

'\n1. Run the Preprocessing function. It processes one sample at a time;\nto process a batch of samples, call it in a loop.\n2. Run the PredictWithSelection function on the preprocessed sample to get a\nDataFrame with the predicted cell-type proportions.\n'