In [None]:
#The following is intended to replicate the data graphed in figure 1.
#It walks through how to obtain: 1) human single-cell KIF21A expression for beta, alpha, and delta cell types in non-diseased cells (figure 1 H);
#2) human single-cell KIF21A expression for beta, alpha, and delta cell types in non-diseased cells and type-2 diabetes cells (figure 1 D-F);
#3) mouse single-cell Kif21a expression for beta, alpha, and delta cell types in non-diseased cells (figure 1 I).

In [None]:
import scanpy as sc
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [None]:
import requests

#mouse pancreatic scRNA file
url = 'https://datasets.cellxgene.cziscience.com/49243c50-bf0c-4b10-87f8-55ec9f455399.h5ad'

# Stream the response to disk in chunks instead of holding the whole .h5ad in
# memory, and stop on an HTTP error status before anything is written so that
# an error page is never saved under the .h5ad file name.
with requests.get(url, stream=True, timeout=60) as response:
    response.raise_for_status()

    #Save the file locally
    with open('49243c50-bf0c-4b10-87f8-55ec9f455399.h5ad', 'wb') as file:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            file.write(chunk)

In [None]:
#human pancreatic scRNA file
url = 'https://datasets.cellxgene.cziscience.com/d45ff50f-90e1-4983-9388-c5b2ca1f2866.h5ad'

# Stream the response to disk in chunks instead of holding the whole .h5ad in
# memory, and stop on an HTTP error status before anything is written so that
# an error page is never saved under the .h5ad file name.
with requests.get(url, stream=True, timeout=60) as response:
    response.raise_for_status()

    #Save the file locally
    with open('d45ff50f-90e1-4983-9388-c5b2ca1f2866.h5ad', 'wb') as file:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            file.write(chunk)

In [None]:
# Load the mouse pancreatic scRNA file that the download cell saved locally.
sc_MUS_data = sc.read_h5ad("49243c50-bf0c-4b10-87f8-55ec9f455399.h5ad")

In [None]:
# Load the human pancreatic scRNA file that the download cell saved locally.
sc_HUM_data = sc.read_h5ad("d45ff50f-90e1-4983-9388-c5b2ca1f2866.h5ad")

In [None]:
#We will go through the mouse data first and then come back to the human data

In [None]:
# Work on a copy so the loaded mouse object (sc_MUS_data) is left untouched by
# the two preprocessing calls below, whose return values are not used.
# NOTE(review): this assumes .X of the downloaded file holds raw counts -- confirm,
# otherwise the data would be normalized/log-transformed a second time.
output_h5ad = sc_MUS_data.copy()
# Per-cell total-count normalization; no target_sum is passed, so scanpy's default applies.
sc.pp.normalize_total(output_h5ad)
# log(1 + x) transform of the normalized values.
sc.pp.log1p(output_h5ad)

In [None]:
# Keep only the cells whose `disease` annotation is 'normal'.
normal_mask = output_h5ad.obs['disease'] == 'normal'
output_h5ad = output_h5ad[normal_mask, :]

In [None]:
# Split the filtered data into beta, alpha and delta subsets using the
# `cell_type` labels stored in .obs.
cell_type_labels = output_h5ad.obs['cell_type']
beta_h5ad = output_h5ad[cell_type_labels == 'type B pancreatic cell']
alpha_h5ad = output_h5ad[cell_type_labels == 'pancreatic A cell']
delta_h5ad = output_h5ad[cell_type_labels == 'pancreatic D cell']

In [None]:
gene_name = 'Kif21a' #note the lower case letters used! this is specific to mouse genes

# For each cell type: select the column(s) whose feature_name equals gene_name,
# densify and flatten the values, then drop the entries equal to 0.
nonzero_by_type = {}
for column_label, subset in (
    ('Beta cell', beta_h5ad),
    ('Alpha cell', alpha_h5ad),
    ('Delta cell', delta_h5ad),
):
    gene_values = subset[:, subset.var.feature_name == gene_name].X.toarray().flatten()
    nonzero_by_type[column_label] = [value for value in gene_values if value != 0]

beta_expr = nonzero_by_type['Beta cell']
alpha_expr = nonzero_by_type['Alpha cell']
delta_expr = nonzero_by_type['Delta cell']

# The three lists generally differ in length, so each is padded with None up to
# the longest one before being placed side by side as columns.
max_length = max(len(values) for values in nonzero_by_type.values())
df = pd.DataFrame({
    column_label: values + [None] * (max_length - len(values))
    for column_label, values in nonzero_by_type.items()
})
df.to_csv('sc_MUS_Kif21a_expression_cell_types.csv', index=False)

In [None]:
#We will now go through human data

In [None]:
# Work on a copy so the loaded human object (sc_HUM_data) is left untouched by
# the two preprocessing calls below, whose return values are not used.
# NOTE(review): this assumes .X of the downloaded file holds raw counts -- confirm,
# otherwise the data would be normalized/log-transformed a second time.
output_h5ad = sc_HUM_data.copy()
# Per-cell total-count normalization; no target_sum is passed, so scanpy's default applies.
sc.pp.normalize_total(output_h5ad)
# log(1 + x) transform of the normalized values.
sc.pp.log1p(output_h5ad)

In [None]:
# Keep only the cells whose `disease` annotation is 'normal'.
normal_mask = output_h5ad.obs['disease'] == 'normal'
output_h5ad = output_h5ad[normal_mask, :]

In [None]:
# Split the filtered data into beta, alpha and delta subsets using the
# `cell_type` labels stored in .obs.
cell_type_labels = output_h5ad.obs['cell_type']
beta_h5ad = output_h5ad[cell_type_labels == 'type B pancreatic cell']
alpha_h5ad = output_h5ad[cell_type_labels == 'pancreatic A cell']
delta_h5ad = output_h5ad[cell_type_labels == 'pancreatic D cell']

In [None]:
gene_name = 'KIF21A' #note the upper case letters used! this is specific to human genes

# For each cell type: select the column(s) whose feature_name equals gene_name,
# densify and flatten the values, then drop the entries equal to 0.
nonzero_by_type = {}
for column_label, subset in (
    ('Beta cell', beta_h5ad),
    ('Alpha cell', alpha_h5ad),
    ('Delta cell', delta_h5ad),
):
    gene_values = subset[:, subset.var.feature_name == gene_name].X.toarray().flatten()
    nonzero_by_type[column_label] = [value for value in gene_values if value != 0]

beta_expr = nonzero_by_type['Beta cell']
alpha_expr = nonzero_by_type['Alpha cell']
delta_expr = nonzero_by_type['Delta cell']

# The three lists generally differ in length, so each is padded with None up to
# the longest one before being placed side by side as columns.
max_length = max(len(values) for values in nonzero_by_type.values())
df = pd.DataFrame({
    column_label: values + [None] * (max_length - len(values))
    for column_label, values in nonzero_by_type.items()
})
df.to_csv('sc_HUM_Kif21a_expression_cell_types.csv', index=False)

In [None]:
#We will now go through human data for both normal and type-2 diabetes cells

In [None]:
# Start again from a fresh copy of the full human dataset: the previous
# output_h5ad was narrowed to 'normal' cells, and this section also needs the
# type-2 diabetes cells.
# NOTE(review): this assumes .X of the downloaded file holds raw counts -- confirm,
# otherwise the data would be normalized/log-transformed a second time.
output_h5ad = sc_HUM_data.copy()
# Per-cell total-count normalization; no target_sum is passed, so scanpy's default applies.
sc.pp.normalize_total(output_h5ad)
# log(1 + x) transform of the normalized values.
sc.pp.log1p(output_h5ad)

In [None]:
# Split the (not disease-filtered) data into beta, alpha and delta subsets
# using the `cell_type` labels stored in .obs.
cell_type_labels = output_h5ad.obs['cell_type']
beta_h5ad = output_h5ad[cell_type_labels == 'type B pancreatic cell']
alpha_h5ad = output_h5ad[cell_type_labels == 'pancreatic A cell']
delta_h5ad = output_h5ad[cell_type_labels == 'pancreatic D cell']

In [None]:
#We will do each cell type at a time.

In [None]:
#beta cells
# Separate the beta-cell subset by its `disease` annotation.
disease_labels = beta_h5ad.obs['disease']
normal_h5ad = beta_h5ad[disease_labels == 'normal', :]
T2D_h5ad = beta_h5ad[disease_labels == 'type 2 diabetes mellitus', :]

In [None]:
gene_name = 'KIF21A'

# Densified, flattened values of the gene for each disease group; only the
# entries different from 0 are kept.
normal_values = normal_h5ad[:, normal_h5ad.var.feature_name == gene_name].X.toarray().flatten()
normal_expr = [value for value in normal_values if value != 0] #removes 0s

T2D_values = T2D_h5ad[:, T2D_h5ad.var.feature_name == gene_name].X.toarray().flatten()
T2D_expr = [value for value in T2D_values if value != 0]

In [None]:
# The two groups generally contain different numbers of non-zero cells, so the
# shorter list is padded with None to give both columns the same length.
max_length = max(len(normal_expr), len(T2D_expr))
df = pd.DataFrame({
    'normal': normal_expr + [None] * (max_length - len(normal_expr)),
    # This entry previously had no value (a SyntaxError); it is padded the same
    # way as the 'normal' column.
    'T2D_expr': T2D_expr + [None] * (max_length - len(T2D_expr)),
})

df.to_csv('sc_HUM_Kif21a_beta_expr_disease.csv', index=False)

In [None]:
#alpha cells
# Separate the alpha-cell subset by its `disease` annotation.
disease_labels = alpha_h5ad.obs['disease']
normal_h5ad = alpha_h5ad[disease_labels == 'normal', :]
T2D_h5ad = alpha_h5ad[disease_labels == 'type 2 diabetes mellitus', :]

In [None]:
gene_name = 'KIF21A'

# Densified, flattened values of the gene for each disease group; only the
# entries different from 0 are kept.
normal_values = normal_h5ad[:, normal_h5ad.var.feature_name == gene_name].X.toarray().flatten()
normal_expr = [value for value in normal_values if value != 0] #removes 0s

T2D_values = T2D_h5ad[:, T2D_h5ad.var.feature_name == gene_name].X.toarray().flatten()
T2D_expr = [value for value in T2D_values if value != 0]

In [None]:
# The two groups generally contain different numbers of non-zero cells, so the
# shorter list is padded with None to give both columns the same length.
max_length = max(len(normal_expr), len(T2D_expr))
df = pd.DataFrame({
    'normal': normal_expr + [None] * (max_length - len(normal_expr)),
    # This entry previously had no value (a SyntaxError); it is padded the same
    # way as the 'normal' column.
    'T2D_expr': T2D_expr + [None] * (max_length - len(T2D_expr)),
})

df.to_csv('sc_HUM_Kif21a_alpha_expr_disease.csv', index=False)

In [None]:
#delta cells
# Separate the delta-cell subset by its `disease` annotation.
disease_labels = delta_h5ad.obs['disease']
normal_h5ad = delta_h5ad[disease_labels == 'normal', :]
T2D_h5ad = delta_h5ad[disease_labels == 'type 2 diabetes mellitus', :]

In [None]:
gene_name = 'KIF21A'

# Densified, flattened values of the gene for each disease group; only the
# entries different from 0 are kept.
normal_values = normal_h5ad[:, normal_h5ad.var.feature_name == gene_name].X.toarray().flatten()
normal_expr = [value for value in normal_values if value != 0] #removes 0s

T2D_values = T2D_h5ad[:, T2D_h5ad.var.feature_name == gene_name].X.toarray().flatten()
T2D_expr = [value for value in T2D_values if value != 0]

In [None]:
# The two groups generally contain different numbers of non-zero cells, so the
# shorter list is padded with None to give both columns the same length.
max_length = max(len(normal_expr), len(T2D_expr))
df = pd.DataFrame({
    'normal': normal_expr + [None] * (max_length - len(normal_expr)),
    # This entry previously had no value (a SyntaxError); it is padded the same
    # way as the 'normal' column.
    'T2D_expr': T2D_expr + [None] * (max_length - len(T2D_expr)),
})

df.to_csv('sc_HUM_Kif21a_delta_expr_disease.csv', index=False)