In [1]:
import warnings
warnings.filterwarnings("ignore",category=FutureWarning)

import scanpy as sc
import numpy as np
from scipy import sparse
import random

In [2]:
# Import Control Dataset
adata = sc.read_h5ad("controls_Trametinib_mergedtahoe100_hvg.h5ad")

In [3]:
import numpy as np
import scipy.sparse as sp

X = adata.X.A if sp.issparse(adata.X) else adata.X
print("Shape:", X.shape)
print("Min value before log1p:", X.min())
print("Max value before log1p:", X.max())
print("Any NaNs?", np.isnan(X).sum())
print("Any negatives?", np.any(X < 0))

Shape: (2599630, 4000)
Min value before log1p: 0.0
Max value before log1p: 1042.0
Any NaNs? 0
Any negatives? False


## Generate new AnnData prepared for FCR

In [4]:
new_adata = adata

In [5]:
#Generate new columns
new_adata.obs = new_adata.obs.rename(columns={"drug":"Agg_Treatment", "cell_line":"covariates"})
new_adata.obs["control"] = new_adata.obs["Agg_Treatment"] == "DMSO_TF"
new_adata.obs["control"] = new_adata.obs["control"].astype(int)
new_adata.uns["fields"] = []
new_adata.obs["dose"] = new_adata.obs["drugname_drugconc"].str.split(",").str[1].astype(float)

## Downsized AnnData

In [6]:
#Import full data
full_adata = new_adata

We want to subset the dataset mantaning all the cell lines, and selecting the same number of controls and of treatments. For each cell lines we want to keep a control and the three dosages.

In [7]:
all_lines = np.unique(full_adata.obs["covariates"])
all_treatments = np.unique(full_adata.obs["drugname_drugconc"])
idx=[] #List of row indexes to mantain in the AnnData

keep_rows = []
for cell_line in all_lines:
    for treatment in all_treatments:
        mask = (full_adata.obs["covariates"] == cell_line) & (full_adata.obs["drugname_drugconc"] == treatment)
        row_indexes = full_adata.obs[mask].index
        keep_rows.extend(row_indexes[:100])
        # Randomize row order
        random.shuffle(keep_rows)

#Subset the AnnData
filtered_adata = full_adata[keep_rows, :]

In [8]:
sc.pp.normalize_total(filtered_adata, target_sum=1e4)
sc.pp.log1p(filtered_adata)
sc.pp.highly_variable_genes(filtered_adata, n_top_genes=2000, subset=True)
sc.pp.scale(filtered_adata, max_value=10)
filtered_adata.write("test_anndata_filtered.h5ad")

  view_to_actual(adata)


## Tests

In [9]:
adata = sc.read_h5ad("test_anndata_filtered.h5ad")

In [10]:
adata.obs.iloc[-128:,:]

Unnamed: 0_level_0,sample,gene_count,tscp_count,mread_count,drugname_drugconc,Agg_Treatment,covariates,sublibrary,BARCODE,pcnt_mito,S_score,G2M_score,phase,pass_filter,cell_name,plate,source_plate,n_genes,control,dose
BARCODE_SUB_LIB_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
26_056_088-lib_1491,smp_2192,1759,2914,3564,"[('Trametinib (DMSO_TF solvate)', 0.5, 'uM')]",Trametinib (DMSO_TF solvate),CVCL_1055,lib_1491,26_056_088,0.100549,-0.576190,-0.511410,G1,full,A-427,plate8,plate8,1759,0,0.50
26_020_002-lib_1489,smp_2192,638,856,1046,"[('Trametinib (DMSO_TF solvate)', 0.5, 'uM')]",Trametinib (DMSO_TF solvate),CVCL_0334,lib_1489,26_020_002,0.133178,-0.161905,-0.151832,G1,full,Hs 766T,plate8,plate8,638,0,0.50
26_103_099-lib_1495,smp_2192,1356,1842,2208,"[('Trametinib (DMSO_TF solvate)', 0.5, 'uM')]",Trametinib (DMSO_TF solvate),CVCL_0397,lib_1495,26_103_099,0.053746,-0.067213,-0.018679,G1,full,LS 180,plate8,plate8,1356,0,0.50
26_043_028-lib_1547,smp_2192,1357,1955,2319,"[('Trametinib (DMSO_TF solvate)', 0.5, 'uM')]",Trametinib (DMSO_TF solvate),CVCL_1716,lib_1547,26_043_028,0.056777,-0.019708,-0.181319,G1,full,SW 1271,plate8,plate8,1357,0,0.50
96_045_153-lib_1682,smp_2454,1132,1462,1742,"[('DMSO_TF', 0.0, 'uM')]",DMSO_TF,CVCL_1717,lib_1682,96_045_153,0.075239,0.071429,0.130383,G2M,full,SW1417,plate10,plate10,1132,1,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26_100_109-lib_1491,smp_2192,2068,3524,4311,"[('Trametinib (DMSO_TF solvate)', 0.5, 'uM')]",Trametinib (DMSO_TF solvate),CVCL_0480,lib_1491,26_100_109,0.057605,-0.519048,-0.256533,G1,full,PANC-1,plate8,plate8,2068,0,0.50
26_057_026-lib_1393,smp_2096,1504,2196,2640,"[('Trametinib (DMSO_TF solvate)', 0.05, 'uM')]",Trametinib (DMSO_TF solvate),CVCL_0480,lib_1393,26_057_026,0.090619,-0.072112,-0.123626,G1,full,PANC-1,plate7,plate7,1504,0,0.05
95_130_042-lib_1684,smp_2453,1657,2512,2914,"[('DMSO_TF', 0.0, 'uM')]",DMSO_TF,CVCL_1577,lib_1684,95_130_042,0.076035,-0.129414,-0.080037,G1,full,NCI-H661,plate10,plate10,1657,1,0.00
95_037_079-lib_1681,smp_2453,2209,3757,4427,"[('DMSO_TF', 0.0, 'uM')]",DMSO_TF,CVCL_1381,lib_1681,95_037_079,0.081182,-0.818524,-0.385714,G1,full,LOX-IMVI,plate10,plate10,2209,1,0.00
