In [3]:
# Import necessary libraries
import os
import scanpy as sc
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import mmread
from scipy.sparse import csr_matrix, lil_matrix
from pathlib import Path
#import argparse
filestructure={
    "Exp1": "../../data/AllonKleinLab/Experiment1/stateFate_inVitro_normed_counts.mtx",
    "Exp2": "../../data/AllonKleinLab/Experiment2/stateFate_inVivo_normed_counts.mtx",
    "Exp3": "../../data/AllonKleinLab/Experiment3/stateFate_cytokinePerturbation_normed_counts.mtx",
    "Melanoma": "../../data/Melanoma/GSE72056_melanoma_single_cell_revised_v2.txt",
    "MouseCortex": "../../data/MouseCortex/MouseCortex.RData",
    "MyocardialInfarction": "../../data/MyocardialInfarction/e61af320-303a-4029-8500-db6636bba0d4.rds"
}

## Create the ArgumentParser object
#parser = argparse.ArgumentParser()
#parser.add_argument("-i", "--input", help="input file")
#args = parser.parse_args()
#argin= args.input

argin="Melanoma"
# Rest of the code
if argin:
    print("input file: %s" % argin)
    inputfile= filestructure[argin]
else:
    print("input file: no input file provided")
    inputfile= filestructure["Melanoma"]

# Setup testing phase
TEST = True
TEST_genes = 300
TEST_samples = 500

# Archetypes
k = 15

input file: Melanoma


# Load Dataset

In [4]:
if Path(inputfile).suffix == ".mtx":
    se = csr_matrix(mmread(inputfile))
if Path(inputfile).suffix == ".txt":
    se = pd.read_csv(inputfile, sep="\t")
if Path(inputfile).suffix == ".RData":
    exit(NotImplementedError)
print(se)
print(type(se))
if TEST:
    tgenes = min(TEST_genes, se.shape[1])
    tsamples = min(TEST_samples, se.shape[0])
    se.resize((tsamples, tgenes))
    print(se.shape)
    print(type(se))

# Convert the matrix to AnnData object
adata = sc.AnnData(se)
print(adata)


                                                    Cell  \
0                                                  tumor   
1                     malignant(1=no,2=yes,0=unresolved)   
2      non-malignant cell type (1=T,2=B,3=Macro.4=End...   
3                                               C9orf152   
4                                                  RPS11   
...                                                  ...   
23684                                            PIK3IP1   
23685                                             SNRPD2   
23686                                            SLC39A6   
23687                                               CTSC   
23688                                               AQP7   

       Cy72_CD45_H02_S758_comb  CY58_1_CD45_B02_S974_comb  \
0                      72.0000                    58.0000   
1                       1.0000                     1.0000   
2                       2.0000                     1.0000   
3                       0.0000     

AttributeError: 'DataFrame' object has no attribute 'resize'

In [None]:

# Standard workflow for pre-processing and clustering
#sc.pp.normalize_total(adata, target_sum=1e6)
#sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata


In [None]:
adata = adata[:, adata.var.highly_variable]
sc.pp.scale(adata, max_value=10)
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
sc.tl.umap(adata)


# Visualize data

In [None]:
# PCA plot
imgname = os.path.join(out_path, "PCA.png")
print(f"Saving Image --- {imgname}")
sc.pl.pca(adata, save=imgname.split('/')[-1])

In [None]:
# UMAP plot
imgname = os.path.join(out_path, "UMAP.png")
print(f"Saving Image --- {imgname}")
sc.pl.umap(adata, save=imgname.split('/')[-1])

In [None]:
# Elbow plot
imgname = os.path.join(out_path, "elbow.pdf")
print(f"Saving Image --- {imgname}")
sc.pl.pca_variance_ratio(adata, log=True, save=imgname.split('/')[-1])

# Archetypes

In [None]:
import archetypes as arch
from time import time

aa_kwargs = {
    "n_archetypes": 4,
    "n_init": 5,
    "max_iter": 100000,
    "verbose": True,
    "tol": 1e-3,
}

mod0 = arch.AA(**aa_kwargs, algorithm_init="furthest_sum")

t0=time()
mod0.fit_transform(adata.X)
t1=time()

print(f"mod0: {t1-t0:.2f} seconds|RSS: {mod0.rss:.2f}")