# Check max iteration of SMAF

For Extended Data Fig. 1bc

This code runs SMAF and saves the results for each SMAF iteration.

Additionally, the analysis can be looped over SMAF parameters.
See "Set variables" for details.


## Import Libraries

In [1]:
# Import libraries
import anndata as ad
from pathlib import Path
import os
import numpy as np
import pandas as pd
import errno
import itertools

## Setup

In [2]:
# Import system libraries to configure code directory as module
from os.path import dirname, abspath, join
import sys

# Locate the code directory.
# NOTE: '__file__' is a quoted string literal, not the module attribute, so
# dirname() returns '' and CODE_DIR resolves to '../code' relative to the
# current working directory (presumably intentional, since a notebook
# session does not define __file__ -- confirm).
THIS_DIR = dirname('__file__')
CODE_DIR = abspath(join(THIS_DIR, '..', 'code'))
# Add the code directory to the module search path only once, so that
# re-running this cell does not accumulate duplicate sys.path entries
if CODE_DIR not in sys.path:
    sys.path.append(CODE_DIR)

In [3]:
# Import SMAF
from smaf import smaf
from utils import is_valid_file

## Input data

In [4]:
# Location of the input data and of the sce file to analyse
data_path = Path('/mnt/projects/data')
sce_path = data_path / '0_tissues_th182/sce/sce.h5ad'

# Results of this experiment are written below the data directory
EXP_name = 'publication/1_SMAF_maxItr'
out_path = data_path / EXP_name
# Make sure the output directory (and any missing parent) exists
out_path.mkdir(parents=True, exist_ok=True)

In [5]:
# Stop early if the helper rejects the sce path
# (the list is presumably the set of accepted file extensions -- confirm in utils)
if not is_valid_file(sce_path, ['.h5ad']):
    # Raise with the standard (errno, message, filename) triple for a missing file
    raise FileNotFoundError(
        errno.ENOENT,
        os.strerror(errno.ENOENT),
        sce_path,
    )

In [6]:
# Load the sce object and keep only the relevant markers
markers_to_drop = ["panCK", "CD15", "CD11c", "CD56"]
sce = ad.read_h5ad(sce_path)

# Boolean mask over the variables: True for every marker that is NOT excluded
keep_marker = ~sce.var.index.isin(markers_to_drop)
sce = sce[:, keep_marker]



## Set variables

Provide the values for the main iterator and for the methodW-specific iterator as lists; the analysis loops over every combination.

d: initial n. of modules in U

maxItr: Number of iterations for each SMAF run

nthread: Number of threads

### main iterator
l_rep: N. of replicates

l_methodW: algorithm for calculating W. "lasso" or "omp_fixedk"

l_ldaU: ldaU, error tolerance coefficient when calculating U.


### methodW specific iterator
#### only for lasso
l_ldaW: ldaW, error tolerance coefficient when calculating W.

l_nblocksW: Number of blocks into which the cells are split when calculating W.
#### only for omp_fixedk
l_k: Sparsity of W (N. of non-zero input per cell in W)




In [7]:
# ---- fixed settings --------------------------------------------------------
d = 80          # initial number of modules in U
maxItr = 100    # number of iterations for each SMAF run
nthread = -1    # number of threads

# ---- main iterator ---------------------------------------------------------
l_rep = range(1)                               # replicates
l_methodW = ['lasso', 'omp_fixedk']            # algorithm for calculating W: "lasso" or "omp_fixedk"
l_ldaU = [0.3, 0.2, 0.1, 0.05, 0.02, 0.005]    # ldaU

# ---- methodW-specific iterator ---------------------------------------------
# lasso only (the alternative, wider grids from the original cell are kept as notes)
l_ldaW = [0.2, 0.02]     # ldaW; alternative grid: [0.3, 0.2, 0.1, 0.05, 0.02, 0.005]
l_nblocksW = [1, 20]     # number of blocks the cells are split into when calculating W;
                         # alternative grid: [1, 20, 200, 2000]
# omp_fixedk only
l_k = list(range(1, 5))  # sparsity of W (number of non-zero entries per cell in W)



## Run SMAF

In [8]:
# Containers filled in lock-step: every saved SMAF iteration contributes one
# row of metrics to sum_res and one row of run conditions to cond
sum_res = []  # results
cond = []     # conditions

# Reject unknown algorithm names before any SMAF run starts; the loop below
# would otherwise skip them without producing results or an error
unknown_methods = set(l_methodW) - {'lasso', 'omp_fixedk'}
if unknown_methods:
    raise ValueError('Unsupported methodW value(s): {}'.format(sorted(unknown_methods)))

# Number of main-iterator combinations, used only for the progress message
# (the methodW-specific sub-loops are not counted)
totalitr = len(l_rep) * len(l_methodW) * len(l_ldaU)

# run SMAF
for itrid, (replicate, methodW, ldaU) in enumerate(itertools.product(l_rep, l_methodW, l_ldaU)):
    print('CurrentItr:{}/{} Rep:{},methodW:{},ldaU:{}'.format(itrid+1, totalitr, replicate, methodW, ldaU))
    # Parameters that do not apply to the current methodW are recorded as NaN.
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    ldaW, k, num_blocks_W = (np.nan, np.nan, np.nan)
    if methodW == 'lasso':
        for ldaW, num_blocks_W in itertools.product(l_ldaW, l_nblocksW):
            U, W, X, res, coln = smaf(sce, d, maxItr, methodW, ldaU, ldaW=ldaW, THREADS=nthread,
                                      X_normalization='paper_norm', num_blocks_W=num_blocks_W,
                                      num_blocks_U=1, layer=None, Normalize_U=True, saveItr=True)
            rows = list(res)
            sum_res.extend(rows)
            # Number the condition rows by what smaf actually returned instead of
            # assuming exactly maxItr rows, so results and conditions stay aligned
            cond.extend([[i, replicate, methodW, ldaU, num_blocks_W, ldaW, k] for i in range(len(rows))])
    elif methodW == 'omp_fixedk':
        for k in l_k:
            # NOTE(review): num_blocks_W is NaN here, exactly as in the original call;
            # confirm that smaf ignores it for omp_fixedk
            U, W, X, res, coln = smaf(sce, d, maxItr, methodW, ldaU, k=k, THREADS=nthread,
                                      X_normalization='paper_norm', num_blocks_W=num_blocks_W,
                                      num_blocks_U=1, layer=None, Normalize_U=True, saveItr=True)
            rows = list(res)
            sum_res.extend(rows)
            cond.extend([[i, replicate, methodW, ldaU, num_blocks_W, ldaW, k] for i in range(len(rows))])



CurrentItr:1/12 Rep:0,methodW:lasso,ldaU:0.3
Initialized U, W with NMF, SMAF maxItr =  100
Initialized U, W with NMF, SMAF maxItr =  100
Initialized U, W with NMF, SMAF maxItr =  100
Initialized U, W with NMF, SMAF maxItr =  100
CurrentItr:2/12 Rep:0,methodW:lasso,ldaU:0.2
Initialized U, W with NMF, SMAF maxItr =  100
Initialized U, W with NMF, SMAF maxItr =  100
Initialized U, W with NMF, SMAF maxItr =  100
Initialized U, W with NMF, SMAF maxItr =  100
CurrentItr:3/12 Rep:0,methodW:lasso,ldaU:0.1
Initialized U, W with NMF, SMAF maxItr =  100
Initialized U, W with NMF, SMAF maxItr =  100
Initialized U, W with NMF, SMAF maxItr =  100
Initialized U, W with NMF, SMAF maxItr =  100
CurrentItr:4/12 Rep:0,methodW:lasso,ldaU:0.05
Initialized U, W with NMF, SMAF maxItr =  100
Initialized U, W with NMF, SMAF maxItr =  100
Initialized U, W with NMF, SMAF maxItr =  100
Initialized U, W with NMF, SMAF maxItr =  100
CurrentItr:5/12 Rep:0,methodW:lasso,ldaU:0.02
Initialized U, W with NMF, SMAF maxIt

## Save results

In [9]:
# Turn the collected per-iteration results into a DataFrame, labelled with
# the column names returned by the last SMAF call
sum_res = pd.DataFrame(sum_res)
sum_res.columns = coln

# Turn the collected run conditions into a DataFrame with one named column
# per recorded parameter
cond = pd.DataFrame(cond)
cond.columns = [
    'iteration',
    'replicate',
    'methodW',
    'ldaU',
    'num_blocks_W',
    'ldaW',
    'k',
]

In [10]:
# combine df results and conditions
# DataFrame.join aligns rows on the index, so tables of different length would
# be paired incorrectly (or padded with NaN) without any error; fail loudly instead
if len(sum_res) != len(cond):
    raise ValueError(
        'Result rows ({}) and condition rows ({}) differ; cannot combine them row by row.'.format(
            len(sum_res), len(cond)
        )
    )
df = sum_res.join(cond)

In [11]:
# Write the combined results table to result.csv inside the output directory
df.to_csv(out_path / 'result.csv')