# Finalize U
For Extended Data Figure 5d

This code was used to compute U with the optimised SMAF parameters on the entire training dataset.

## Import libraries

In [1]:
# Import libraries
import anndata as ad
from pathlib import Path
import os
import numpy as np
import pandas as pd
import errno

## Setup

In [2]:
# Import system libraries to configure the code directory as a module source
from os.path import dirname, abspath, join
import sys

# Find the code directory relative to our directory.
# NOTE: dirname() is given the literal string '__file__' (not the __file__
# variable), so THIS_DIR is always '' and CODE_DIR resolves to '../code'
# relative to the current working directory.
THIS_DIR = dirname('__file__')
CODE_DIR = abspath(join(THIS_DIR, '..', 'code'))
# Add the code directory to the system paths only once, so that re-running
# this cell does not accumulate duplicate entries in sys.path
if CODE_DIR not in sys.path:
    sys.path.append(CODE_DIR)

In [3]:
# Import functions from the project modules `smaf` and `utils`
from smaf import smaf
from utils import analyse_U_W, analyse_decoding, is_valid_file

## Input data and specify output

In [4]:
# Location of the input sce file
data_path = Path('/mnt/projects/data')
sce_path = data_path / '0_preprocess_th184/processed/sce.h5ad'

# Output locations: one directory for this experiment, with a "U" sub-directory
EXP_name = 'publication/7_Finalize_U'
out_path = data_path / EXP_name
U_path = out_path / "U"

# Make sure both output directories exist (no error if they already do)
for directory in (out_path, U_path):
    directory.mkdir(parents=True, exist_ok=True)

In [5]:
# Verify the input file before trying to read it; is_valid_file is given the
# path and the list of accepted extensions
sce_file_ok = is_valid_file(sce_path, ['.h5ad'])
if not sce_file_ok:
    # Input file was rejected: raise the standard "no such file" error for it
    raise FileNotFoundError(
        errno.ENOENT,
        os.strerror(errno.ENOENT),
        sce_path,
    )

In [6]:
# Load the sce object from the h5ad file
sce = ad.read_h5ad(sce_path)

# Subset sce to the relevant markers by dropping the excluded ones
excluded_markers = ["panCK", "CD15", "CD11c", "CD56"]
keep_marker = ~sce.var.index.isin(excluded_markers)
sce = sce[:, keep_marker]

# Remove tonsil 1 and 2 to reduce non-tumor immune cells
excluded_tissues = ["Tonsil1", "Tonsil2"]
keep_cell = ~sce.obs.tissue.isin(excluded_tissues)
sce = sce[keep_cell, :]

## Set SMAF parameters

In [7]:
## Parameters used to compute the final U
# Main parameters: method used for W and the lambda value for U
methodW = 'lasso'
ldaU = 0.02

# Parameters specific to the chosen methodW
ldaW = 0.02
nblocksW_lasso = 1

# General settings
d = 80          # NOTE(review): presumably the size of U's second dimension - confirm
maxItr = 100    # maximum number of SMAF iterations
nthread = -1    # thread setting; presumably -1 means "use all available" - confirm

## Perform SMAF

In [8]:
# The final U is computed from all cells of the training data in sce
X_trainingU = sce

# Keyword options forwarded to smaf
smaf_options = dict(
    ldaW=ldaW,
    THREADS=nthread,
    X_normalization='paper_norm',
    num_blocks_W=nblocksW_lasso,
    num_blocks_U=1,
    layer=None,
    Normalize_U=True,
    saveItr=False,
)

# Calculate U (smaf also returns W and X)
U, W, X = smaf(X_trainingU, d, maxItr, methodW, ldaU, **smaf_options)

# Collect the SMAF results together with their column names
res_U, coln_U = analyse_U_W(U, W, X)


    


Initialized U, W with NMF, SMAF maxItr =  100


## Save results

In [9]:
# Save U as csv: rows are labelled with sce.var_names, columns numbered from 1
column_labels = list(range(1, U.shape[1] + 1))
U_table = pd.DataFrame(U, columns=column_labels, index=sce.var_names)
U_table.to_csv(os.path.join(U_path, 'U_final.csv'))

# Turn the U results into a DataFrame (.T added since the input is 1D)
df = pd.DataFrame(res_U).T
df.columns = coln_U
# Save the U results as csv
result_file = os.path.join(out_path, 'result_U.csv')
df.to_csv(path_or_buf=result_file)

