# LIGER alignment of the whole dataset: cell lines (Kinker et al.) and tumors (Kim et al.)

In [None]:
library(liger)
library(ggplot2)

## Import data
### Tumors

In [None]:
# UNQUOTE FOR TMP
# Read the tumor expression table from its serialized RDS file (per the file
# name: GSE131907, normalized log2 TPM).
# NOTE(review): this path starts with "./data" whereas the cell-line inputs
# are read from "../data" -- confirm which working directory is intended.
tumor_file <- "./data/Kim/raw/GSE131907_Lung_Cancer_normalized_log2TPM_matrix.rds"
tumor_data_df <- readRDS(file = tumor_file)

In [None]:
# Draw a random subset of tumor samples (columns), without replacement.
# NOTE(review): no seed is set in this notebook before this draw, so each run
# picks a different subset; the chosen names are written to CSV below so the
# selection can be recovered afterwards.
n_tumor_subsamples <- 50000
subsampled_col <- sample(colnames(tumor_data_df), n_tumor_subsamples)

# write.csv() does not create missing directories, so make sure the output
# folder exists before writing the list of selected samples.
dir.create('./output/liger', recursive = TRUE, showWarnings = FALSE)
write.csv(subsampled_col, './output/liger/subsampled_tumor_samples.csv')

# Select the sampled columns with explicit [rows, columns] indexing so the
# selection works for a data frame and for a matrix alike; drop = FALSE keeps
# the result two-dimensional.
subsampled_tumor_data_df <- tumor_data_df[, subsampled_col, drop = FALSE]

# Release the full table: only the subsample is used from here on.
tumor_data_df <- NULL
gc()

### Cell lines

In [None]:
# UNQUOTE FOR TMP
# Load the tab-separated cell-line expression table (header row expected; an
# earlier variant of this call used skip=3, header=FALSE).
cell_line_file <- '../data/Kinker/raw/CPM_data.txt'
cell_line_data_df <- read.delim(cell_line_file)

# Use the GENE column as row names, then remove the first column by position
# (presumably the GENE column itself -- confirm against the file layout).
rownames(cell_line_data_df) <- cell_line_data_df$GENE
cell_line_data_df <- cell_line_data_df[, -1, drop = FALSE]

# The reader sanitizes column names (check.names is left at its default),
# which turns characters such as "-" into "."; map every "." back to "-".
names(cell_line_data_df) <- gsub("\\.", "-", names(cell_line_data_df))

In [None]:
# Divide the cell-line values by 100 (original note: "downscale to tumors")
# and put them on the log2(x + 1) scale, in one expression.
cell_line_data_df <- log2(cell_line_data_df / 100 + 1)

In [None]:
# Load the tab-separated cell-line metadata table.
cell_line_pool <- read.delim('../data/Kinker/raw/Metadata.txt')

# Discard the first record (presumably a non-data descriptor row -- confirm
# against the file), then index the table by the NAME column.
cell_line_pool <- cell_line_pool[-1, ]
rownames(cell_line_pool) <- cell_line_pool$NAME

# Put the metadata rows in the same order as the expression-table columns.
cell_line_pool <- cell_line_pool[colnames(cell_line_data_df), ]

## LIGER

In [None]:
# Build a LIGER object from the two expression tables, converted to matrices
# and passed as a named list (one entry per dataset: cell_line and tumor).
liger <- createLiger(list(cell_line=as.matrix(cell_line_data_df),
                          tumor=as.matrix(subsampled_tumor_data_df)))
# Run the package's preprocessing steps in sequence, all with default
# arguments.
# NOTE(review): both inputs are already on a log2 scale at this point (the
# tumor file is named "log2TPM"; the cell lines are log2-transformed earlier
# in this notebook) -- confirm that normalize() is meant to run on such values.
liger <- normalize(liger)
liger <- selectGenes(liger)
liger <- scaleNotCenter(liger)

In [None]:
# Run the factorization with k = 30 factors.
# NOTE(review): use.unshared = TRUE presumably relies on unshared features
# having been selected beforehand, but selectGenes() is called with its
# defaults in this notebook -- confirm against the package documentation.
liger <- optimizeALS(liger, k=30, use.unshared = TRUE)

## Quantile normalisation for LIGER

In [None]:
# Apply the package's quantile normalization to the factorized object
# (default arguments).
liger <- quantile_norm(liger)

In [None]:
# Compute a UMAP embedding of the cells.
# use.raw = FALSE presumably makes it embed the quantile-normalized factors
# rather than the raw ones -- confirm against the package documentation.
liger <- runUMAP(liger, use.raw = FALSE)

In [None]:
# Export the LIGER results to CSV files.

# write.csv() does not create missing directories, so make sure both target
# folders exist before writing anything.
for (out_dir in c('./output/liger', './output/sobolev_alignment/liger')) {
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
}

# Save the 2-D embedding. The coordinates are read from the tsne.coords slot
# and the file keeps its "tSNE" name, but in this notebook they are produced
# by runUMAP().
write.csv(liger@tsne.coords, './output/liger/tSNE_quantile_normalized.csv')

# Save common loadings
# NOTE(review): this is the only file written under
# ./output/sobolev_alignment/liger; every other output goes to ./output/liger.
# Confirm the path is intentional before changing it, since downstream code
# may read from it.
write.csv(liger@W, './output/sobolev_alignment/liger/matrix_W.csv')

# Save scores: per-dataset H matrices and the normalized H.norm matrix
write.csv(liger@H$cell_line, './output/liger/matrix_H_cell_lines.csv')
write.csv(liger@H$tumor, './output/liger/matrix_H_tumors.csv')
write.csv(liger@H.norm, './output/liger/matrix_H_normalized.csv')

# Save individual (dataset-specific) loadings
write.csv(liger@V$cell_line, './output/liger/matrix_V_cell_lines.csv')
write.csv(liger@V$tumor, './output/liger/matrix_V_tumors.csv')