# **Analyze the data from scratch**
steps

## Load Libraries

In [1]:
# LOAD LIBRARIES
library(Seurat)
library(tidyverse)
library(future)
library(ggplot2)
library(dplyr)
library(presto)
library(cowplot)

library(enrichR)
library(GPTCelltype)
library(openai)

Loading required package: SeuratObject

Loading required package: sp


Attaching package: ‘SeuratObject’


The following object is masked from ‘package:base’:

    intersect


── [1mAttaching core tidyverse packages[22m ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.4.4     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.4     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.4     
── [1mConflicts[22m ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mst

## Setup

In [2]:
# SET UP NAMES
# Labels of the time points; the functions below index this vector with
# `time_point`.
timepoints <- c(
    "23days", "1month", "1.5month", "2month",
    "3month", "4month", "5month", "6month"
)
# Genes used as denominators of the expression ratios.
housekeeping_genes <- c("ACTB", "DLG4")
# Genes inspected in every cluster.
genes_of_interest <- c("SRCIN1", "KIAA1217", "CIT")
# Folder that holds one "expression_<timepoint>" directory per time point.
path_to_data <- "/sharedFolder/Data/"

# Output folders, both placed under the current working directory.
name_new_dir_results <- paste0(getwd(), "/Results")
name_new_dir_partial <- paste0(getwd(), "/Partial")

# Create whichever of the two output folders does not exist yet.
invisible(lapply(
    c(name_new_dir_results, name_new_dir_partial),
    function(output_dir) {
        if (!dir.exists(output_dir)) {
            dir.create(output_dir)
        }
    }
))

In [3]:
# Create one sub-folder per time point under both "Partial" and "Results".
# seq_along() follows the length of `timepoints` instead of a hard-coded 8,
# and recursive = TRUE also creates the parent folder when it is missing.
for (a in seq_along(timepoints)) {
    for (parent_dir in c(name_new_dir_partial, name_new_dir_results)) {
        name_new_dir <- paste0(parent_dir, "/", timepoints[a])
        if (!dir.exists(name_new_dir)) {
            dir.create(name_new_dir, recursive = TRUE)
        }
    }
}

## Define functions

### load.data(time_point)
* LOAD DATA
* NORMALIZE
* FIND VARIABLE FEATURES
* SCALE DATA

In [4]:
# Load one time point from disk and prepare it for PCA.
#
# Pipeline: Read10X -> CreateSeuratObject -> NormalizeData ->
# FindVariableFeatures -> ScaleData, optionally saving the result.
#
# Args:
#   time_point: index into the global `timepoints` vector.
#   save: if TRUE, write the object (under the name `sc_data`) to
#         <name_new_dir_partial>/<timepoint>/Scaled_<timepoint>.Robj.
# Returns: the normalized and scaled Seurat object.
# Reads the globals `timepoints`, `path_to_data` and `name_new_dir_partial`.
load.data <- function(time_point, save = TRUE) {
    # Reject indices that would otherwise build an "expression_NA" path.
    stopifnot(
        length(time_point) == 1,
        time_point >= 1,
        time_point <= length(timepoints)
    )
    tp_label <- timepoints[time_point]
    print(paste("Loading data for time point:", tp_label))

    # Load the data; gene.column = 1 takes gene names from the first column
    # of the features file.
    sc_data <- Read10X(
        data.dir = paste0(path_to_data, "expression_", tp_label),
        gene.column = 1
    )

    # Create Seurat object, keeping genes seen in at least 3 cells and cells
    # with at least 500 detected genes. The initial identity is the second
    # "-"-separated field of each cell name.
    sc_data <- CreateSeuratObject(
        counts = sc_data,
        min.cells = 3,
        min.features = 500,
        project = tp_label,
        names.delim = "-",
        names.field = 2
    )

    # Normalize the data (log-normalization with a scale factor of 1e6)
    sc_data <- NormalizeData(
        sc_data,
        normalization.method = "LogNormalize",
        scale.factor = 1e6
    )

    # Find variable features
    # NOTE(review): per the Seurat documentation `nfeatures` is only used by
    # the "vst" and "dispersion" methods, so it is presumably ignored with
    # "mvp" -- confirm this is intended.
    sc_data <- FindVariableFeatures(
        sc_data,
        selection.method = "mvp",
        nfeatures = 2000
    )

    # Scale the data
    sc_data <- ScaleData(sc_data)

    # Save the scaled data
    if (save) {
        name_new_dir <- paste0(name_new_dir_partial, "/", tp_label)
        if (!dir.exists(name_new_dir)) {
            dir.create(name_new_dir, recursive = TRUE)
        }

        print(paste("Saving scaled data for time point", tp_label, "in", name_new_dir))
        save(sc_data, file = paste0(name_new_dir, "/Scaled_", tp_label, ".Robj"))
    }

    return(sc_data)
}

### PCA.cluster(x = sc_data, res)
* Run PCA
* Cluster the cells

In [5]:
# Run PCA on a scaled Seurat object and cluster its cells.
#
# Args:
#   x: Seurat object (defaults to the global `sc_data`).
#   res: resolution passed to FindClusters().
#   save: if TRUE, write the clustered object (under the name `x`) to
#         <name_new_dir_partial>/<timepoint>/PCA_res_<res>_<timepoint>.Robj.
# Returns: the clustered Seurat object.
# Reads the globals `time_point`, `timepoints` and `name_new_dir_partial`.
PCA.cluster <- function(x = sc_data, res = 1, save = TRUE){
    tp_label <- timepoints[time_point]
    print(paste("Running PCA and clustering for time point:", tp_label))

    # PCA with 50 components
    x <- RunPCA(x, npcs = 50, verbose = FALSE)
    #print(ElbowPlot(object = x, ndims = 50))

    # Neighbour graph on the first 40 components, then clustering
    x <- FindNeighbors(x, dims = 1:40)
    x <- FindClusters(x, resolution = res)

    # Show how many cells fell into each cluster
    cluster_sizes <- table(Idents(x))
    print(cluster_sizes)

    # Nothing to write: hand back the clustered object right away
    if (!save) {
        return(x)
    }

    # Save the clustered object
    name_new_dir <- paste0(name_new_dir_partial, "/", tp_label)
    if (!dir.exists(name_new_dir)) {
        dir.create(name_new_dir)
    }

    print(paste("Saving PCA for time point", tp_label, "in", name_new_dir))
    save(x, file = paste0(name_new_dir, "/PCA_res_", res, "_", tp_label, ".Robj"))

    x
}

### cluster.markers(x = sc_data)
* find all markers for every cluster compared to all remaining cells

In [6]:
# FIND ALL MARKERS
# Find the marker genes of every cluster against all remaining cells.
#
# Args:
#   x: clustered Seurat object.
#   save: if TRUE, write the table (under the name `markers`) to
#         <name_new_dir_partial>/<timepoint>/cluster_markers_<timepoint>.Robj.
# Returns: the table produced by FindAllMarkers().
# Reads the globals `time_point`, `timepoints` and `name_new_dir_partial`.
cluster.markers <- function(x, save = TRUE) {
    tp_label <- timepoints[time_point]
    print(paste("Finding all markers for time point:", tp_label))

    markers <- FindAllMarkers(
        x,
        only.pos = TRUE,         # keep only positively expressed markers
        min.pct = 0.25,          # minimum fraction of expressing cells
        logfc.threshold = 0.25   # minimum log fold-change
    )

    # Nothing to write: hand back the table right away
    if (!save) {
        return(markers)
    }

    # Save the markers
    name_new_dir <- paste0(name_new_dir_partial, "/", tp_label)
    if (!dir.exists(name_new_dir)) {
        dir.create(name_new_dir)
    }

    print(paste("Saving cluster markers for time point", tp_label, "in", name_new_dir))
    save(markers, file = paste0(name_new_dir, "/cluster_markers_", tp_label, ".Robj"))

    markers
}

### Reload from partials
* load.sc_data(time_point) = object processed up to ScaleData()
* load.clusters(time_point, res) = clustered object
* load.markers(time_point) = cluster markers

In [7]:
# RELOAD DATA
# Reload the scaled Seurat object written by load.data(save = TRUE).
#
# Args:
#   time_point: index into the global `timepoints` vector.
# Returns: the object stored in
#   <name_new_dir_partial>/<timepoint>/Scaled_<timepoint>.Robj.
# Reads the globals `timepoints` and `name_new_dir_partial`.
load.sc_data <- function(time_point) {
    name_new_dir <- paste0(name_new_dir_partial, "/", timepoints[time_point])
    scaled_file <- paste0(name_new_dir, "/Scaled_", timepoints[time_point], ".Robj")

    # Load into a private environment and return what the file contains, so
    # the result can never fall back to a global variable named `sc_data`.
    loaded <- new.env()
    object_names <- load(scaled_file, envir = loaded)
    if (length(object_names) == 0) {
        stop("No object found in ", scaled_file, call. = FALSE)
    }
    get(object_names[1], envir = loaded)
}

# Reload the clustered Seurat object written by PCA.cluster(save = TRUE).
#
# Args:
#   time_point: index into the global `timepoints` vector.
#   res: clustering resolution that is part of the file name.
# Returns: the object stored in
#   <name_new_dir_partial>/<timepoint>/PCA_res_<res>_<timepoint>.Robj.
# Reads the globals `timepoints` and `name_new_dir_partial`.
load.clusters <- function(time_point, res) {
    name_new_dir <- paste0(name_new_dir_partial, "/", timepoints[time_point])
    cluster_file <- paste0(
        name_new_dir, "/PCA_res_", res, "_", timepoints[time_point], ".Robj"
    )

    # PCA.cluster() stores the object under the name `x`, not `sc_data`.
    # Return whatever the file holds instead of looking up a fixed name.
    loaded <- new.env()
    object_names <- load(cluster_file, envir = loaded)
    if (length(object_names) == 0) {
        stop("No object found in ", cluster_file, call. = FALSE)
    }
    get(object_names[1], envir = loaded)
}

# Reload the marker table written by cluster.markers(save = TRUE).
#
# Args:
#   time_point: index into the global `timepoints` vector.
# Returns: the object stored in
#   <name_new_dir_partial>/<timepoint>/cluster_markers_<timepoint>.Robj.
# Reads the globals `timepoints` and `name_new_dir_partial`.
load.markers <- function(time_point) {
    name_new_dir <- paste0(name_new_dir_partial, "/", timepoints[time_point])
    markers_file <- paste0(
        name_new_dir, "/cluster_markers_", timepoints[time_point], ".Robj"
    )

    # cluster.markers() stores the table under the name `markers`, not
    # `cluster_markers`. Return whatever the file holds.
    loaded <- new.env()
    object_names <- load(markers_file, envir = loaded)
    if (length(object_names) == 0) {
        stop("No object found in ", markers_file, call. = FALSE)
    }
    get(object_names[1], envir = loaded)
}

### de.genes(genes_oi)
* finds whether one or more genes of interest are differentially expressed in the clusters
* if no genes are specified, **genes_oi** defaults to **genes_of_interest**

In [8]:
# FIND DIFFERENTIALLY EXPRESSED GENES
# Extract the marker rows that belong to the requested genes.
#
# Args:
#   genes_oi: gene names to look up (defaults to `genes_of_interest`).
#   save: if TRUE, write the rows to
#         <name_new_dir_results>/<timepoint>/de_genes_<timepoint>.csv.
# Returns: the rows of the global `cluster_markers` whose `gene` is in
#   `genes_oi`.
# Reads the globals `cluster_markers`, `time_point`, `timepoints` and
# `name_new_dir_results`.
de.genes <- function(genes_oi = genes_of_interest, save = TRUE) {
    tp_label <- timepoints[time_point]
    print(paste("Finding differentially expressed genes for time point:", tp_label))

    # Keep only the marker rows of the requested genes
    de_genes <- dplyr::filter(cluster_markers, gene %in% genes_oi)
    print(de_genes)

    # Nothing to write: hand back the rows right away
    if (!save) {
        return(de_genes)
    }

    # Save the DE genes
    name_new_dir <- paste0(name_new_dir_results, "/", tp_label)
    if (!dir.exists(name_new_dir)) {
        dir.create(name_new_dir)
    }

    print(paste("Saving differentially expressed genes for time point", tp_label, "in", name_new_dir))
    write.csv(de_genes, file = paste0(name_new_dir, "/de_genes_", tp_label, ".csv"))

    de_genes
}

## Violin Plots

### Violin genes-clusters

In [9]:
# Violin plot of one feature across clusters (linear y axis).
#
# Args:
#   X: name of the feature to plot.
# Returns: the plot, titled "<timepoint> - <X>", with the legend hidden.
# Reads the globals `sc_data`, `timepoints` and `time_point`.
vin.genes <- function(X) {
    plot_title <- paste(timepoints[time_point], "-", X)

    violin <- VlnPlot(
        sc_data,
        features = X,
        group.by = "seurat_clusters",
        layer = "data"
    )
    violin <- violin + labs(title = plot_title, x = "Cluster")

    violin + theme(legend.position = "none")
}

In [10]:
# Violin plot of one feature across clusters, drawn with log = TRUE.
#
# Args:
#   X: name of the feature to plot.
# Returns: the plot with relabelled axes and the legend hidden.
# Reads the global `sc_data`.
vin.genes.log <- function(X) {
    violin <- VlnPlot(
        sc_data,
        features = X,
        group.by = "seurat_clusters",
        layer = "data",
        log = TRUE
    )
    violin <- violin + labs(x = "Cluster", y = "Expression Level (log)")

    violin + theme(legend.position = "none")
}

In [11]:
# Stack the per-gene violin plots of one gene set and save them as a PNG.
#
# Args:
#   X: character vector of gene names (at least one).
#   name: gene-set label used in the title and file name; 1 is shorthand for
#         "genes_of_interest" and 2 for "housekeeping_genes".
#   log: if TRUE use vin.genes.log() for each gene, otherwise vin.genes().
# Returns: the combined plot (title strip above one violin per gene).
# Side effect: writes
#   Results/<timepoint>/Violin_genes/[log10_]violin_plot_<name>_<timepoint>.png
#   relative to the working directory.
# Reads the globals `timepoints` and `time_point`.
vin.genes.comb <- function (X, name, log = FALSE) {
    stopifnot(length(X) > 0)
    tp_label <- timepoints[time_point]

    # One violin per gene, on the requested y scale
    violin <- lapply(X, if (log) vin.genes.log else vin.genes)

    # Translate the numeric shortcuts into the gene-set name
    if (name == 1) {
        name <- "genes_of_interest"
    } else if (name == 2) {
        name <- "housekeeping_genes"
    }

    # c() drops the NULL of a false `if`, so the message has no double spaces
    print(
        paste(
            c("Violin", if (log) "logarithmic", "plot for", name, "in", tp_label),
            collapse = " "
        )
    )

    # Derive the title from the gene set actually plotted, e.g.
    # "housekeeping_genes" -> "Housekeeping genes", rather than always
    # printing "Genes of interest".
    set_label <- gsub("_", " ", name, fixed = TRUE)
    set_label <- paste0(toupper(substring(set_label, 1, 1)), substring(set_label, 2))
    title_text <- paste(
        c(tp_label, "-", set_label, if (log) "(log10)"),
        collapse = " "
    )

    combined <- plot_grid(plotlist = violin, ncol = 1)
    title <- ggdraw() + draw_label(title_text, fontface = 'bold', size = 14)
    comp_plot <- plot_grid(title, combined, ncol = 1, rel_heights = c(0.1, 2))

    # Save; recursive = TRUE also creates Results/<timepoint> when missing
    folder <- paste0("Results/", tp_label, "/Violin_genes")
    if (!dir.exists(folder)) {
        dir.create(folder, recursive = TRUE)
    }

    file_prefix <- if (log) "log10_" else ""
    ggsave(
        paste0(
            folder,
            "/", file_prefix, "violin_plot_",
            name, "_",
            tp_label,
            ".png"
        ),
        plot = comp_plot,
        # One 1920x1080 panel per gene
        width = 1920, height = 1080*length(X), units = "px"
    )

    return(comp_plot)
}

### Violin ratio-clusters

In [12]:
# Violin plot, per cluster, of the expression ratio gene_oi / housekeeping.
#
# Args:
#   gene_oi: name of the gene of interest (numerator).
#   housekeeping: name of the housekeeping gene (denominator).
#   log: if TRUE draw the y axis on a log10 scale.
# Returns: the ggplot object.
# Side effect: writes Results/<timepoint>/Violin_ratio/
#   [log10_]violin_plot_<gene_oi>-<housekeeping>_ratio_<timepoint>.png
#   relative to the working directory.
# Reads the globals `sc_data`, `timepoints` and `time_point`.
vin.ratio <- function(gene_oi, housekeeping, log = FALSE) {
    tp_label <- timepoints[time_point]

    # c() drops the NULL of a false `if`, so the message has no double spaces
    print(
        paste(
            c(
                "Plotting the", if (log) "logarithmic", "ratio violin between",
                gene_oi, "and", housekeeping, "in", tp_label
            ),
            collapse = " "
        )
    )

    # Extract normalized data; `layer` replaces the `slot` argument, which is
    # deprecated as of SeuratObject 5.0.0
    exprs <- FetchData(sc_data, vars = c(gene_oi, housekeeping), layer = "data")

    # Compute ratio. A housekeeping value of 0 yields Inf/NaN; ggplot2 drops
    # such non-finite rows with a warning when the plot is drawn.
    exprs$ratio <- exprs[[gene_oi]] / exprs[[housekeeping]]

    # Add cluster info
    exprs$cluster <- Idents(sc_data)

    ratio_label <- paste(
        c(gene_oi, "/", housekeeping, if (log) "(log10)"),
        collapse = " "
    )

    # Plot violin
    vin_ratio <- ggplot(exprs, aes(x = cluster, y = ratio)) +
        geom_violin(trim = FALSE, fill = "#56B4E9") +
        labs(
            title = paste(tp_label, "-", ratio_label),
            x = "Cluster",
            y = ratio_label
        ) +
        theme_classic() +
        theme(legend.position = "none")

    if (log) {
        vin_ratio <- vin_ratio + scale_y_log10()
    }

    # Save; recursive = TRUE also creates Results/<timepoint> when missing
    folder <- paste0("Results/", tp_label, "/Violin_ratio")
    if (!dir.exists(folder)) {
        dir.create(folder, recursive = TRUE)
    }

    file_prefix <- if (log) "log10_" else ""
    ggsave(
        paste0(
            folder,
            "/", file_prefix, "violin_plot_",
            gene_oi, "-", housekeeping, "_ratio_",
            tp_label,
            ".png"
        ),
        plot = vin_ratio,
        width = 1920, height = 1080, units = "px"
    )

    return(vin_ratio)
}

### All Violin plots

In [13]:
# Produce every violin plot for one time point.
#
# Args:
#   g_oi: character vector of genes of interest.
#   hk: character vector of housekeeping genes.
# Draws the combined per-gene violins (linear and log10) for each non-empty
# gene set, then one ratio violin (linear and log10) for every
# gene-of-interest / housekeeping pair. Called for its side effects.
vin.all <- function(
    g_oi,
    hk
) {
    # Combined violins; skipped for an empty gene set
    if (length(g_oi) > 0) {
        vin.genes.comb(g_oi, name = 1)
        vin.genes.comb(g_oi, name = 1, log = TRUE)
    }
    if (length(hk) > 0) {
        vin.genes.comb(hk, name = 2)
        vin.genes.comb(hk, name = 2, log = TRUE)
    }

    # seq_along() yields no iterations for an empty vector, whereas
    # 1:length(x) would iterate over c(1, 0)
    for (i in seq_along(g_oi)) {
        for (j in seq_along(hk)) {
            # linear first, then log
            for (use_log in c(FALSE, TRUE)) {
                vin.ratio(
                    gene_oi = g_oi[i],
                    housekeeping = hk[j],
                    log = use_log
                )
            }
        }
    }
}

## Comparison

In [14]:
# Histogram, coloured by cluster, of the ratio gene_oi / housekeeping.
#
# Args:
#   gene_oi: gene of interest (numerator). The default indexes
#            `genes_of_interest` with a global `goi`, which must exist if the
#            argument is omitted.
#   housekeeping: housekeeping gene (denominator). The default indexes
#            `housekeeping_genes` with a global `hk` in the same way.
#   log: if TRUE plot count + 1 on a log10 y axis.
# Returns: the ggplot object.
# Side effect: writes Results/<timepoint>/gene-hk_comparison/
#   [log10_]gene-hk_comparison_<gene_oi>-<housekeeping>_<timepoint>.png
#   relative to the working directory.
# Reads the globals `sc_data`, `timepoints` and `time_point`.
comparison.plot <- function(
    gene_oi = genes_of_interest[goi], 
    housekeeping = housekeeping_genes[hk], 
    log = FALSE
) {
    genes <- c(gene_oi, housekeeping)
    tp_label <- timepoints[time_point]
    ratio_label <- paste0(genes[1], "/", genes[2])

    # c() drops the NULL of a false `if`, so the message has no double spaces
    print(
        paste(
            c(
                "Plotting the", if (log) "logarithmic", "ratio between",
                genes[1], "and", genes[2], "in", tp_label
            ),
            collapse = " "
        )
    )

    expression_data <- FetchData(sc_data, vars = genes)

    # Fail loudly if a gene was not returned; taking the ratio by column
    # position would otherwise silently use the wrong column.
    missing_genes <- setdiff(genes, colnames(expression_data))
    if (length(missing_genes) > 0) {
        stop(
            "Genes not found in the data: ",
            paste(missing_genes, collapse = ", "),
            call. = FALSE
        )
    }

    expression_data$cluster <- sc_data$seurat_clusters[colnames(sc_data)]
    expression_data$goi_hk_ratio <-
        expression_data[[genes[1]]] / expression_data[[genes[2]]]

    # The two variants differ only in the y mapping and the labels
    if (log) {
        count_aes <- aes(y = after_stat(count) + 1)
        hist_label <- " - Histogram (log10) of "
        y_label <- "Number of Cells (+1, log10 scale)"
    } else {
        count_aes <- aes(y = after_stat(count))
        hist_label <- " - Histogram of "
        y_label <- "Number of Cells"
    }

    comp_plot <- ggplot(
        expression_data,
        aes(x = goi_hk_ratio, fill = cluster)
    ) +
        geom_histogram(
            mapping = count_aes,
            binwidth = 0.2, position = "dodge", alpha = 0.7
        ) +
        labs(
            title = paste0(tp_label, hist_label, ratio_label, " Ratio by Cluster"),
            x = paste0(ratio_label, " Ratio"),
            y = y_label
        ) +
        theme_minimal() +
        # One rainbow colour per cluster present in the data
        scale_fill_manual(values = rainbow(length(unique(expression_data$cluster))))

    if (log) {
        comp_plot <- comp_plot + scale_y_log10()
    }

    # Save; recursive = TRUE also creates Results/<timepoint> when missing
    folder <- paste0("Results/", tp_label, "/gene-hk_comparison")
    if (!dir.exists(folder)) {
        dir.create(folder, recursive = TRUE)
    }

    file_prefix <- if (log) "log10_" else ""
    ggsave(
        paste0(
            folder,
            "/", file_prefix, "gene-hk_comparison_",
            genes[1], "-", genes[2], "_",
            tp_label,
            ".png"
        ),
        plot = comp_plot,
        width = 1920*2, height = 1080*2, units = "px"
    )

    return(comp_plot)
}

In [15]:
# Draw the ratio histogram (linear and log10) for every
# gene-of-interest / housekeeping pair.
#
# Args:
#   g_oi: character vector of genes of interest.
#   hk: character vector of housekeeping genes.
# Called for its side effects (comparison.plot() saves each figure).
all.comparison.plot <- function(
    g_oi,
    hk
) {
    # seq_along() yields no iterations for an empty vector, whereas
    # 1:length(x) would iterate over c(1, 0)
    for (i in seq_along(g_oi)) {
        for (j in seq_along(hk)) {
            # linear first, then log
            for (use_log in c(FALSE, TRUE)) {
                comparison.plot(
                    gene_oi = g_oi[i],
                    housekeeping = hk[j],
                    log = use_log
                )
            }
        }
    }
}

## Cell annotation

# Compute

## Single dataset

### Cluster

In [16]:
# Index into `timepoints` of the dataset analysed in this section
# (1 = "23days"). The functions defined above read `time_point` as a global.
time_point <- 1

In [17]:
# Load the data
# save = FALSE: the scaled object is kept in memory only; no
# Scaled_<timepoint>.Robj file is written.
sc_data <- load.data(time_point, save = FALSE)

[1] "Loading data for time point: 23days"


Normalizing layer: counts

Finding variable features for layer data

Centering and scaling data matrix



In [18]:
# Run PCA and clustering
# res = 1 is the FindClusters() resolution; save = TRUE writes the clustered
# object to the Partial/<timepoint> folder.
sc_data <- PCA.cluster(sc_data, res = 1, save = TRUE)

[1] "Running PCA and clustering for time point: 23days"


Computing nearest neighbor graph

Computing SNN



Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 29736
Number of edges: 1146654

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8914
Number of communities: 25
Elapsed time: 4 seconds

   0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
5195 2314 2018 2001 1731 1659 1558 1476 1446 1341 1213 1185 1021  914  872  585 
  16   17   18   19   20   21   22   23   24 
 578  545  526  488  401  362  116  106   85 
[1] "Saving PCA for time point 23days in /sharedFolder/Paper_clusters_from_scratch/Partial/23days"


### Plots

In [19]:
# Violin Plots
# Per-gene violins plus every gene-of-interest / housekeeping ratio violin,
# each in a linear and a log10 version; the PNG files are saved under
# Results/<timepoint>/.
vin.all(
    g_oi = genes_of_interest,
    hk = housekeeping_genes
)

[1] "Violin   plot for genes_of_interest in 23days"
[1] "Violin logarithmic  plot for genes_of_interest in 23days"
[1] "Violin   plot for housekeeping_genes in 23days"
[1] "Violin logarithmic  plot for housekeeping_genes in 23days"
[1] "Plotting the  ratio violin between SRCIN1 and ACTB in 23days"


“[1m[22mThe `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
[36mℹ[39m Please use the `layer` argument instead.”
“[1m[22mRemoved 284 rows containing non-finite values (`stat_ydensity()`).”


[1] "Plotting the logarithmic ratio violin between SRCIN1 and ACTB in 23days"


“[1m[22mTransformation introduced infinite values in continuous y-axis”
“[1m[22mRemoved 28476 rows containing non-finite values (`stat_ydensity()`).”
“[1m[22mGroups with fewer than two data points have been dropped.”


[1] "Plotting the  ratio violin between SRCIN1 and DLG4 in 23days"


“[1m[22mRemoved 27814 rows containing non-finite values (`stat_ydensity()`).”
“[1m[22mGroups with fewer than two data points have been dropped.”


[1] "Plotting the logarithmic ratio violin between SRCIN1 and DLG4 in 23days"


“[1m[22mTransformation introduced infinite values in continuous y-axis”
“[1m[22mRemoved 29548 rows containing non-finite values (`stat_ydensity()`).”
“[1m[22mGroups with fewer than two data points have been dropped.”
“[1m[22mGroups with fewer than two data points have been dropped.”
“[1m[22mGroups with fewer than two data points have been dropped.”
“[1m[22mGroups with fewer than two data points have been dropped.”
“[1m[22mGroups with fewer than two data points have been dropped.”


[1] "Plotting the  ratio violin between KIAA1217 and ACTB in 23days"


“[1m[22mRemoved 284 rows containing non-finite values (`stat_ydensity()`).”


[1] "Plotting the logarithmic ratio violin between KIAA1217 and ACTB in 23days"


“[1m[22mTransformation introduced infinite values in continuous y-axis”
“[1m[22mRemoved 28313 rows containing non-finite values (`stat_ydensity()`).”
“[1m[22mGroups with fewer than two data points have been dropped.”


[1] "Plotting the  ratio violin between KIAA1217 and DLG4 in 23days"


“[1m[22mRemoved 27814 rows containing non-finite values (`stat_ydensity()`).”
“[1m[22mGroups with fewer than two data points have been dropped.”


[1] "Plotting the logarithmic ratio violin between KIAA1217 and DLG4 in 23days"


“[1m[22mTransformation introduced infinite values in continuous y-axis”
“[1m[22mRemoved 29589 rows containing non-finite values (`stat_ydensity()`).”
“[1m[22mGroups with fewer than two data points have been dropped.”
“[1m[22mGroups with fewer than two data points have been dropped.”
“[1m[22mGroups with fewer than two data points have been dropped.”


[1] "Plotting the  ratio violin between CIT and ACTB in 23days"


“[1m[22mRemoved 284 rows containing non-finite values (`stat_ydensity()`).”


[1] "Plotting the logarithmic ratio violin between CIT and ACTB in 23days"


“[1m[22mTransformation introduced infinite values in continuous y-axis”
“[1m[22mRemoved 28676 rows containing non-finite values (`stat_ydensity()`).”
“[1m[22mGroups with fewer than two data points have been dropped.”
“[1m[22mGroups with fewer than two data points have been dropped.”


[1] "Plotting the  ratio violin between CIT and DLG4 in 23days"


“[1m[22mRemoved 27814 rows containing non-finite values (`stat_ydensity()`).”
“[1m[22mGroups with fewer than two data points have been dropped.”


[1] "Plotting the logarithmic ratio violin between CIT and DLG4 in 23days"


“[1m[22mTransformation introduced infinite values in continuous y-axis”
“[1m[22mRemoved 29587 rows containing non-finite values (`stat_ydensity()`).”
“[1m[22mGroups with fewer than two data points have been dropped.”
“[1m[22mGroups with fewer than two data points have been dropped.”
“[1m[22mGroups with fewer than two data points have been dropped.”


In [20]:
# Comparison plots
# Ratio histograms by cluster for every gene-of-interest / housekeeping pair,
# each in a linear and a log10 version.
all.comparison.plot(
    g_oi = genes_of_interest,
    hk = housekeeping_genes
)

[1] "Plotting the  ratio between SRCIN1 and ACTB in 23days"


“[1m[22mRemoved 284 rows containing non-finite values (`stat_bin()`).”


[1] "Plotting the logarithmic ratio between SRCIN1 and ACTB in 23days"


“[1m[22mRemoved 284 rows containing non-finite values (`stat_bin()`).”


[1] "Plotting the  ratio between SRCIN1 and DLG4 in 23days"


“[1m[22mRemoved 27814 rows containing non-finite values (`stat_bin()`).”


[1] "Plotting the logarithmic ratio between SRCIN1 and DLG4 in 23days"


“[1m[22mRemoved 27814 rows containing non-finite values (`stat_bin()`).”


[1] "Plotting the  ratio between KIAA1217 and ACTB in 23days"


“[1m[22mRemoved 284 rows containing non-finite values (`stat_bin()`).”


[1] "Plotting the logarithmic ratio between KIAA1217 and ACTB in 23days"


“[1m[22mRemoved 284 rows containing non-finite values (`stat_bin()`).”


[1] "Plotting the  ratio between KIAA1217 and DLG4 in 23days"


“[1m[22mRemoved 27814 rows containing non-finite values (`stat_bin()`).”


[1] "Plotting the logarithmic ratio between KIAA1217 and DLG4 in 23days"


“[1m[22mRemoved 27814 rows containing non-finite values (`stat_bin()`).”


[1] "Plotting the  ratio between CIT and ACTB in 23days"


“[1m[22mRemoved 284 rows containing non-finite values (`stat_bin()`).”


[1] "Plotting the logarithmic ratio between CIT and ACTB in 23days"


“[1m[22mRemoved 284 rows containing non-finite values (`stat_bin()`).”


[1] "Plotting the  ratio between CIT and DLG4 in 23days"


“[1m[22mRemoved 27814 rows containing non-finite values (`stat_bin()`).”


[1] "Plotting the logarithmic ratio between CIT and DLG4 in 23days"


“[1m[22mRemoved 27814 rows containing non-finite values (`stat_bin()`).”


### Markers

In [21]:
# Find all markers
# The result must be stored in the global `cluster_markers`, because
# de.genes() reads that variable.
cluster_markers <- cluster.markers(sc_data)

[1] "Finding all markers for time point: 23days"


Calculating cluster 0

Calculating cluster 1

Calculating cluster 2

Calculating cluster 3

Calculating cluster 4

Calculating cluster 5

Calculating cluster 6

Calculating cluster 7

Calculating cluster 8

Calculating cluster 9

Calculating cluster 10

Calculating cluster 11

Calculating cluster 12

Calculating cluster 13

Calculating cluster 14

Calculating cluster 15

Calculating cluster 16

Calculating cluster 17

Calculating cluster 18

Calculating cluster 19

Calculating cluster 20

Calculating cluster 21

Calculating cluster 22

Calculating cluster 23

Calculating cluster 24



[1] "Saving cluster markers for time point 23days in /sharedFolder/Paper_clusters_from_scratch/Partial/23days"


In [22]:
# Find differentially expressed genes
# Filters the global `cluster_markers` down to the genes of interest and
# writes the rows to Results/<timepoint>/de_genes_<timepoint>.csv.
de_genes <- de.genes(genes_of_interest)

[1] "Finding differentially expressed genes for time point: 23days"
    p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
CIT     0   3.193657 0.291 0.025         0      10  CIT
[1] "Saving differentially expressed genes for time point 23days in /sharedFolder/Paper_clusters_from_scratch/Results/23days"


### Annotations

## All

In [55]:
# SET UP NAMES
# Same configuration as at the top of the notebook, repeated so that the
# full run below can be started from this cell.
timepoints <- c(
    "23days", "1month", "1.5month", "2month",
    "3month", "4month", "5month", "6month"
)
# Genes used as denominators of the expression ratios.
housekeeping_genes <- c("ACTB", "DLG4")
# Genes inspected in every cluster.
genes_of_interest <- c("SRCIN1", "KIAA1217", "CIT")
# Folder that holds one "expression_<timepoint>" directory per time point.
path_to_data <- "/sharedFolder/Data/"

# Output folders, both placed under the current working directory.
name_new_dir_results <- paste0(getwd(), "/Results")
name_new_dir_partial <- paste0(getwd(), "/Partial")

# Create whichever of the two output folders does not exist yet.
invisible(lapply(
    c(name_new_dir_results, name_new_dir_partial),
    function(output_dir) {
        if (!dir.exists(output_dir)) {
            dir.create(output_dir)
        }
    }
))

In [None]:
# Run the complete pipeline for every time point.
# `time_point`, `sc_data` and `cluster_markers` are assigned at top level on
# purpose: the plotting and marker functions read them as globals, so these
# names must not be changed here.
# seq_along() keeps the loop from running when `timepoints` is empty
# (1:length(x) would iterate over c(1, 0)).
for (time_point in seq_along(timepoints)) {
    print(paste("STARTING WITH", timepoints[time_point], "LOADING DATA"))

    # Load the data
    sc_data <- load.data(time_point, save = TRUE)

    print(paste(timepoints[time_point], "- DATA LOADED, CLUSTERING"))
    # Run PCA and clustering
    sc_data <- PCA.cluster(sc_data, res = 1, save = TRUE)

    print(paste(timepoints[time_point], "- CLUSTERS DONE, STARTING WITH PLOTS"))

    print(paste(timepoints[time_point], "- VIOLINS"))
    # Violin Plots
    vin.all(
        g_oi = genes_of_interest,
        hk = housekeeping_genes
    )

    print(paste(timepoints[time_point], "- COMPARISON HISTOGRAMS"))
    # Comparison plots
    all.comparison.plot(
        g_oi = genes_of_interest,
        hk = housekeeping_genes
    )

    print(paste(timepoints[time_point], "- PLOTS DONE"))

    print(paste(timepoints[time_point], "- FIND ALL MARKERS"))
    # Find all markers
    cluster_markers <- cluster.markers(sc_data)

    print(paste(timepoints[time_point], "- DIFFERENTIAL EXPRESSION FOR GENES OF INTEREST"))
    # Find differentially expressed genes
    de_genes <- de.genes(genes_of_interest)
}