::: {.content-visible when-meta="verbose"}
# Initialization
:::

In [None]:
# General R and plotting 
library(tidyverse)
library(ggplot2)
library(scales)
library(patchwork)
library(cowplot)
library(gridExtra)
library(ggrepel)
library(stringr)
library(VennDiagram)
library(pheatmap)
library(viridis)
library(here)

# Single Cell Analysis Packages
library(scRepertoire)
library(scRepertoire)
library(circlize)
library(scCustomize)
library(SingleR)
library(celldex)
library(UCell)
library(scplotter)

# DEG, pathway enrichment and visualization packages
library(DESeq2)
library(clusterProfiler)
library(DOSE)
library(pathview)
library(org.Mm.eg.db)
library(scRepertoire)
library(enrichplot)
library(msigdbr)
library(gprofiler2)

source(here('scripts/function_template.r'))

In [None]:
# Anchor the `here` project root relative to this notebook's location.
i_am('scripts/Analysis.ipynb')
here()

# Project root plus the directories used for results, figures and input data.
path <- here()
results_path <- here('results')
figures_path <- here('results/figures')
data_path <- here('data')

# Create the directories when they are missing. `showWarnings = FALSE` keeps
# re-runs quiet once a directory exists; `recursive = TRUE` also creates any
# missing parent directory.
for (directory in c(results_path, figures_path, data_path)) {
  dir.create(directory, showWarnings = FALSE, recursive = TRUE)
}

:::{.content-visible when-meta='processing'}
# Processing
:::

:::{.content-visible when-meta='verbose'}
## Read in data
:::

In [None]:
# Load the processed Seurat object.
# NOTE(review): here('data') resolves to the data directory itself, not to an
# .rds file. readRDS() needs a file path, so the object's file name probably
# has to be appended here -- confirm.
seurat <- readRDS(here('data'))

# VDJ contig annotations for the aggregated run.
# NOTE(review): machine-specific absolute path; consider reading from data_path.
contigs <- read.csv("G:/Belkaid_Lab_RNASeq_Data/Eduard_Ansaldo/single_cell_data/20250103_aCD3/scRNA_seq_gut_aggr_normalized3/outs/vdj_t/filtered_contig_annotations.csv")

# Demultiplexing VDJ libraries: split the contigs by the hash.ID assignment
# stored in the Seurat object.
contig_list <- createHTOContigList(contigs, seurat, group.by = "hash.ID")
names(contig_list)

# Reorder the per-hashtag contig tables to follow the Samples factor levels.
# Indexing a list with a name it does not contain silently returns a NULL
# entry, so stop early when a sample has no contig table.
sample_levels <- levels(seurat$Samples)
missing_samples <- setdiff(sample_levels, names(contig_list))
if (length(missing_samples) > 0) {
  stop(
    "No contig table found for sample(s): ",
    paste(missing_samples, collapse = ", "),
    call. = FALSE
  )
}
contig_list <- contig_list[sample_levels]

names(contig_list)
head(contig_list[[1]])


In [None]:
# Combine the per-sample contig tables into one TCR list, labelling each
# element with the matching level of the Samples factor.
sample_ids <- levels(seurat$Samples)
combined <- combineTCR(contig_list, samples = sample_ids, removeNA = TRUE)

# Experimental group of every sample, in list order:
# 4 x SPF-iso, 3 x SPF-aCD3, 4 x GF-iso, 4 x GF-aCD3.
group_labels <- rep(
  c('SPF-iso', 'SPF-aCD3', 'GF-iso', 'GF-aCD3'),
  times = c(4, 3, 4, 4)
)
combined <- addVariable(
  combined,
  variable.name = 'Groups',
  variables = group_labels
)

# Sample name of every list element.
combined <- addVariable(
  combined,
  variable.name = 'Samples',
  variables = sample_ids
)

:::{.content-visible when-meta='verbose'}
## Integrating data with seurat object
:::

In [None]:
## Integrating analysis with the UMAP
### Combining with the seurat object

# Current cell names and the sample each cell belongs to.
cell_names <- Cells(seurat)
groups_merge <- seurat@meta.data |> pull(Samples)

# Keep the original barcodes in the metadata, then rename every cell to
# "<Sample>_<barcode>" for the merge with the VDJ data.
seurat$cell_barcodes <- cell_names
new_cell_names <- paste(groups_merge, cell_names, sep = '_')
seurat <- RenameCells(seurat, new.names = new_cell_names)

# Attach the clonotype information to the Seurat metadata.
# Alternative call with custom clone-size bins, kept for reference:
# seurat <- combineExpression(combined, seurat, proportion = TRUE, cloneCall='aa', group.by='Samples', cloneSize = c(Rare = 0.5e-04, Small = 0.0005, Medium = 0.005, Large = 0.05, Hyperexpanded = 1))
seurat <- combineExpression(
  combined,
  seurat,
  cloneCall = 'aa',
  group.by = 'Samples',
  proportion = TRUE
)
table(seurat$cloneSize)


# TCR repertoire Analysis

In [None]:
# UMAP clonotype frequency
# Order the clone-size bins from rarest to most expanded. Values that match
# none of these labels become NA (the explicit NA that used to be listed among
# the levels was dropped by factor() anyway, so the result is unchanged).
clone_size_levels <- c(
  "Rare (0 < X <= 1e-04)",
  "Small (1e-04 < X <= 0.001)",
  "Medium (0.001 < X <= 0.01)",
  "Large (0.01 < X <= 0.1)",
  "Hyperexpanded (0.1 < X <= 1)"
)
seurat@meta.data$cloneSize <- factor(
  seurat@meta.data$cloneSize,
  levels = clone_size_levels
)

# Keep each plot in a variable and hand it to ggsave() explicitly, so the
# saved file does not depend on which plot happened to be rendered last.
umap_clone_size <- DimPlot_scCustom(
  seurat,
  group.by = "cloneSize",
  pt.size = 1,
  order = TRUE,
  colors_use = viridis(5)
)
print(umap_clone_size)
ggsave(
  filename = 'UMAP_VDJ_clone_frequencies.pdf',
  plot = umap_clone_size,
  path = figures_path,
  width = 8,
  height = 5
)

# Same plot, one panel per experimental group.
umap_clone_size_by_group <- DimPlot_scCustom(
  seurat,
  group.by = "cloneSize",
  pt.size = 1,
  order = TRUE,
  colors_use = viridis(5),
  split.by = 'Groups'
)
print(umap_clone_size_by_group)
ggsave(
  filename = 'UMAP_VDJ_clone_frequencies_by_group.pdf',
  plot = umap_clone_size_by_group,
  path = figures_path,
  width = 16,
  height = 5
)
    
  

:::{.content-visible when-meta='verbose'}
## Parsing alpha and beta gene usage
:::

In [None]:

# Clonotype columns currently present in the metadata.
colnames(seurat@meta.data |> dplyr::select(starts_with('CT')))

# Split CTgene ("<alpha>_<beta>") into its two chains, then take the first two
# '.'-separated pieces of the alpha chain as V and J gene (the third piece is
# discarded, any further pieces are dropped) and join them as "<V>_<J>".
local_dataframe <- seurat@meta.data |>
  dplyr::select(starts_with('CT')) |>
  separate_wider_delim(
    CTgene,
    delim = '_',
    names = c('CT_alpha', 'CT_beta'),
    cols_remove = FALSE
  ) |>
  separate_wider_delim(
    CT_alpha,
    delim = '.',
    names = c('CT_V_alpha', 'CT_J_alpha', NA),
    cols_remove = FALSE,
    too_many = 'drop'
  ) |>
  mutate(CT_alpha_final = str_c(CT_V_alpha, '_', CT_J_alpha))

# Row order is unchanged by the steps above, so the new column can be written
# back to the Seurat metadata by position.
seurat$CT_alpha_final <- pull(local_dataframe, CT_alpha_final)
colnames(seurat@meta.data |> dplyr::select(starts_with('CT')))

# Sanity check: print every clonotype column name with its length. Iterating
# over the names (rather than over the column vectors themselves) is what makes
# the name available -- colnames() of a plain vector is always NULL.
ct_columns <- seurat@meta.data |> dplyr::select(starts_with('CT'))
for (column_name in colnames(ct_columns)) {
  print(column_name)
  print(length(ct_columns[[column_name]]))
}


## Annotating known invariant TCRs

### iNKTs (TRAV11_TRAJ18)

In [None]:
# Flag cells carrying the TRAV11_TRAJ18 alpha chain; 'Other' is the first
# factor level and 'iNKT TCR' the second.
Idents(seurat) <- 'CT_alpha_final'
seurat@meta.data <- seurat@meta.data |>
  mutate(
    highlight = ifelse(CT_alpha_final == "TRAV11_TRAJ18", 'iNKT TCR', 'Other'),
    highlight = factor(highlight, levels = c('Other', 'iNKT TCR'))
  )

highlight_colors <- hcl.colors(n = 2, palette = 'ag_GrnYl')

# Whole-dataset UMAP.
DimPlot_scCustom(
  seurat,
  group.by = "highlight",
  order = TRUE,
  pt.size = 0.01,
  colors_use = highlight_colors
) +
  ggtitle('iNKTs (TRAV11_TRAJ18)')
ggsave(filename = 'UMAP_VDJ_iNKTs.pdf', width = 6, height = 5, path = figures_path)

# One panel per experimental group.
DimPlot_scCustom(
  seurat,
  group.by = "highlight",
  order = TRUE,
  pt.size = 0.01,
  colors_use = highlight_colors,
  split.by = 'Groups'
) +
  ggtitle('iNKTs (TRAV11_TRAJ18)')
ggsave(filename = 'UMAP_VDJ_iNKTs_by_group.pdf', width = 16, height = 5, path = figures_path)

### MAITs (TRAV1_TRAJ33)

In [None]:
# MAITs: flag cells carrying the TRAV1_TRAJ33 alpha chain; 'Other' is the
# first factor level and 'MAIT TCR' the second.
Idents(seurat) <- 'CT_alpha_final'
seurat@meta.data <- seurat@meta.data |>
  mutate(
    highlight_MAIT = ifelse(CT_alpha_final == "TRAV1_TRAJ33", 'MAIT TCR', 'Other'),
    highlight_MAIT = factor(highlight_MAIT, levels = c('Other', 'MAIT TCR'))
  )

highlight_colors <- hcl.colors(n = 2, palette = 'ag_GrnYl')

# Whole-dataset UMAP.
DimPlot_scCustom(
  seurat,
  group.by = "highlight_MAIT",
  order = TRUE,
  pt.size = 0.01,
  colors_use = highlight_colors
) +
  ggtitle('MAIT (TRAV1_TRAJ33)')
ggsave(filename = 'UMAP_VDJ_MAIT.pdf', width = 6, height = 5, path = figures_path)

# One panel per experimental group.
DimPlot_scCustom(
  seurat,
  group.by = "highlight_MAIT",
  order = TRUE,
  pt.size = 0.01,
  colors_use = highlight_colors,
  split.by = 'Groups'
) +
  ggtitle('MAIT (TRAV1_TRAJ33)')
ggsave(filename = 'UMAP_VDJ_MAIT_by_group.pdf', width = 16, height = 5, path = figures_path)

  
  

### QFLs (TRAV9D-3_TRAJ21)

In [None]:
# QFL: flag cells carrying the TRAV9D-3_TRAJ21 alpha chain; 'Other' is the
# first factor level and 'QFL TCR' the second.
Idents(seurat) <- 'CT_alpha_final'
seurat@meta.data <- seurat@meta.data |>
  mutate(
    highlight_QFL = ifelse(CT_alpha_final == "TRAV9D-3_TRAJ21", 'QFL TCR', 'Other'),
    highlight_QFL = factor(highlight_QFL, levels = c('Other', 'QFL TCR'))
  )

highlight_colors <- hcl.colors(n = 2, palette = 'ag_GrnYl')

# Whole-dataset UMAP.
DimPlot_scCustom(
  seurat,
  group.by = "highlight_QFL",
  order = TRUE,
  pt.size = 0.01,
  colors_use = highlight_colors
) +
  ggtitle('QFL TCR (TRAV9D-3_TRAJ21)')
ggsave(filename = 'UMAP_VDJ_QFL.pdf', width = 8, height = 5, path = figures_path)

# One panel per experimental group.
DimPlot_scCustom(
  seurat,
  group.by = "highlight_QFL",
  order = TRUE,
  pt.size = 0.01,
  colors_use = highlight_colors,
  split.by = 'Groups'
) +
  ggtitle('QFL TCR (TRAV9D-3_TRAJ21)')
ggsave(filename = 'UMAP_VDJ_QFL_by_group.pdf', width = 16, height = 5, path = figures_path)

## Repertoire Diversity


### D50 diversity measure

In [None]:
# Extracting TCR data for clusters of interest
# NOTE(review): `scRNAseq` is not created anywhere in the visible part of this
# notebook (the object loaded earlier is called `seurat`) -- confirm which
# object is meant before running this cell.
Idents(scRNAseq) <- 'cell_types'
combined2 <- scRepertoire:::.expression2List(scRNAseq, split.by = 'ident')

cell_types <- unique(scRNAseq$cell_types)
# Character copies are used for indexing: `[[` on a list with a factor would
# index by the integer codes instead of by name.
cell_type_names <- as.character(cell_types)
hto_ids <- as.character(unique(scRNAseq$hash.ID))

# Combinations with fewer cells than this are reported as NA.
min_cells <- 20

# Preallocated result matrix: one row per HTO, one "<cell type>_D50" column
# per cell type. Entries stay NA unless a value is computed below.
d50_values <- matrix(
  NA_real_,
  nrow = length(hto_ids),
  ncol = length(cell_type_names),
  dimnames = list(NULL, paste0(cell_type_names, '_D50'))
)

# Calculate D50: the number of largest clonotypes (by CTaa) needed to cover
# the first half of the cells, divided by the number of distinct clonotypes.
for (i in seq_along(hto_ids)) {
  HTO <- hto_ids[[i]]

  for (j in seq_along(cell_type_names)) {
    # Clone sizes for this cell type and HTO, largest first.
    clone_sizes <- combined2[[cell_type_names[[j]]]] |>
      filter(hash.ID == HTO) |>
      dplyr::count(CTaa, sort = TRUE, name = 'clone_size')

    n_cells <- sum(clone_sizes$clone_size)
    if (n_cells < min_cells) {
      next
    }

    # Working on cumulative clone sizes (instead of slicing the first half of
    # the per-cell rows) makes the result independent of how rows of equally
    # sized clones happen to be ordered.
    L50 <- floor(n_cells / 2)
    n_top_clones <- which(cumsum(clone_sizes$clone_size) >= L50)[1]
    d50_values[i, j] <- n_top_clones / nrow(clone_sizes)
  }

  print(HTO)
}

results <- as.data.frame(d50_values) |>
  mutate(Mouse = hto_ids) |>
  arrange(Mouse) |>
  relocate(Mouse)
results

# file.path() inserts the separator that paste0(path, ...) left out, which
# wrote the file next to the project directory under a fused name.
write.csv(results, file = file.path(path, 'D50_per_cell_type.csv'), row.names = FALSE)


In [None]:
#### TO DO: VISUALIZATION

### Additional Diversity Measures
TODO: this analysis still needs to be finished per cell type.

In [None]:
# Extracting TCR data for clusters of interest
# NOTE(review): `scRNAseq` is not created anywhere in the visible part of this
# notebook (the object loaded earlier is called `seurat`) -- confirm.
Idents(scRNAseq) <- 'cell_types'
# The stray trailing `[` that used to end this line made the whole cell
# unparseable; it has been removed.
combined2 <- scRepertoire:::.expression2List(scRNAseq, split.by = 'ident')

# Diversity measures per list element (here: per identity class), returned as
# a table instead of a plot, using 100 bootstrap iterations.
diversity_measure_results <- clonalDiversity(
  combined2,
  cloneCall = 'aa',
  exportTable = TRUE,
  n.boots = 100
)
#diversity_measure_results <- diversity_measure_results |>
    #separate_wider_delim(Group, '_', names = c('cell_type', 'Sample'))

# file.path() inserts the separator that paste0(path, ...) left out.
write.csv(
  diversity_measure_results,
  file = file.path(path, 'diversity_measure_results.csv'),
  row.names = TRUE
)


## Overlap Analysis

In [None]:
# One metadata table (including clonotype calls) per sample.
combined2 <- scRepertoire:::.expression2List(scRNAseq, split.by = 'Samples')

# Pairwise Morisita index between samples, exported as a table.
morisita_table <- clonalOverlap(
  combined2,
  cloneCall = 'aa',
  chain = 'both',
  method = 'morisita',
  exportTable = TRUE
)
write.csv(morisita_table, file = 'morisita_table.csv')
head(morisita_table)

# Pairwise overlap coefficient. The result reuses the `morisita_table`
# variable, so from here on it holds the overlap coefficients.
morisita_table <- clonalOverlap(
  combined2,
  cloneCall = 'aa',
  chain = 'both',
  method = 'overlap',
  exportTable = TRUE
)
write.csv(morisita_table, file = 'overlap_coefficient_table.csv')
head(morisita_table)


In [None]:
### TO DO VISUALIZATION

### Circos

In [None]:

# Named colour vector, one entry per mouse: shades of one base colour per
# group (Reg-Chow = skyblue, Casein = peachpuff, AminoAcid = indianred).
mouse_palette <- setNames(
  c(
    'skyblue1', 'skyblue2', 'skyblue3', 'skyblue4',
    'peachpuff', 'peachpuff1', 'peachpuff2', 'peachpuff3', 'peachpuff4',
    'indianred1', 'indianred2', 'indianred3', 'indianred4'
  ),
  c(
    '1-Reg-Chow-M', '2-Reg-Chow-M', '3-Reg-Chow-F', '4-Reg-Chow-F',
    '5-Casein-M', '6-Casein-M', '7-Casein-M', '8-Casein-F', '9-Casein-F',
    '10-AminoAcid-M', '11-AminoAcid-F', '12-AminoAcid-F', '13-AminoAcid-F'
  )
)

        

In [None]:
# List the available metadata columns, then switch the active identity to the
# cluster assignment and draw the labelled UMAP.
colnames(scRNAseq@meta.data)
Idents(scRNAseq) <- 'seurat_clusters'
DimPlot(object = scRNAseq, label = TRUE)

In [None]:
# scRNAseq_small <- subset(scRNAseq, subset =  seurat_clusters == '6', invert = T)
scRNAseq_small <- scRNAseq
Idents(scRNAseq_small) <- 'Samples'
combined2 <- scRepertoire:::.expression2List(scRNAseq_small, split.by = 'orig.ident')

# NOTE(review): only the first orig.ident group is used below -- confirm that
# the object has a single orig.ident value.
#head(combined2[[1]])
typeof(combined2[[1]]$clonalFrequency)

# One row per clonotype (CTaa) with one count column per sample:
#  1. count cells per clonotype and sample,
#  2. keep a single row per clonotype/sample pair,
#  3. spread the counts into one column per sample (other columns keep the
#     first value seen for the clonotype),
#  4. replace missing values with 0 and number the clonotypes by decreasing
#     clonalFrequency.
TCR_data <- combined2[[1]] |>
  as_tibble() |>
  dplyr::select(c('CTaa', 'Samples', 'CTgene', 'clonalFrequency')) |>
  add_count(CTaa, Samples, sort = TRUE, name = 'counts_per_celltype') |>
  # add_count(CTaa, tissue, sort = TRUE, name = 'counts_per_tissue') |>
  group_by(CTaa, Samples) |>
  slice_head(n = 1) |>
  ungroup() |>
  arrange(desc(counts_per_celltype)) |>
  pivot_wider(
    id_cols = c(CTaa),
    names_from = Samples,
    values_from = counts_per_celltype,
    unused_fn = dplyr::first
  ) |>
  # pivot_wider(id_cols = c(CTaa), names_from = tissue, values_from = counts_per_tissue, unused_fn = dplyr::first) |>
  mutate(across(everything(), ~ replace(.x, is.na(.x), 0))) |>
  arrange(desc(clonalFrequency)) |>
  mutate(clonotype = paste0('clonotype ', as.character(row_number())))


# Th1_TCR_data <- Th1_TCR_data |> 
#     rowwise() |>
#     mutate(max=max(c_across(contains('rep'))),
#             GF_sum=sum(c_across(contains('GF-'))),
#             SPF_sum=sum(c_across(contains('SPF-')))) |>
#     ungroup() |>
#     mutate(spread=total_CTaa_counts-max,
#         Group_sharing=1-abs(GF_sum-SPF_sum)/total_CTaa_counts) |>
#     arrange( desc(Group_sharing))


    #dplyr::count(CTaa, Frequency, cloneType, Samples, Groups, sort = T)
    #distinct()

    # add_count(CTaa, sort=T)
#head( Th1_TCR_data |> filter(CTaa == 'CAANSNNRIFF_CASSLGASAETLYF') )
#head(TCR_data |> filter(skin > 0 & LN > 0 & spleen > 0))
#head(combined2[[1]] |> filter(CTaa == 'CAVRRGSALGRLHF_CASSLGEDTQYF') |> dplyr::select('CTgene'))

write_csv(TCR_data, file = 'TCR_data_per_mouse.csv')
typeof(TCR_data$clonalFrequency)

# Long format: one row per clonotype and mouse with that mouse's cell count.
TCR_data_plot <- TCR_data |>
  pivot_longer(
    !c(CTaa, CTgene, clonalFrequency, clonotype),
    names_to = 'mouse',
    values_to = 'mouse_counts'
  )

head(TCR_data_plot)
typeof(TCR_data_plot$clonalFrequency)


Circos

In [None]:
# Keep an untouched copy of the long table before adding plot coordinates.
unique(TCR_data_plot$mouse)
TCR_data_plot_old <- TCR_data_plot
unique(TCR_data_plot_old$mouse)

# Display order of the mice.
mouse_levels <- c(
  '1-Reg-Chow-M', '2-Reg-Chow-M', '3-Reg-Chow-F', '4-Reg-Chow-F',
  '5-Casein-M', '6-Casein-M', '7-Casein-M', '8-Casein-F', '9-Casein-F',
  '10-AminoAcid-M', '11-AminoAcid-F', '12-AminoAcid-F', '13-AminoAcid-F'
)

# Within each mouse, lay the clonotypes end to end from largest to smallest:
# every clonotype gets the interval [start, end] of cumulative cell counts,
# stored as a length-2 vector in the `coordinates` list column.
TCR_data_plot <- TCR_data_plot |>
  arrange(desc(mouse_counts)) |>
  group_by(mouse) |>
  mutate(
    TCR_sequence_count_by_cell_type = sum(mouse_counts),
    TCR_sequence_position_by_cell_type = cumsum(mouse_counts),
    TCR_sequence_start_position_by_cell_type =
      c(0, head(TCR_sequence_position_by_cell_type, -1))
  ) |>
  ungroup() |>
  mutate(
    coordinates = map2(
      TCR_sequence_start_position_by_cell_type,
      TCR_sequence_position_by_cell_type,
      \(start, end) c(start, end)
    ),
    mouse = factor(mouse, levels = mouse_levels)
  )

#head(TCR_data_plot |> arrange(mouse_counts) |> filter(mouse == 'Tregs'), 30)
#tail(TCR_data_plot |> filter(mouse == 'Naive'))
levels(TCR_data_plot$mouse)

In [None]:
# Preview the first rows of the long per-mouse clonotype table.
head(TCR_data_plot)

In [None]:
# Two-column matrix (origin = 0, total cell count) with one row per mouse,
# ordered by the mouse factor; row names are the mouse labels.
TCR_data_plot_distinct <- TCR_data_plot |>
  dplyr::select(mouse, TCR_sequence_count_by_cell_type) |>
  distinct() |>
  arrange(mouse) |>
  mutate(
    origin = TCR_sequence_count_by_cell_type * 0,
    .before = TCR_sequence_count_by_cell_type
  ) |>
  column_to_rownames('mouse') |>
  as.matrix()
head(TCR_data_plot_distinct, 12)

In [None]:
# List the distinct values of the Samples metadata column.
unique(scRNAseq$Samples)

In [None]:

# Copy the per-mouse palette into grid_cols and look up one entry by name.
grid_cols <- mouse_palette
grid_cols['13-AminoAcid-F']


In [None]:
########### Circos ####################
# Circular plot of clonotype sharing between mice: one sector per mouse (sized
# by its total cell count) and one link per clonotype found in two mice.
#path <- './'
cluster <- 'all'

# Sector colours come from the named per-mouse palette. The names are kept as
# they are: overwriting them positionally with levels(scRNAseq$Samples) could
# silently pair colours with the wrong mouse, or break the name lookups below.
grid_cols <- mouse_palette
sector_names <- levels(TCR_data_plot$mouse)
missing_colors <- setdiff(sector_names, names(grid_cols))
if (length(missing_colors) > 0) {
  stop(
    'No palette colour for sector(s): ',
    paste(missing_colors, collapse = ', '),
    call. = FALSE
  )
}

# Initialize plot. file.path() inserts the separator that paste0(path, ...)
# left out.
pdf(file = file.path(path, paste0('Circos_TCR_per_mouse_TH1_only', '.pdf')))

# Setting parameters
circos.par(
  gap.degree = 2,
  track.height = 0.1,
  cell.padding = c(0, 0, 0, 0),
  circle.margin = 0.8
)

# Initializing plot sectors (one per row of the xlim matrix)
circos.initialize(xlim = TCR_data_plot_distinct)

# Drawing sector labels, axes and coloured sector bands
circos.track(
  ylim = c(0, 1),
  panel.fun = function(x, y) {
    # Narrow sectors get a radial label, wide sectors a label that follows
    # the circle.
    if (CELL_META$cell.width < 45) {
      circos.text(
        CELL_META$xcenter,
        CELL_META$cell.ylim[2] + mm_y(10),
        adj = c(0, 1),
        CELL_META$sector.index,
        facing = 'clockwise',
        niceFacing = TRUE,
        cex = 0.7
      )
    } else {
      circos.text(
        CELL_META$xcenter,
        CELL_META$cell.ylim[2] + mm_y(11),
        #adj = c(0, 1),
        CELL_META$sector.index,
        facing = 'bending.inside',
        niceFacing = TRUE,
        cex = 0.8
      )
    }

    # Only sectors spanning more than 500 cells get an axis (ticks every 500).
    if (CELL_META$xrange[[1]] > 500) {
      circos.axis(
        labels.cex = 0.5,
        minor.ticks = 0,
        major.tick = 1,
        labels.facing = 'clockwise',
        major.at = seq(500, CELL_META$xrange[[1]], by = 500)
      )
    }

    highlight.sector(
      CELL_META$sector.index,
      col = grid_cols[CELL_META$sector.index]
    )
  }
)

# Clonotypes actually present in each mouse; computed once instead of inside
# the nested loop.
present_clonotypes <- TCR_data_plot |>
  filter(mouse_counts != 0) |>
  dplyr::select(c('clonotype', 'mouse', 'coordinates'))

# Plot links between mice. Every unordered pair is visited once (j > i), so
# no link is drawn twice.
for (i in seq_along(sector_names)) {
  origin_cell_type <- sector_names[[i]]

  for (j in seq_along(sector_names)) {
    if (j <= i) {
      next
    }
    target_cell_type <- sector_names[[j]]

    table_one <- present_clonotypes |> filter(mouse == origin_cell_type)
    table_two <- present_clonotypes |> filter(mouse == target_cell_type)

    # Clonotypes shared by the two mice; `.x` columns belong to the origin,
    # `.y` columns to the target.
    link_table <- inner_join(table_one, table_two, by = 'clonotype')

    # Links within one diet group use that group's colour; links between
    # groups are drawn in transparent gray.
    if (str_detect(origin_cell_type, 'Reg') && str_detect(target_cell_type, 'Reg')) {
      link_color <- alpha(grid_cols[['3-Reg-Chow-F']], 1)
    } else if (str_detect(origin_cell_type, 'Casein') && str_detect(target_cell_type, 'Casein')) {
      #link_color <- alpha('magenta', 0.6)
      link_color <- alpha(grid_cols[['8-Casein-F']], 1)
    } else if (str_detect(origin_cell_type, 'AminoAcid') && str_detect(target_cell_type, 'AminoAcid')) {
      link_color <- alpha(grid_cols[['12-AminoAcid-F']], 1)
    } else {
      link_color <- alpha('gray', 0.15)
    }

    # link_color <- alpha(grid_cols[origin_cell_type], 0.5)

    for (k in seq_len(nrow(link_table))) {
      # Sector names are passed as character strings (not factor values) so
      # the sectors are always looked up by name.
      circos.link(
        origin_cell_type,
        as.vector(link_table$coordinates.x[[k]]),
        target_cell_type,
        as.vector(link_table$coordinates.y[[k]]),
        col = link_color
      )
    }
  }
}

title(paste0('TCR Overlap ', cluster, ' per mouse'))

dev.off()

circos.clear()



## 