Due to library conflicts, this notebook is executed in a different environment.
```
conda install -c bioconda anndata2ri
conda install nb_conda_kernels
conda install -c bioconda scanpy 
```

In [1]:
import scanpy as sc
import anndata as ann
import pandas as pd
import numpy as np

In [2]:
# anndata2ri bridges R and Python objects; with the conversion activated, the
# SingleCellExperiment objects returned by the %%R cells below show up as AnnData.
import anndata2ri
anndata2ri.activate()
# Registers the %%R cell magic used by the conversion cells in this notebook.
%load_ext rpy2.ipython

In [3]:
# Root folder of the Borcherding dataset; converted files are written here.
path_out_base = '../data/Borcherding/'
# The raw inputs are read from the `raw/` subfolder of the dataset root.
path_raw_base = path_out_base + 'raw/'

## Convert rds to h5ad

In [4]:
# Input files, all resolved relative to the raw-data folder.
path_combinedtcr = f'{path_raw_base}CombinedTCR_object.rds'
path_filtered = f'{path_raw_base}filtered_seuratObjects_harmony-001.rds'
path_gse123 = f'{path_raw_base}GSE123814.rds'
path_gse144 = f'{path_raw_base}GSE144469_TCR_filtered_contig_annotations_all.csv'

In [5]:
# Destination files for the converted data, one per input file.
out_combined = f'{path_out_base}tcrs_combined.csv'
out_filtered = f'{path_out_base}rna_combined.h5ad'
out_gse123 = f'{path_out_base}rna_123.h5ad'
out_gse144 = f'{path_out_base}tcrs_144.csv'

### TCR Data

In [6]:
%%R -i path_combinedtcr -o combined_tcr
# Read the serialized R object and expose it to Python under the same name.
combined_tcr <- readRDS(file = path_combinedtcr)

In [7]:
# `combined_tcr` arrives from the %%R cell as a collection of data frames.
# Flatten it only on the first run: once the name is bound to the concatenated
# DataFrame, iterating it again would yield column names and break pd.concat,
# so the guard keeps this cell safe to re-execute.
if not isinstance(combined_tcr, pd.DataFrame):
    combined_tcr = pd.concat(list(combined_tcr), ignore_index=True)

# Persist the merged table, then show it as the cell output.
combined_tcr.to_csv(out_combined)
combined_tcr

Unnamed: 0,barcode,sample,ID,TCR1,cdr3_aa1,cdr3_nt1,TCR2,cdr3_aa2,cdr3_nt2,CTgene,CTnt,CTaa,CTstrict,cellType,Patient
0,BCT1.1_AAACCTGCAGATCGGA-1,BCT1.1,ID,,,,TRBV4-1..TRBJ2-5.TRBC2,CASRLAGGLQETQYF,TGCGCCAGCCGCCTAGCGGGGGGGCTCCAAGAGACCCAGTACTTC,NA_TRBV4-1..TRBJ2-5.TRBC2,NA_TGCGCCAGCCGCCTAGCGGGGGGGCTCCAAGAGACCCAGTACTTC,NA_CASRLAGGLQETQYF,NA_NA_TRBV4-1..TRBJ2-5.TRBC2_TGCGCCAGCCGCCTAGC...,T-AB,Patient99
1,BCT1.1_AAACGGGTCATAGCAC-1,BCT1.1,ID,TRAV13-2.TRAJ3.TRAC,CAETILYSSASKIIF,TGTGCAGAGACTATTTTGTACAGCAGTGCTTCCAAGATAATCTTT,TRBV30..TRBJ1-6.TRBC1,CAWTTPGTSNSPLHF,TGTGCCTGGACCACCCCCGGGACCAGTAATTCACCCCTCCACTTT,TRAV13-2.TRAJ3.TRAC_TRBV30..TRBJ1-6.TRBC1,TGTGCAGAGACTATTTTGTACAGCAGTGCTTCCAAGATAATCTTT_...,CAETILYSSASKIIF_CAWTTPGTSNSPLHF,TRAV13-2.TRAJ3.TRAC_TGTGCAGAGACTATTTTGTACAGCAG...,T-AB,Patient99
2,BCT1.1_AAAGATGAGACAGGCT-1,BCT1.1,ID,TRAV26-1.TRAJ23.TRAC,CIVSLSLVIYNQGGKLIF,TGCATCGTCAGTCTTTCTCTTGTGATTTATAACCAGGGAGGAAAGC...,TRBV28..TRBJ2-7.TRBC2,CASSSSWEGSPGEQYF,TGTGCCAGCAGCTCATCCTGGGAGGGGAGTCCGGGCGAGCAGTACTTC,TRAV26-1.TRAJ23.TRAC_TRBV28..TRBJ2-7.TRBC2,TGCATCGTCAGTCTTTCTCTTGTGATTTATAACCAGGGAGGAAAGC...,CIVSLSLVIYNQGGKLIF_CASSSSWEGSPGEQYF,TRAV26-1.TRAJ23.TRAC_TGCATCGTCAGTCTTTCTCTTGTGA...,T-AB,Patient99
3,BCT1.1_AAAGATGCACAAGCCC-1,BCT1.1,ID,TRAV2.TRAJ9.TRAC,CAVERNTGGFKTIF,TGTGCTGTGGAGCGAAATACTGGAGGCTTCAAAACTATCTTT,TRBV7-8..TRBJ1-5.TRBC1,CASSQLGNGNQPQHF,TGTGCCAGCAGCCAATTGGGGAATGGCAATCAGCCCCAGCATTTT,TRAV2.TRAJ9.TRAC_TRBV7-8..TRBJ1-5.TRBC1,TGTGCTGTGGAGCGAAATACTGGAGGCTTCAAAACTATCTTT_TGT...,CAVERNTGGFKTIF_CASSQLGNGNQPQHF,TRAV2.TRAJ9.TRAC_TGTGCTGTGGAGCGAAATACTGGAGGCTT...,T-AB,Patient99
4,BCT1.1_AAAGATGTCTGAGTGT-1,BCT1.1,ID,,,,TRBV28..TRBJ1-1.TRBC1,CASSLPGARVAFF,TGTGCCAGCAGTTTGCCGGGGGCCCGGGTGGCTTTCTTT,NA_TRBV28..TRBJ1-1.TRBC1,NA_TGTGCCAGCAGTTTGCCGGGGGCCCGGGTGGCTTTCTTT,NA_CASSLPGARVAFF,NA_NA_TRBV28..TRBJ1-1.TRBC1_TGTGCCAGCAGTTTGCCG...,T-AB,Patient99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1213257,SCT4.2_TTTGTCAAGTGTTGAA-1,SCT4.2,ID,TRAV8-3.TRAJ43.TRAC,CAVGNNDMRF,TGTGCTGTGGGGAACAATGACATGCGCTTT,TRBV5-1.TRBD1.TRBJ2-6.TRBC2,CASSLAQGAGANVLTF,TGCGCCAGCAGCTTGGCCCAGGGTGCTGGGGCCAACGTCCTGACTTTC,TRAV8-3.TRAJ43.TRAC_TRBV5-1.TRBD1.TRBJ2-6.TRBC2,TGTGCTGTGGGGAACAATGACATGCGCTTT_TGCGCCAGCAGCTTG...,CAVGNNDMRF_CASSLAQGAGANVLTF,TRAV8-3.TRAJ43.TRAC_TGTGCTGTGGGGAACAATGACATGCG...,T-AB,Patient98
1213258,SCT4.2_TTTGTCACAAATACAG-1,SCT4.2,ID,TRAV38-1.TRAJ44.TRAC,CAFMKQATASKLTF,TGTGCTTTCATGAAGCAGGCTACTGCCAGTAAACTCACCTTT,TRBV12-3..TRBJ1-5.TRBC1,CASSLWAGEGQPQHF,TGTGCCAGCAGTTTATGGGCAGGGGAAGGTCAGCCCCAGCATTTT,TRAV38-1.TRAJ44.TRAC_TRBV12-3..TRBJ1-5.TRBC1,TGTGCTTTCATGAAGCAGGCTACTGCCAGTAAACTCACCTTT_TGT...,CAFMKQATASKLTF_CASSLWAGEGQPQHF,TRAV38-1.TRAJ44.TRAC_TGTGCTTTCATGAAGCAGGCTACTG...,T-AB,Patient98
1213259,SCT4.2_TTTGTCAGTCTAACGT-1,SCT4.2,ID,TRAV17.TRAJ56.TRAC,CATVTTGANSKLTF,TGTGCTACGGTCACTACTGGAGCCAATAGTAAGCTGACATTT,TRBV5-1..TRBJ2-1.TRBC2,CASSLEAASSYNEQFF,TGCGCCAGCAGCTTGGAGGCGGCGAGCTCCTACAATGAGCAGTTCTTC,TRAV17.TRAJ56.TRAC_TRBV5-1..TRBJ2-1.TRBC2,TGTGCTACGGTCACTACTGGAGCCAATAGTAAGCTGACATTT_TGC...,CATVTTGANSKLTF_CASSLEAASSYNEQFF,TRAV17.TRAJ56.TRAC_TGTGCTACGGTCACTACTGGAGCCAAT...,T-AB,Patient98
1213260,SCT4.2_TTTGTCAGTTCAGTAC-1,SCT4.2,ID,TRAV17.TRAJ22.TRAC,CATAGSARQLTF,TGTGCTACGGCGGGTTCTGCAAGGCAACTGACCTTT,TRBV12-3..TRBJ2-3.TRBC2,CASSLSVSTDTQYF,TGTGCCAGCAGCCTTAGCGTTAGCACAGATACGCAGTATTTT,TRAV17.TRAJ22.TRAC_TRBV12-3..TRBJ2-3.TRBC2,TGTGCTACGGCGGGTTCTGCAAGGCAACTGACCTTT_TGTGCCAGC...,CATAGSARQLTF_CASSLSVSTDTQYF,TRAV17.TRAJ22.TRAC_TGTGCTACGGCGGGTTCTGCAAGGCAA...,T-AB,Patient98


### Transcriptome

In [8]:
%%R -i path_filtered -o adata_filtered
# Load the Seurat object stored at `path_filtered` and return it to Python as a
# SingleCellExperiment.
suppressPackageStartupMessages(library(Seurat))

# SingleCellExperiment was originally loaded from a user-specific conda
# environment. Keep using that location when it exists; otherwise fall back to
# the default library paths (lib.loc = NULL) so the cell also runs elsewhere.
sce_lib <- '/home/icb/felix.drost/miniconda3/envs/rds/lib/R/lib'
sce_lib_loc <- if (dir.exists(sce_lib)) sce_lib else NULL
suppressPackageStartupMessages(library('SingleCellExperiment', lib.loc = sce_lib_loc))

rds <- readRDS(path_filtered)
adata_filtered <- as.SingleCellExperiment(rds)

In [9]:
# Cast the `nFeature_RNA` column of `obs` to integers.
n_features_filtered = adata_filtered.obs['nFeature_RNA'].values
adata_filtered.obs['nFeature_RNA'] = n_features_filtered.astype(int)

In [10]:
# Persist the converted object as h5ad, then show its summary as the cell output.
adata_filtered.write_h5ad(filename=out_filtered)
adata_filtered

  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'orig.ident' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'db.class' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'HPCA.first.labels' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'HPCA.labels' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'HPCA.pruned.labels' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'DICE.first.labels' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'DICE.labels' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'DICE.pruned.labels' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'functional.cluster' as categorical
  c.reorder_categories(natsorted(c.categorie

  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Tissue' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Sample' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Sorted' as categorical


AnnData object with n_obs × n_vars = 1593679 × 59326
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'mito.genes', 'db.weight.score', 'db.ratio', 'db.class', 'db.score', 'HPCA.first.labels', 'HPCA.labels', 'HPCA.pruned.labels', 'DICE.first.labels', 'DICE.labels', 'DICE.pruned.labels', 'functional.cluster', 'functional.cluster.conf', 'consensus.major', 'consensus.Tcell', 'barcode', 'CTgene', 'CTnt', 'CTaa', 'CTstrict', 'Frequency', 'cloneType', 'Treg_UCell', 'CD8_Activation_UCell', 'Antinflammatory_UCell', 'Anergy_UCell', 'Proinflammatory_UCell', 'Lipid_mediators_UCell', 'Glycolysis_UCell', 'TCA_cycle_UCell', 'PPS_UCell', 'Glycogen_Metabolism_UCell', 'Glucose_Deprivation_UCell', 'M1.Macro_UCell', 'M2.Macro_UCell', 'Cytolytic_UCell', 'T1_Interferon_UCell', 'T2_Interferon_UCell', 'Hypoxia_UCell', 'T_Cell_Terminal_Differentiation_UCell', 'G1S_UCell', 'G2M_UCell', 'SampleID', 'GEO_RNA', 'Cohort', 'Type', 'Tissue', 'Sample', 'Sorted', 'RNA_snn_res.0.8', 'RNA_snn_res.1.2', 'RNA_snn_res.1

### Second experiment

In [11]:
%%R -i path_gse123 -o gse123
# Load the Seurat object stored at `path_gse123` and return it to Python as a
# SingleCellExperiment.
suppressPackageStartupMessages(library(Seurat))

# SingleCellExperiment was originally loaded from a user-specific conda
# environment. Keep using that location when it exists; otherwise fall back to
# the default library paths (lib.loc = NULL) so the cell also runs elsewhere.
sce_lib <- '/home/icb/felix.drost/miniconda3/envs/rds/lib/R/lib'
sce_lib_loc <- if (dir.exists(sce_lib)) sce_lib else NULL
suppressPackageStartupMessages(library('SingleCellExperiment', lib.loc = sce_lib_loc))

rds <- readRDS(path_gse123)
gse123 <- as.SingleCellExperiment(rds)

In [12]:
# Show the summary of the object returned by the %%R conversion cell.
gse123

AnnData object with n_obs × n_vars = 78034 × 23467
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'mito.genes', 'db.weight.score', 'db.ratio', 'db.class', 'db.score', 'SampleID', 'GEO_RNA', 'Cohort', 'Type', 'Tissue', 'Sample', 'Sorted', 'HPCA.first.labels', 'HPCA.labels', 'HPCA.pruned.labels', 'DICE.first.labels', 'DICE.labels', 'DICE.pruned.labels', 'functional.cluster', 'functional.cluster.conf', 'consensus.major', 'consensus.Tcell', 'Treg_UCell', 'CD8_Activation_UCell', 'Antinflammatory_UCell', 'Anergy_UCell', 'Proinflammatory_UCell', 'Lipid_mediators_UCell', 'Glycolysis_UCell', 'TCA_cycle_UCell', 'PPS_UCell', 'Glycogen_Metabolism_UCell', 'Glucose_Deprivation_UCell', 'M1.Macro_UCell', 'M2.Macro_UCell', 'Cytolytic_UCell', 'T1_Interferon_UCell', 'T2_Interferon_UCell', 'Hypoxia_UCell', 'T_Cell_Terminal_Differentiation_UCell', 'G1S_UCell', 'G2M_UCell', 'barcode', 'CTgene', 'CTnt', 'CTaa', 'CTstrict', 'Frequency', 'cloneType', 'ident'
    layers: 'logcounts'

In [13]:
# Cast the `nFeature_RNA` column of `obs` to integers.
n_features_gse123 = gse123.obs['nFeature_RNA'].values
gse123.obs['nFeature_RNA'] = n_features_gse123.astype(int)

In [14]:
# Persist the converted object as h5ad, then show its summary as the cell output.
gse123.write_h5ad(filename=out_gse123)
gse123

  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'orig.ident' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'db.class' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'SampleID' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'GEO_RNA' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Cohort' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Type' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Tissue' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Sample' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Sorted' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'HPCA.first.labels' as categorical
  

  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'CTaa' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'CTstrict' as categorical


AnnData object with n_obs × n_vars = 78034 × 23467
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'mito.genes', 'db.weight.score', 'db.ratio', 'db.class', 'db.score', 'SampleID', 'GEO_RNA', 'Cohort', 'Type', 'Tissue', 'Sample', 'Sorted', 'HPCA.first.labels', 'HPCA.labels', 'HPCA.pruned.labels', 'DICE.first.labels', 'DICE.labels', 'DICE.pruned.labels', 'functional.cluster', 'functional.cluster.conf', 'consensus.major', 'consensus.Tcell', 'Treg_UCell', 'CD8_Activation_UCell', 'Antinflammatory_UCell', 'Anergy_UCell', 'Proinflammatory_UCell', 'Lipid_mediators_UCell', 'Glycolysis_UCell', 'TCA_cycle_UCell', 'PPS_UCell', 'Glycogen_Metabolism_UCell', 'Glucose_Deprivation_UCell', 'M1.Macro_UCell', 'M2.Macro_UCell', 'Cytolytic_UCell', 'T1_Interferon_UCell', 'T2_Interferon_UCell', 'Hypoxia_UCell', 'T_Cell_Terminal_Differentiation_UCell', 'G1S_UCell', 'G2M_UCell', 'barcode', 'CTgene', 'CTnt', 'CTaa', 'CTstrict', 'Frequency', 'cloneType', 'ident'
    layers: 'logcounts'

In [15]:
# Load the contig annotations with the first CSV column as index, store a copy
# under the output path, and display the table.
gse144 = pd.read_csv(filepath_or_buffer=path_gse144, index_col=0)
gse144.to_csv(path_or_buf=out_gse144)
gse144

Unnamed: 0,barcode,is_cell,contig_id,high_confidence,length,chain,v_gene,d_gene,j_gene,c_gene,full_length,productive,cdr3,cdr3_nt,reads,umis,raw_clonotype_id,raw_consensus_id
1,AAACCTGAGAAGGTTT-NC4,True,AAACCTGAGAAGGTTT-NC4_contig_1,True,459,TRA,TRAV13-1,,TRAJ9,TRAC,True,True,CAARDTGGFKTIF,TGTGCAGCAAGAGATACTGGAGGCTTCAAAACTATCTTT,4400,5,clonotype48,clonotype48_consensus_1
2,AAACCTGAGAAGGTTT-NC4,True,AAACCTGAGAAGGTTT-NC4_contig_2,True,498,TRB,TRBV20-1,,TRBJ2-1,TRBC2,True,True,CSAGSGGLIHEQFF,TGCAGTGCTGGAAGCGGGGGGTTGATTCATGAGCAGTTCTTC,7972,14,clonotype48,clonotype48_consensus_2
3,AAACCTGAGACAGGCT-CT7,True,AAACCTGAGACAGGCT-CT7_contig_1,True,475,TRB,TRBV4-1,,TRBJ1-2,TRBC1,True,True,CASSQVWGKKSTF,TGCGCCAGCAGCCAAGTATGGGGGAAGAAGAGTACCTTC,10886,30,clonotype12,clonotype12_consensus_2
4,AAACCTGAGACAGGCT-CT7,True,AAACCTGAGACAGGCT-CT7_contig_2,True,631,TRA,TRAV41,,TRAJ52,TRAC,True,True,CAVRFYLAGGTSYGKLTF,TGTGCTGTCAGATTCTACCTTGCTGGTGGTACTAGCTATGGAAAGC...,5056,12,clonotype12,clonotype12_consensus_1
5,AAACCTGAGACAGGCT-CT7,True,AAACCTGAGACAGGCT-CT7_contig_3,True,344,TRA,,,TRAJ45,TRAC,False,False,,,504,2,clonotype12,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200497,TTTGTCATCTTGGGTA-NC6,True,TTTGTCATCTTGGGTA-NC6_contig_1,True,574,TRA,TRAV17,,TRAJ8,TRAC,True,True,CSTGFQKLVF,TGTTCTACAGGCTTTCAGAAACTTGTATTT,932,4,clonotype1744,clonotype1744_consensus_3
200498,TTTGTCATCTTGGGTA-NC6,True,TTTGTCATCTTGGGTA-NC6_contig_2,True,547,TRA,TRAV12-2,,TRAJ22,TRAC,True,True,CAVNHPGSARQLTF,TGTGCCGTGAACCACCCTGGTTCTGCAAGGCAACTGACCTTT,1174,4,clonotype1744,clonotype1744_consensus_1
200499,TTTGTCATCTTGGGTA-NC6,True,TTTGTCATCTTGGGTA-NC6_contig_3,True,525,TRB,TRBV13,,TRBJ2-5,TRBC2,True,True,CASSDIQGRAETQYF,TGTGCCAGCAGCGACATACAGGGTAGGGCAGAGACCCAGTACTTC,3744,18,clonotype1744,clonotype1744_consensus_2
200500,TTTGTCATCTTGTTTG-NC3,True,TTTGTCATCTTGTTTG-NC3_contig_1,True,495,TRA,TRAV9-2,,TRAJ13,TRAC,True,True,CALIPAAGYQKVTF,TGTGCTCTGATTCCTGCCGCGGGTTACCAGAAAGTTACCTTT,13602,17,clonotype52013,clonotype52013_consensus_2
