In [1]:
import pandas as pd
import numpy as np
import scanpy as sc
import pickle
import scipy
import itertools

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context
import seaborn as sns
from ete3 import Tree
from copy import copy
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [3]:
# Colour lookup keyed by cell-state name (its keys become the columns of the
# composition table further down).
clusterColorsFile = "/Genomics/chanlab/mchan/Adriano/TLS/TLS_TLSCL/20211102_clusterColorsTLSCL.p"
with open(clusterColorsFile, mode='rb') as fp:
    # NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
    colorDict = pickle.load(fp)

# Per-cell annotation table; downstream cells read its 'cellBC' and 'cell_state' columns.
cell_state_table = pd.read_csv(
    '/Genomics/chanlab/blaw/TLS/metadata/TLS_Explant_Total_cellBC_cellState.tsv',
    sep='\t',
)

# MultiSeq barcode tables, one per sequenced sample, read in the order
# explant, outgrowth 1, outgrowth 2.
multiseq_paths = (
    '/Genomics/chanlab/blaw/TLS/metadata/AM-DNA-341_MultiSeqBC.tsv',
    '/Genomics/chanlab/blaw/TLS/metadata/AM-DNA-342_MultiSeqBC.tsv',
    '/Genomics/chanlab/blaw/TLS/metadata/AM-DNA-343_MultiSeqBC.tsv',
)
explant_multiseq, out1_multiseq, out2_multiseq = (
    pd.read_csv(path, sep='\t') for path in multiseq_paths
)

# Stack the three tables and save the combined table next to the inputs.
total_multiseq = pd.concat([explant_multiseq, out1_multiseq, out2_multiseq])
total_multiseq.to_csv(
    '/Genomics/chanlab/blaw/TLS/metadata/TLS_Explant_Total_MultiSeqBC.tsv',
    sep='\t',
    index=False,
)

# Labels used to build the per-condition row IDs.
barcodes = [
    'Bar1', 'Bar2', 'Bar3',
    'Bar4', 'Bar5', 'Bar6',
]
timepoints = ['120', '144']

# Record the cell state counts per condition
I will test all combinations of the following:
- Edited / Unedited
- Each barcode (All 6)
- Each timepoint (120, 144)

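This yields 24 conditions (6 barcodes × 2 timepoints × 2 editing states), i.e. 12 edited-vs-unedited comparisons. The two 144 h outgrowth samples are pooled into a single 144 h condition.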

In [4]:
# One row ID per (barcode, timepoint, editing status) combination, e.g.
# 'Bar1_120_Edited'. product() iterates with the barcode varying slowest and
# the editing status fastest.
IDs = [
    '{}_{}_{}'.format(bar, hours, status)
    for bar, hours, status in itertools.product(barcodes, ['120', '144'], ['Edited', 'Unedited'])
]

# Count table: rows are conditions, columns are the colorDict keys, every entry starts at 0.
structure_composition = pd.DataFrame(0, index = IDs, columns = colorDict.keys())

In [7]:
# Fill structure_composition with the number of cells of each cell state per
# (barcode, timepoint, Edited/Unedited) condition.
#
# A cell is counted as 'Edited' when its prefixed cellBC is a leaf of the
# barcode's lineage tree, and as 'Unedited' otherwise.

# Index the annotations by cell barcode so each cell's state is a direct lookup.
temp = cell_state_table.copy()
temp.set_index('cellBC', inplace = True)

# Each contributing sample: (timepoint label, multiseq table, cellBC prefix).
# Both outgrowth samples feed the same '144' rows.
samples = [
    ('120', explant_multiseq, 'Tracer_Explant_'),
    ('144', out1_multiseq, 'Tracer_Outgrowth_1_'),
    ('144', out2_multiseq, 'Tracer_Outgrowth_2_'),
]

# Start from zero so that re-running this cell does not add to earlier counts.
structure_composition.loc[:, :] = 0

for barcode in barcodes:
    treeFile = '/Genomics/chanlab/blaw/TLS/data/explant/lineage/3_lineage_reconstruction/{}/120_144/hybrid/{}_120_144_hybrid_newick_noMutationlessEdges_Labeled.nwk'.format(barcode, barcode)
    t = Tree(treeFile, format = 1)
    # A set makes each membership test below O(1) instead of scanning a list.
    leaves = {leaf.name for leaf in t.get_leaves()}

    for timepoint, multiseq, prefix in samples:
        # Cells of this sample whose rescued MultiSeq call is the current barcode.
        called = multiseq.loc[multiseq['final.calls.rescued'] == barcode, 'cellBC']
        for cellBC in called:
            cell = prefix + cellBC
            status = 'Edited' if cell in leaves else 'Unedited'
            # Raises KeyError if the cell has no annotation in cell_state_table.
            cell_state = temp.loc[cell, 'cell_state']
            structure_composition.loc['{}_{}_{}'.format(barcode, timepoint, status), cell_state] += 1

In [8]:
# Row sum of the count table for every condition ID.
cell_totals = {
    condition: structure_composition.loc[condition].sum()
    for condition in structure_composition.index
}

In [9]:
# Total cells per condition. Only the cell-state columns are summed, so
# re-running this cell does not fold an already-present 'Sum' column into the
# new total (which would double it and halve every fraction).
state_columns = structure_composition.columns.drop('Sum', errors = 'ignore')
structure_composition['Sum'] = structure_composition[state_columns].sum(axis = 1)

# Divide each row by its total. The result still carries a 'Sum' column, and
# conditions with zero cells come out as NaN (0/0).
structure_composition_frac = structure_composition.div(structure_composition["Sum"], axis=0)

In [10]:
# Remove the helper 'Sum' column from the fraction table. errors='ignore' plus
# reassignment keeps this cell safe to re-run once the column is already gone.
structure_composition_frac = structure_composition_frac.drop(columns = ['Sum'], errors = 'ignore')

In [12]:
# Write the fraction table and the count table as tab-separated text,
# keeping the condition IDs as the first column.
structure_composition_frac.to_csv(
    path_or_buf = '/Genomics/chanlab/blaw/TLS/data/explant/composition/structure_compositions_frac.txt',
    sep = '\t',
)
structure_composition.to_csv(
    path_or_buf = '/Genomics/chanlab/blaw/TLS/data/explant/composition/structure_compositions.txt',
    sep = '\t',
)

In [15]:
# Stacked bar chart of the cell-state fractions, one bar per condition.

# Bar colours in the same order as the cell-state columns.
colors = [colorDict[state] for state in structure_composition_frac.columns]

# Explicit figure/axes instead of the implicit pyplot "current figure".
fig, ax = plt.subplots(figsize = (15, 10))
structure_composition_frac.plot(kind='bar', use_index = True, stacked=True, color = colors, grid = False, ax = ax)
ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), prop={'size': 10})
ax.set_title('cell_state_proportions')
# NOTE(review): the plotted values are fractions in [0, 1], not percentages -- confirm the label.
ax.set_ylabel('% cells/cluster')
# The legend is anchored outside the axes; bbox_inches='tight' expands the
# saved page so it is not clipped.
fig.savefig('/Genomics/chanlab/blaw/TLS/data/explant/composition/explant_compositions.pdf', dpi = 300, bbox_inches = 'tight')
#plt.show()
plt.close(fig)

In [19]:
# Scatter of edited vs. unedited fractions: one point per (barcode, timepoint)
# pair and cell state, coloured by cell state.

# Row IDs do not depend on the cell state, so build them once, in matching order.
edited_ids = ['{}_{}_Edited'.format(barcode, time) for barcode in barcodes for time in ['120', '144']]
unedited_ids = ['{}_{}_Unedited'.format(barcode, time) for barcode in barcodes for time in ['120', '144']]

fig, ax = plt.subplots()
labels = []
handles = []
for cell_state in colorDict.keys():
    temp_edited = structure_composition_frac.loc[edited_ids, cell_state].values
    temp_unedited = structure_composition_frac.loc[unedited_ids, cell_state].values

    # ax.plot returns a list of line artists; keep them to pair explicitly with the labels.
    handles.extend(ax.plot(temp_edited, temp_unedited, '.', color = colorDict[cell_state]))
    labels.append(cell_state)

ax.legend(handles, labels, bbox_to_anchor=(1, 1.05))
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.set_xlabel('edited')
ax.set_ylabel('unedited')
# The legend is anchored at the axes' edge; bbox_inches='tight' keeps any
# overflow inside the saved page.
fig.savefig('/Genomics/chanlab/blaw/TLS/data/explant/composition/explant_edited_vs_unedited_composition.pdf', dpi = 300, bbox_inches = 'tight')
#plt.show()
plt.close(fig)