In [3]:
import csv
import glob
from collections import defaultdict

In [4]:
# Number of cells assigned to each Leiden cluster, keyed by the cluster label
# exactly as it appears (a string) in the "leiden" column of the annotation table.
cluster_size = defaultdict(int)

umap_annotations_path = '/lustre/scratch117/cellgen/team218/gp7/Joe/MicroExonator/umap_NoRegressedAnnotations.tsv'

with open(umap_annotations_path) as cell_metadata:
    # One row per cell; tally the row under its cluster label.
    for cell in csv.DictReader(cell_metadata, delimiter="\t"):
        leiden_label = cell["leiden"]
        cluster_size[leiden_label] = cluster_size[leiden_label] + 1

In [9]:
# Per-comparison cluster weights derived from the run metadata table.
#
# cluster_comparison_weight[(Compare_ID, cluster)]
#     fraction of the cells on that cluster's side (A or B) of the comparison
#     that belong to the cluster, i.e. cluster_size[cluster] / cells on the side.
# compare_clusters[Compare_ID]
#     [A_clusters, B_clusters] as lists of cluster labels.
# cluster_total_weight[cluster]
#     sum of the cluster's weights over every comparison row it appears in.
cluster_comparison_weight = dict()
compare_clusters = dict()
cluster_total_weight = defaultdict(int)

with open('/lustre/scratch117/cellgen/team218/gp7/Joe/MicroExonator/run_metadata.tsv') as run_metadata:

    reader = csv.DictReader(run_metadata, delimiter="\t")

    for row in reader:

        compare_id = row["Compare_ID"]

        # Cluster names are comma separated; strip so "0, 36" still matches
        # the labels stored in cluster_size.
        A_clusters = [name.strip() for name in row["A.cluster_names"].split(",")]
        B_clusters = [name.strip() for name in row["B.cluster_names"].split(",")]

        compare_clusters[compare_id] = [A_clusters, B_clusters]

        # Both sides are weighted the same way, so handle them in one loop.
        for side, group in (("A", A_clusters), ("B", B_clusters)):

            # .get() keeps unknown labels from being inserted into the
            # cluster_size defaultdict as a side effect of the lookup.
            group_cells = sum(cluster_size.get(name, 0) for name in group)

            if group_cells == 0:
                # Previously surfaced as a bare ZeroDivisionError with no hint
                # about which comparison was at fault.
                raise ValueError(
                    "Comparison {!r}: side {} clusters {!r} have no cells in cluster_size".format(
                        compare_id, side, group
                    )
                )

            for name in group:
                weight = cluster_size.get(name, 0) / group_cells
                cluster_comparison_weight[(compare_id, name)] = weight
                cluster_total_weight[name] += weight

In [10]:
# Show the (Compare_ID, cluster) -> weight mapping built in the previous cell for inspection.
cluster_comparison_weight

{('FirstHeart_vs_Primitive', '27'): 1.0,
 ('FirstHeart_vs_Primitive', '32'): 1.0,
 ('FistHeart_vs_SecondHeart', '27'): 1.0,
 ('FistHeart_vs_SecondHeart', '34'): 1.0,
 ('SecondHeart_vs_Miofibro', '34'): 1.0,
 ('SecondHeart_vs_Miofibro', '28'): 1.0,
 ('Blood_core', '23'): 1.0,
 ('Blood_core', '0'): 1.0,
 ('Blood_subdivition', '0'): 1.0,
 ('Blood_subdivition', '36'): 1.0,
 ('Blood_total', '0'): 0.9304908634897886,
 ('Blood_total', '36'): 0.0695091365102114,
 ('Blood_total', '23'): 1.0,
 ('Intermediate_mesoderm', '3'): 1.0,
 ('Intermediate_mesoderm', '35'): 1.0,
 ('Intermediate_mesoderm_primordium', '5'): 1.0,
 ('Intermediate_mesoderm_primordium', '35'): 1.0,
 ('Thophectoderm_Epiblast', '19'): 1.0,
 ('Thophectoderm_Epiblast', '29'): 1.0,
 ('Spinal_cord_progenitors_diff', '30'): 1.0,
 ('Spinal_cord_progenitors_diff', '13'): 1.0,
 ('Spinal_cord_diff', '6'): 1.0,
 ('Spinal_cord_diff', '13'): 1.0,
 ('Spinal_cord_brain', '30'): 0.15258855585831063,
 ('Spinal_cord_brain', '6'): 0.475136239782016

In [13]:

# Aggregate per-comparison DeltaPsi values into one signed value per (cluster, coord).
#
# cluster_coord_dPSI[(cluster, coord)]
#     sum over comparison files of  sign * DeltaPsi.mean * w / cluster_total_weight[cluster],
#     where w is the cluster's weight in that comparison and sign is +1 for
#     A-side clusters and -1 for B-side clusters.
# coord_info[coord]
#     [Gene, Node, Strand, Type] from the last row seen for that coord.
# diff_coord_cluster
#     (cluster, coord) pairs flagged diff == "TRUE" in at least one comparison.
#
# NOTE(review): cluster_total_weight is accumulated over every run_metadata row,
# so a coord missing from some comparison files is divided by more weight than
# it contributed -- confirm that this shrinkage is intended.
cluster_coord_dPSI = defaultdict(int)
coord_info = dict()
diff_coord_cluster = set([])

for sig_nodes_path in glob.glob('/lustre/scratch117/cellgen/team218/gp7/Joe/MicroExonator/Whippet/Delta/Single_Cell/Sig_nodes/*.txt'):

    # The comparison ID is the file name up to its first dot.
    compare_ID = sig_nodes_path.split("/")[-1].split(".")[0]

    # Resolve the cluster groups before opening the file so an unknown
    # comparison fails without leaving a file handle open.
    A_clusters, B_clusters = compare_clusters[compare_ID]

    with open(sig_nodes_path) as res:

        reader = csv.DictReader(res, delimiter="\t")

        for row in reader:

            coord = row['Coord']
            coord_info[coord] = [row['Gene'], row['Node'], row['Strand'], row['Type']]

            # Parse once per row instead of once per cluster.
            delta_psi = float(row['DeltaPsi.mean'])
            is_diff = row['diff'] == "TRUE"

            # A-side clusters take the value as reported, B-side clusters the negated value.
            for clusters, signed_delta in ((A_clusters, delta_psi), (B_clusters, -delta_psi)):

                for cluster in clusters:
                    w = cluster_comparison_weight[(compare_ID, cluster)]
                    cluster_coord_dPSI[(cluster, coord)] += (signed_delta * w) / cluster_total_weight[cluster]

                    if is_diff:
                        diff_coord_cluster.add((cluster, coord))
    

In [14]:

# Write one tab-separated line per (cluster, coord) pair that was flagged as
# differential, together with the coord annotation and its aggregated dPSI.
output_columns = ["Cluster_ID", "Coord", "Gene", "Node", "Strand", "Type", "Weighted_mean_dPSI"]

with open("/lustre/scratch117/cellgen/team218/gp7/Joe/MicroExonator/Whippet/Delta/Single_Cell/Sig_nodes/cluster_sig_nodes.tsv", "w") as out:

    out.write("\t".join(output_columns) + "\n" )

    for (Cluster_ID, Coord), Weighted_mean_dPSI in cluster_coord_dPSI.items():

        # Only pairs present in diff_coord_cluster are reported.
        if (Cluster_ID, Coord) not in diff_coord_cluster:
            continue

        Gene, Node, Strand, Type = coord_info[Coord]
        record = (Cluster_ID, Coord, Gene, Node, Strand, Type, str(Weighted_mean_dPSI))

        out.write("\t".join(record) + "\n" )
        
    
    
    
    

In [1]:
# Scratch arithmetic check; nothing else in the visible notebook uses this result.
2+2

4