In [10]:
import matplotlib.pyplot as plt
import json
import numpy as np
import os
from pathlib import Path
import pandas as pd

In [None]:
basepath = Path("/home/unix/wangyanz/codon_usage/scRNA")
data_pool = basepath.joinpath("data")
cell_type_pool = data_pool.joinpath("cell_type")
pics_dir = data_pool.joinpath("pics")
if not os.path.exists(pics_dir):
    os.mkdir(pics_dir)

In [None]:
cells = cell_type_pool.glob("*.json")
cell_data = {}

for cell in cells:
    with open(cell, "r") as fp:
        data = json.load(fp)
    cell_data[cell.stem] = data
print(cell_data)


In [None]:

table={
        "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
        "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
        "TAT": "Y", "TAC": "Y",                           # noqa: E241
        "TGT": "C", "TGC": "C",             "TGG": "W",   # noqa: E241
        "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
        "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
        "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
        "TAA": "stop", "TAG": "stop", "TGA": "stop"
    }


codon_dict = {
    'Ala': ['GCT', 'GCC', 'GCA', 'GCG'],
    'Arg': ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
    'Asn': ['AAT', 'AAC'],
    'Asp': ['GAT', 'GAC'],
    'Cys': ['TGT', 'TGC'],
    'Gln': ['CAA', 'CAG'],
    'Glu': ['GAA', 'GAG'],
    'Gly': ['GGT', 'GGC', 'GGA', 'GGG'],
    'His': ['CAT', 'CAC'],
    'Ile': ['ATT', 'ATC', 'ATA'],
    'Leu': ['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'],
    'Lys': ['AAA', 'AAG'],
    'Met': ['ATG'],
    'Phe': ['TTT', 'TTC'],
    'Pro': ['CCT', 'CCC', 'CCA', 'CCG'],
    'Ser': ['TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'],
    'Thr': ['ACT', 'ACC', 'ACA', 'ACG'],
    'Trp': ['TGG'],
    'Tyr': ['TAT', 'TAC'],
    'Val': ['GTT', 'GTC', 'GTA', 'GTG'],
    'stop': ['TAA', 'TAG', 'TGA']
}

In [77]:
result = {}
for codon in table:
    result[codon] = []
    for cell_type in cell_data:
        result[codon].append(cell_data[cell_type][codon])
    result[codon].append(table[codon])
# print(result)

#, 
df = pd.DataFrame(data=result, index=list(cell_data.keys())+["aminoacid"], columns=table.keys()).T
df = df.groupby("aminoacid").apply(lambda x: x/x.sum()*100)
df = df.astype(float)
aminoacid = [table[codon] for codon in df.index]
vmax = df.max(axis=1)
vmin = df.min(axis=1)
vstd = df.std(axis=1)
cmin = df.idxmin(axis=1)
cmax = df.idxmax(axis=1)



df.insert(0, "max freq cell type", cmax)
df.insert(0, "max freq", vmax)
df.insert(0, "min freq cell type", cmin)
df.insert(0, "min freq", vmin)
df.insert(0, "range_differnece", vmax-vmin)
df.insert(0, "std", vstd)
df = df.round(2)
df.insert(0, "aminoacid", aminoacid)
df.to_csv(data_pool.joinpath("codon_frequence_scRNA_Muris_10X_v2.csv"))
df


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df = df.groupby("aminoacid").apply(lambda x: x/x.sum()*100)


Unnamed: 0,aminoacid,std,range_differnece,min freq,min freq cell type,max freq,max freq cell type,classical_monocyte,lung_endothelial_cell,myeloid_cell,...,granulocyte,endocardial_cell,bladder_urothelial_cell,ciliated_columnar_cell_of_tracheobronchial_tree,dendritic_cell,leukocyte,fibroblast,blood_cell,erythroblast,skeletal_muscle_satellite_cell
TTT,F,1.59,8.08,39.62,Langerhans_cell,47.70,proerythroblast,43.20,43.07,41.57,...,40.68,45.13,44.01,44.58,43.50,42.18,43.68,42.48,46.53,43.67
TTC,F,1.59,8.08,52.30,proerythroblast,60.38,Langerhans_cell,56.80,56.93,58.43,...,59.32,54.87,55.99,55.42,56.50,57.82,56.32,57.52,53.47,56.33
TTA,L,0.64,3.16,4.64,Langerhans_cell,7.80,cardiac_muscle_cell,5.57,5.92,5.35,...,4.86,6.80,5.70,6.46,6.02,5.49,6.18,5.64,5.18,5.74
TTG,L,0.63,4.29,11.12,type_II_pneumocyte,15.41,granulocyte,12.77,13.12,13.15,...,15.41,13.61,13.24,13.05,13.22,13.30,13.05,13.26,13.27,13.04
TCT,S,0.68,3.07,19.17,type_II_pneumocyte,22.24,granulocytopoietic_cell,20.59,20.04,21.00,...,22.01,21.00,20.98,20.17,21.35,20.86,21.09,21.35,21.69,21.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GGA,G,1.05,5.12,22.26,Langerhans_cell,27.39,cardiac_muscle_cell,24.15,24.24,23.23,...,24.12,25.55,23.37,24.32,24.29,23.58,25.31,23.43,25.12,22.95
GGG,G,1.11,6.80,16.39,bladder_cell,23.18,granulocyte,21.51,22.38,21.53,...,23.18,21.58,20.65,22.44,21.86,21.48,19.76,21.11,20.74,20.21
TAA,stop,3.90,20.13,24.30,type_II_pneumocyte,44.42,T_cell,34.63,33.33,37.24,...,34.85,36.41,40.80,31.05,36.13,36.65,35.05,38.19,43.35,40.29
TAG,stop,4.49,23.92,17.36,Langerhans_cell,41.27,granulocyte,20.70,22.19,19.97,...,41.27,21.33,21.43,24.66,19.54,23.16,21.00,20.58,21.94,19.71


In [None]:
# Iterate over the subplots and plot something on each one
pics_dir = Path("/home/unix/wangyanz/codon_usage/scRNA/pics")
for aa in codon_dict:
    if len(codon_dict[aa]) < 2:
        continue
    # codon_freq = {}
    # for codon in codon_dict[aa]:
    #     codon_freq[codon] = 0
    idx = 0
    fig, axs = plt.subplots(11, 5, figsize=(12, 24))
    for cell in cell_data:
        codon_value = []
        
        for codon in codon_dict[aa]:
            codon_value.append(cell_data[cell][codon])
        codon_value = np.array(codon_value)
        codon_value = codon_value / codon_value.sum()
        j = idx%5
        i = idx//5
        ax = axs[i, j]
        # Plot something on the current subplot
        ax.set_title(f'{cell}')
        ax.pie(codon_value, labels=codon_dict[aa], autopct='%1.1f%%', shadow=True, startangle=90)
        ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
        idx += 1
    plt.tight_layout()
    plt.savefig(pics_dir.joinpath(f"{aa}.png"))
