In [None]:
import pandas as pd
import plotly.express as px
import numpy as np
from pathlib import Path

In [None]:
# Root folder that is searched recursively for per-library result tables.
rb_dir = Path("/nfs/nas22/fs2202/biol_micro_bioinf_nccr/hardt/nguyenb/tnseq/scratch/final_counts/analysis")
# One frame per "*rra_results.csv" file; the file stem is recorded in a `library` column.
df_list = [
    pd.read_csv(csv_path).assign(library=csv_path.stem)
    for csv_path in rb_dir.rglob("*rra_results.csv")
]
d1_res = pd.concat(df_list)
# Keep only the 'd1' contrast of the single library analysed below.
is_d1 = d1_res['contrast'] == 'd1'
is_target_library = d1_res['library'] == 'library_12_2_rra_results'
d1_res = d1_res[is_d1 & is_target_library]

In [None]:
# Rows of the day-1 table whose locus_tag contains the substring 'gln'.
gln_mask = d1_res['locus_tag'].str.contains('gln')
d1_res.loc[gln_mask]

In [None]:
# Second results table, read from the feces/inoculum analysis folder; it is merged
# with the day-1 summary further down.
lps_res = pd.read_csv(
    "/nfs/shared/_shared/lilith/07_23_Nguyen_Skroon/analysis_feces_inoculum/skroon_analysis_07_2023_feces_inoculum_rra_results.csv"
)

In [None]:
# Peek at the first five rows of the LPS table.
lps_res.head(n=5)

In [None]:
# Flag a row as a hit when |LFC| > 1 and either selection FDR is below 0.05.
# `assign` returns a new frame rather than writing a column into `d1_res`, which at
# this point is a boolean-filtered slice of the concatenated table; in-place column
# assignment on such a slice triggers pandas' SettingWithCopyWarning.
d1_res = d1_res.assign(
    Hit=(d1_res.LFC.abs() > 1)
    & ((d1_res.neg_selection_fdr < 0.05) | (d1_res.pos_selection_fdr < 0.05))
)

In [None]:
# Preview five random rows; the fixed seed keeps the displayed rows identical
# across "Restart & Run All" executions.
d1_res.sample(5, random_state=0)

In [None]:
# Collapse to one row per (contrast, locus_tag): median LFC and the number of
# rows flagged as hits.
d1_med = (
    d1_res
    .groupby(['contrast', 'locus_tag'])
    .agg(LFC=('LFC', 'median'), Hit=('Hit', 'sum'))
    .reset_index()
)
# Cap the hit count at 1 so `Hit` acts as a 0/1 indicator.
d1_med['Hit'] = d1_med['Hit'].clip(upper=1)
d1_med

In [None]:
# Inner-join the per-gene day-1 summary to the LPS table (locus_tag <-> Name).
# Column names present in both inputs receive the '_d1' / '_lps' suffixes.
res = pd.merge(
    d1_med,
    lps_res,
    how='inner',
    left_on='locus_tag',
    right_on='Name',
    suffixes=('_d1', '_lps'),
)

In [None]:
# LPS hit indicator (0/1): |LFC_lps| > 0.6 and either selection FDR below 0.05.
lps_effect = res['LFC_lps'].abs() > 0.6
lps_significant = (res['neg_selection_fdr'] < 0.05) | (res['pos_selection_fdr'] < 0.05)
res['Hit_lps'] = (lps_effect & lps_significant).astype(int)

In [None]:
# Pack both indicators into one code: the day-1 `Hit` contributes 2 and
# `Hit_lps` contributes 1, giving values 0..3.
res['Hit_diff'] = res['Hit'] * 2 + res['Hit_lps']

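Meaning of the `Hit_diff` code (2 × day-1 hit + LPS hit):
2 -> was a hit on day 1, not a hit anymore with LPS
1 -> was not a hit on day 1, is a hit with LPS
3 -> a hit in both (always a hit)
0 -> not a hit in either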

In [None]:
# Look up the merged row(s) for locus_tag 'ilvC'.
res.loc[res['locus_tag'].eq('ilvC')]

In [None]:
# Swap the numeric Hit_diff codes for readable category labels.
hit_diff_labels = {
    0: 'NH',
    1: 'LPS specific hit',
    2: 'Not a hit with LPS',
    3: 'Always a hit',
}
res['Hit_diff'] = res['Hit_diff'].replace(hit_diff_labels)

In [None]:
from Bio.KEGG import REST
import io

In [None]:
# Download the KEGG listing for organism code "sey" and parse the tab-separated
# text (no header row) into a frame; the four columns are then named by position.
kegg_listing = io.StringIO(REST.kegg_list("sey").read())
gene_info = pd.read_csv(kegg_listing, sep="\t", header=None)
gene_info.columns = ['locus_tag', 'feature', 'DN', 'Description']

In [None]:
# Show the KEGG listing row for sey:SL1344_3149.
gene_info.loc[gene_info['locus_tag'].eq('sey:SL1344_3149')]

In [None]:
# Fetch the KEGG entry for sey:SL1344_1548 and keep its full text in `result`.
# NOTE(review): `result` is not referenced by any other visible cell.
result = REST.kegg_get("sey:SL1344_1548").read()

In [None]:
# Rows with LFC_d1 < -1 whose LPS values show neg_selection_fdr > 0.05 and
# LFC_lps > -0.5.
lps_unaffected = (res['neg_selection_fdr'] > 0.05) & (res['LFC_lps'] > -0.5)
res.loc[lps_unaffected & (res['LFC_d1'] < -1)]

In [None]:
# Print, one per line, the Name of every row with neg_selection_fdr > 0.05
# and LFC_d1 < -1.
selected_names = res.loc[(res['neg_selection_fdr'] > 0.05) & (res['LFC_d1'] < -1), 'Name']
for gene_name in selected_names.tolist():
    print(gene_name)

In [None]:
# Display the merged day-1 / LPS table.
res

In [None]:
# Rows with LFC_d1 < -2 and LFC_lps > -0.5, copied so that editing `Name`
# below leaves `res` untouched.
of_interest = res.query("LFC_d1< -2 & LFC_lps > -0.5").copy()
# Remove the 'SL1344_' text from the names. regex=False pins this to a literal
# replacement, because the default of `regex` differs between pandas versions.
of_interest['Name'] = of_interest['Name'].str.replace('SL1344_', '', regex=False)

In [None]:
# Display the subset selected for labelling in the scatter plot.
of_interest

In [None]:
def improve_text_position(x, positions=('top right', 'top left')):
    """Return one text-position string per element of ``x``, cycling through ``positions``.

    Only ``len(x)`` is used; the values themselves are ignored. Consecutive
    entries receive consecutive positions, so alternating positions only
    separates neighbouring labels if ``x`` is ordered the way the points
    appear on the plot (e.g. sorted by x value).

    Parameters
    ----------
    x : sized collection
        The points to label; must support ``len()``.
    positions : sequence of str, optional
        Text positions to cycle through. Defaults to alternating
        ``'top right'`` / ``'top left'``.

    Returns
    -------
    list of str
        ``len(x)`` entries taken from ``positions`` in round-robin order.

    Raises
    ------
    ValueError
        If ``positions`` is empty.
    """
    positions = list(positions)
    if not positions:
        raise ValueError("positions must contain at least one text position")
    n_positions = len(positions)
    return [positions[i % n_positions] for i in range(len(x))]

In [None]:
# Plotly's qualitative G10 colour list; entries are picked by index for the markers.
clrs = px.colors.qualitative.G10

In [None]:
# Merged rows whose Name contains the substring 'gln'.
name_has_gln = res['Name'].str.contains('gln')
res.loc[name_has_gln]

In [None]:
# Show the palette entry at index 9 (used as the background marker colour).
clrs[9]

In [None]:
import plotly.graph_objects as go
# Two-layer scatter of LFC_d1 (x) against LFC_lps (y): every merged gene as a
# background cloud, then the `of_interest` subset drawn larger with name labels.
background_trace = go.Scatter(
    x=res.LFC_d1,
    y=res.LFC_lps,
    mode='markers',
    marker=dict(color=clrs[9]),
    text=res.Name,
)
highlight_trace = go.Scatter(
    x=of_interest.LFC_d1,
    y=of_interest.LFC_lps,
    mode='markers+text',
    text=of_interest["Name"],
    textposition="top center",
    textfont_size=14,
    marker=dict(color=clrs[2], size=12, line=dict(color='black', width=1)),
)

fig = go.Figure()
for trace in (background_trace, highlight_trace):
    fig.add_trace(trace)

# Square white canvas without a legend, and enlarged axis titles.
fig.update_layout(width=800, height=800, template='plotly_white', showlegend=False)
fig.update_xaxes(title='LFC on Day1', title_font=dict(size=24))
fig.update_yaxes(title='LFC after LPS treatment', title_font=dict(size=24))

In [None]:
# Merged rows whose locus_tag contains the substring 'clp'.
locus_has_clp = res['locus_tag'].str.contains('clp')
res.loc[locus_has_clp]

In [None]:
# KEGG entries whose Description contains 'catalase'. na=False counts a missing
# Description as "no match"; without it a NaN in the mask makes the boolean
# indexing raise instead of returning the matching rows.
gene_info[gene_info.Description.str.contains('catalase', na=False)]

In [None]:
# Description text(s) of the KEGG entry sey:SL1344_3813, as an array.
is_sl1344_3813 = gene_info['locus_tag'].eq('sey:SL1344_3813')
gene_info.loc[is_sl1344_3813, 'Description'].values

In [None]:
# The same LFC_d1 vs LFC_lps scatter, coloured by the Hit_diff category, with
# both axes fixed to [-10, 5] and the gene Name shown on hover.
px.scatter(
    res,
    x='LFC_d1',
    y='LFC_lps',
    color='Hit_diff',
    hover_data=['Name'],
    range_x=[-10, 5],
    range_y=[-10, 5],
    width=600,
    height=600,
)

In [None]:
# Gene names recorded by the author. As bare identifiers in a code cell each
# line raised NameError on execution, so they are kept as strings instead.
# NOTE(review): the notebook does not say what this list is for - confirm.
genes_of_note = [
    "ydgA",
    "uhpC",
    "STM1731",
    "recF",
    "trpD",
    "dcuB",
    "fixA",
]
genes_of_note
 