# Prepare the Minicoli-removed P1K dataset

In [9]:
from pathlib import Path
import sys
import os.path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import f_oneway, gaussian_kde, mannwhitneyu, pearsonr, spearmanr
import seaborn as sns
from tqdm.notebook import tqdm
from pymodulon.compare import compare_ica
from pymodulon.core import IcaData
from pymodulon.io import load_json_model, save_to_json
from pymodulon.plotting import *

# Apply seaborn's 'whitegrid' style globally for any plots drawn later in the notebook.
sns.set_style('whitegrid')

## Data preparation

### Load data from Data_Basal_Expression folder

In [3]:
# Load the model object from the gzipped JSON file in the current working directory.
# NOTE(review): the section header mentions a Data_Basal_Expression folder, but this
# path is relative to wherever the notebook is run — confirm the intended location.
precise1k = load_json_model('p1k.json.gz')

In [4]:
# Alias the model's `log_tpm` attribute (presumably log-transformed TPM expression
# values — confirm against the pymodulon documentation).
PRECISE_1K_log_tpm = precise1k.log_tpm

In [5]:
# Alias the model's `X` attribute. As displayed further down, its rows are gene
# identifiers (b-numbers) and its columns are sample identifiers (p1k_*).
PRECISE_1K_X = precise1k.X

In [6]:
# Alias the model's `M` attribute (presumably the ICA gene-weight matrix — confirm
# against the pymodulon documentation).
PRECISE_1K_M = precise1k.M

In [7]:
# Alias the model's sample metadata table: one row per sample (indexed by p1k_* id),
# with descriptive columns such as 'study', 'project' and 'condition'.
PRECISE_1K_sample_table = precise1k.sample_table

In [10]:
# Keep only the samples whose 'study' is not 'Minicoli'. The filtered table is bound
# back to the same name, so later cells see the reduced table.
PRECISE_1K_sample_table = PRECISE_1K_sample_table.loc[
    PRECISE_1K_sample_table['study'].ne('Minicoli')
]

In [11]:
# Write the Minicoli-free sample table to CSV.
# pandas' to_csv does not create missing parent directories and raises OSError when
# the target folder is absent, so make sure the output folder exists first
# (no-op when it is already there).
Path('./P1K_minicoli_removed').mkdir(parents=True, exist_ok=True)
PRECISE_1K_sample_table.to_csv('./P1K_minicoli_removed/P1K_minicoli_removed_sample_table.csv')

In [14]:
# Display the sample table (pandas truncates the rendering to the first and last rows/columns).
PRECISE_1K_sample_table

Unnamed: 0,sample_id,study,project,condition,rep_id,Strain Description,Strain,Culture Type,Evolved Sample,Base Media,...,contact,creator,passed_fastqc,passed_pct_reads_mapped,passed_reads_mapped_to_CDS,passed_global_correlation,full_name,passed_similar_replicates,passed_number_replicates,run_date
p1k_00001,control__wt_glc__1,Control,control,wt_glc,1,Escherichia coli K-12 MG1655,MG1655,Batch,No,M9,...,Sangwoo Seo,Sangwoo Seo,True,True,True,True,control:wt_glc,True,True,4/17/13
p1k_00002,control__wt_glc__2,Control,control,wt_glc,2,Escherichia coli K-12 MG1655,MG1655,Batch,No,M9,...,Sangwoo Seo,Sangwoo Seo,True,True,True,True,control:wt_glc,True,True,4/17/13
p1k_00003,fur__wt_dpd__1,Fur,fur,wt_dpd,1,Escherichia coli K-12 MG1655,MG1655,Batch,No,M9,...,Sangwoo Seo,Sangwoo Seo,True,True,True,True,fur:wt_dpd,True,True,10/2/13
p1k_00004,fur__wt_dpd__2,Fur,fur,wt_dpd,2,Escherichia coli K-12 MG1655,MG1655,Batch,No,M9,...,Sangwoo Seo,Sangwoo Seo,True,True,True,True,fur:wt_dpd,True,True,10/2/13
p1k_00005,fur__wt_fe__1,Fur,fur,wt_fe,1,Escherichia coli K-12 MG1655,MG1655,Batch,No,M9,...,Sangwoo Seo,Sangwoo Seo,True,True,True,True,fur:wt_fe,True,True,9/5/13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
p1k_01051,DDB3__PD-006_del-rpoS_Fed-batch_15h__2,DDB3,DDB3,PD-006_del-rpoS_Fed-batch_15h,2,Escherichia coli BW25113 del_rpoS,BW25113,Fed-batch,No,Medium C,...,sursud@biosustain.dtu.dk,Suresh Sudarsan,True,True,True,True,DDB3:PD-006_del-rpoS_Fed-batch_15h,True,True,10/1/20
p1k_01052,DDB3__PD-009_del-tnaA_Batch_6h__1,DDB3,DDB3,PD-009_del-tnaA_Batch_6h,1,Escherichia coli BW25113 del_tnaA,BW25113,Batch,No,Medium C,...,sursud@biosustain.dtu.dk,Suresh Sudarsan,True,True,True,True,DDB3:PD-009_del-tnaA_Batch_6h,True,False,10/1/20
p1k_01053,DDB3__PD-009_del-tnaA_Fed-batch_6h__1,DDB3,DDB3,PD-009_del-tnaA_Fed-batch_6h,1,Escherichia coli BW25113 del_tnaA,BW25113,Fed-batch,No,Medium C,...,sursud@biosustain.dtu.dk,Suresh Sudarsan,True,True,True,True,DDB3:PD-009_del-tnaA_Fed-batch_6h,True,False,10/1/20
p1k_01054,DDB3__PD-009_del-tnaA_Fed-batch_15h__1,DDB3,DDB3,PD-009_del-tnaA_Fed-batch_15h,1,Escherichia coli BW25113 del_tnaA,BW25113,Fed-batch,No,Medium C,...,sursud@biosustain.dtu.dk,Suresh Sudarsan,True,True,True,True,DDB3:PD-009_del-tnaA_Fed-batch_15h,True,True,10/1/20


### Get the Minicoli-removed X matrix

In [12]:
# Select the columns of X (samples) that still have a row in the filtered sample
# table, keeping X's original column order.
matching_columns = list(
    filter(
        lambda sample_id: sample_id in PRECISE_1K_sample_table.index,
        PRECISE_1K_X.columns,
    )
)
PRECISE_1K_X_minicoli_removed = PRECISE_1K_X.loc[:, matching_columns]

In [13]:
# Display the column-filtered X matrix (pandas truncates the rendering to the first and last rows/columns).
PRECISE_1K_X_minicoli_removed

Unnamed: 0,p1k_00001,p1k_00002,p1k_00003,p1k_00004,p1k_00005,p1k_00006,p1k_00007,p1k_00008,p1k_00009,p1k_00010,...,p1k_01046,p1k_01047,p1k_01048,p1k_01049,p1k_01050,p1k_01051,p1k_01052,p1k_01053,p1k_01054,p1k_01055
b0002,-0.053993,0.053993,0.879043,1.089600,0.182453,-0.078873,-0.837092,-0.502703,0.421312,0.412354,...,-0.868029,-2.012060,-1.862453,-1.950586,-1.642161,-1.666869,-0.639387,-1.552748,-1.892598,-1.831359
b0003,-0.061973,0.061973,1.063330,1.477889,0.392573,0.234671,-0.763015,-0.120989,0.546181,0.520841,...,-1.501588,-2.532496,-2.515189,-2.443481,-2.258418,-2.312108,-1.124294,-2.060022,-2.348524,-2.122772
b0004,-0.036972,0.036972,0.003132,0.356702,-0.096756,-0.334913,-1.714913,-1.322707,-0.234147,-0.250669,...,-0.691635,-1.692102,-1.608955,-1.723764,-1.376420,-1.251488,-0.529185,-1.577833,-1.383521,-1.527079
b0005,-0.104967,0.104967,-1.118145,-1.277963,-0.181546,-0.364057,-2.121475,-1.994770,-0.386684,-0.634659,...,-2.925632,-4.626965,-4.067417,-2.964371,-3.075562,-3.101967,-2.210546,-3.821019,-4.563928,-2.946659
b0006,0.042242,-0.042242,-0.123592,-0.107778,-0.173177,-0.089888,-0.208133,-0.003044,-0.034707,-0.027806,...,0.190536,-0.618740,-0.175381,-0.317349,-0.466331,-0.169979,-0.402588,-0.915991,-0.804728,-0.858978
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
b4747,0.619141,-0.619141,-0.619141,-0.619141,0.202715,-0.619141,0.734510,-0.619141,0.790289,0.284033,...,-0.619141,0.766947,-0.619141,-0.619141,-0.619141,-0.619141,-0.619141,-0.619141,-0.619141,1.164489
b4748,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.421363,0.000000,0.000000,0.000000,...,5.441998,5.734377,5.614587,5.648036,5.957326,6.657140,3.672372,4.138619,5.809142,4.465101
b4751,-0.039552,0.039552,-1.290021,-1.298327,-0.481621,-0.609865,-1.680827,-1.239934,-0.134430,-0.363082,...,0.274369,-0.356653,-0.371515,-0.064824,-0.504051,0.323457,-1.267812,-0.534742,0.039449,-0.051373
b4755,-0.235166,0.235166,2.621649,3.256277,-0.302443,-0.096198,2.518406,3.268205,1.379971,0.910647,...,-0.633352,-0.039091,-0.136657,0.734843,1.699093,-2.399592,-2.399592,3.413940,1.914515,1.564518


In [15]:
# Write the Minicoli-free X matrix to CSV.
# pandas' to_csv does not create missing parent directories and raises OSError when
# the target folder is absent, so make sure the output folder exists first
# (no-op when it is already there).
Path('./P1K_minicoli_removed').mkdir(parents=True, exist_ok=True)
PRECISE_1K_X_minicoli_removed.to_csv('./P1K_minicoli_removed/P1K_minicoli_removed_X.csv')