# Import Packages and Construct Bitome

In [8]:
from pathlib import Path
import sys

import pandas as pd

sys.path.append('../../bitome2')

from bitome.core import Bitome
from bitome.util import create_motif

In [5]:
# Directory containing the MG1655 GenBank record and the annotation tables
# that are handed to the Bitome constructor.
K12_DATA_PATH: Path = Path('../data/bitome2/mg1655')

In [6]:
# Origin coordinates, taken from the GenBank annotation.
origin = (3925743, 3925975)

# Terminus coordinates, from:
#   1. Duggin, I. G. & Bell, S. D. J. Mol. Biol. (2009).
# using the following terA/terC sequences:
#   ter_a = 'AATTAGTATGTTGTAACTAAAGT'
#   ter_c = 'ATATAGGATGTTGTAACTAATAT'
terminus = (1341745, 1609180)

# Boundary positions supplied to Bitome as `cid_boundaries`, in ascending order.
cid_boundaries = [
    50000, 142779, 223485, 446815,
    927474, 985894, 1080000, 1195008,
    1301085, 1577657, 1799393, 1852621,
    2102875, 2259719, 2380722, 2533498,
    2726069, 2905711, 2996372, 3297098,
    3423423, 3440040, 3652182, 3811250,
    3941516, 4035239, 4166484, 4470986,
]

In [7]:
# Build the MG1655 Bitome from the GenBank record plus the annotation tables,
# all of which are read from K12_DATA_PATH.
mg1655 = Bitome(
    K12_DATA_PATH / 'NC_000913.3.gb',
    name='MG1655',
    # replication landmarks and boundary positions defined earlier in the notebook
    origin=origin,
    terminus=terminus,
    cid_boundaries=cid_boundaries,
    # annotation tables (CSV), one per feature type
    gene_table=K12_DATA_PATH / 'gene_info_supp.csv',
    tu_table=K12_DATA_PATH / 'tu.csv',
    operon_table=K12_DATA_PATH / 'operon.csv',
    tss_table=K12_DATA_PATH / 'tss.csv',
    tfbs_table=K12_DATA_PATH / 'tfbs.csv',
    terminator_table=K12_DATA_PATH / 'terminator.csv',
    attenuator_table=K12_DATA_PATH / 'attenuator.csv',
    rbs_table=K12_DATA_PATH / 'rbs.csv',
    riboswitch_table=K12_DATA_PATH / 'riboswitch.csv',
)

# Load iModulon Genes for Florum

In [38]:
# sheet_name=None loads every sheet of the workbook, giving a dict that maps
# sheet name -> DataFrame; the first column of each sheet becomes the index.
im_dfs = pd.read_excel(
    '../data/imodulon_genes_for_florum.xlsx',
    sheet_name=None,
    index_col=0,
)

# Add TSS Information and Re-Save

In [40]:
# Annotate every iModulon sheet with the TSSes of its genes, then re-save.
#
# All 'TSSes' columns are computed BEFORE the writer is opened: the output path
# is the same workbook im_dfs was read from, and leaving the `with` block on an
# exception still closes the writer, which would save a partially written
# workbook over the input. Doing the lookups first means a failed lookup
# (e.g. a b-number missing from the gene -> TU mapping) leaves the file untouched.
for im_df in im_dfs.values():
    tss_column = []
    for b_num in im_df['b-number']:
        # NOTE(review): _gene_to_tus is a private Bitome attribute; switch to a
        # public accessor if the library provides one.
        tus = mg1655._gene_to_tus[b_num]
        tss_values = mg1655.tu_table.loc[tus, 'tss']
        # Skip TUs with no annotated TSS, de-duplicate, and sort so the joined
        # string is reproducible rather than following arbitrary set order.
        unique_tsses = sorted({int(tss) for tss in tss_values if not pd.isna(tss)})
        tss_column.append(';'.join(str(tss) for tss in unique_tsses))
    # One ';'-separated string per gene; empty string when no TSS is known.
    im_df['TSSes'] = tss_column

with pd.ExcelWriter('../data/imodulon_genes_for_florum.xlsx') as writer:
    for im, im_df in im_dfs.items():
        # NOTE(review): index=False discards the index that read_excel(index_col=0)
        # created from each sheet's first column, so re-reading this file the same
        # way will consume a different column as the index - confirm this is intended.
        im_df.to_excel(writer, sheet_name=im, index=False)