In [1]:
import pandas as pd
import pickle
from flask import Flask
from app import *

# Datasets Round 1

In [2]:
# [1] Wolfram-Schauerte_2022: one full insert (phage, host, genomes, fractional data) followed by the other normalizations

description = 'Exponentially growing E. coli infected with T4 phage (MOI = 3.1) in biological triplicates. Samples taken before (0) and 1, 4, 7, 20 min post-infection and analysed by stranded RNA-seq (n=3). [GSE211026]'
doi = 'https://doi.org/10.3390/v14112502'

# Every file of this study lives in one folder and shares this filename stem.
wolfram_prefix = 'Datasets/Wolfram_Schauerte_2022/Notebook/Wolfram-Schauerte_'

# Publication metadata passed unchanged to every insert of this study.
wolfram_meta = dict(name = 'Wolfram-Schauerte_2022', year = 2022, journal = 'Viruses', firstauthor = 'Wolfram-Schauerte',
                    pubmedID = 36423111, description = description, doi = doi, upload_date = '25-08-12')

# addFullDataset receives the phage, host, genome and dataset fields in a single call.
with app.app_context():
    addFullDataset(
        phageName = 'T4 phage', phageDes = 'Bacteriophage T4', phageID = 'NC_000866.4', phageType = 'virulent',
        hostName = 'E. coli B strain', hostDes = 'E. coli K12 MG1655 reference', hostID = 'U00096.3', hostGroup = 'Escherichia coli',
        rnaSeqPath = wolfram_prefix + 'fractional_expression.tsv', norm = 'fractional',
        phageGenomePath = wolfram_prefix + 'phage_gff3.tsv', phageGenomeName = 'T4 phage genome',
        hostGenomePath = wolfram_prefix + 'host_gff3.tsv', hostGenomeName = 'E. coli genome',
        **wolfram_meta)


# Remaining normalizations of the same experiment.
# host_id = 1 / phage_id = 1 are hard-coded — assumes the full insert above produced the first Host and Phage rows
# of the database (NOTE(review): confirm before running against a non-empty database).
with app.app_context():
    for norm_label, file_suffix in [('TPM', 'full_TPM.tsv'),
                                    ('TPM_means', 'TPM_means.tsv'),
                                    ('TPM_std', 'TPM_std.tsv'),
                                    ('raw_counts', 'full_raw_counts.tsv')]:
        addSingleDataset(rnaSeqPath = wolfram_prefix + file_suffix, norm = norm_label,
                         host_id = 1, phage_id = 1, **wolfram_meta)

In [3]:
# [2] Guegler 2021, T4 infection of the -toxIN strain.
# Adds a new Host row (same ncbi_id 'U00096.3' as the first host) plus its genome, and reuses the T4 phage via phage_id = 1.

description = 'E. coli not expressing toxIN (pBR322 empty vector; -toxIN) infected with T4 phage (MOI = 5) in biological duplicates. Samples taken before (0) and 2.5, 5, 10, 20, 30 min post-infection (n=2). [GSE161794]'
doi = 'https://doi.org/10.1016/j.molcel.2021.03.027'

# Shared folder + filename stem of all files belonging to this condition.
t4_minus_prefix = 'Datasets/Guegler_2021/Notebook_T4/Guegler_T4_minusToxIN_'

# Register the empty-vector host strain.
with app.app_context():
    hostAdd = Host(
        name = 'E. coli K 12 pBR322 empty',
        group = 'Escherichia coli',
        description = 'E. coli K12 MG1655 without toxIN expression',
        ncbi_id = 'U00096.3',
    )
    db.session.add(hostAdd)
    db.session.commit()

# Store the host annotation table as a pickled DataFrame.
# host_id = 2 is hard-coded — assumes the Host row committed above received id 2 (NOTE(review): confirm).
with app.app_context():
    hostgff = pd.read_csv(t4_minus_prefix + 'host_gff3.tsv', sep='\t', comment = '#')
    hostgenome = pickle.dumps(hostgff)
    hostGFF = HostGenome(name = 'E. coli K12 MG1655 genome', host_id = 2, gff_data = hostgenome)
    db.session.add(hostGFF)
    db.session.commit()

# One dataset row per normalization; only the file and the norm label differ between calls.
t4_minus_meta = dict(name = 'Guegler_2021_T4_minus_toxIN', year = 2021, journal = 'Molecular Cell', description = description,
                     firstauthor = 'Guegler', pubmedID = 33838104, host_id = 2, phage_id = 1, doi = doi, upload_date = '25-08-12')

with app.app_context():
    for norm_label, file_suffix in [('fractional', 'fractional_expression.tsv'),
                                    ('TPM', 'full_TPM.tsv'),
                                    ('TPM_means', 'TPM_means.tsv'),
                                    ('TPM_std', 'TPM_std.tsv'),
                                    ('raw_counts', 'full_raw_counts.tsv')]:
        addSingleDataset(rnaSeqPath = t4_minus_prefix + file_suffix, norm = norm_label, **t4_minus_meta)

In [4]:
# [3] Guegler 2021, T4 infection of the +toxIN strain.
# Adds a new Host row (same ncbi_id 'U00096.3' as the first host) plus its genome, and reuses the T4 phage via phage_id = 1.

description = 'E. coli induced to express toxIN (pBR322-toxIN) infected with T4 phage (MOI = 5) in biological duplicates. Samples taken before (0) and 2.5, 5, 10, 20, 30 min post-infection (n=2). [GSE161794]'
doi = 'https://doi.org/10.1016/j.molcel.2021.03.027'

# Shared folder + filename stem of all files belonging to this condition.
t4_plus_prefix = 'Datasets/Guegler_2021/Notebook_T4/Guegler_T4_plusToxIN_'

# Register the toxIN-expressing host strain.
with app.app_context():
    hostAdd = Host(
        name = 'E. coli K 12 pBR322-toxIN',
        group = 'Escherichia coli',
        description = 'E. coli K12 MG1655 expressing toxIN',
        ncbi_id = 'U00096.3',
    )
    db.session.add(hostAdd)
    db.session.commit()

# Store the host annotation table as a pickled DataFrame.
# host_id = 3 is hard-coded — assumes the Host row committed above received id 3 (NOTE(review): confirm).
with app.app_context():
    hostgff = pd.read_csv(t4_plus_prefix + 'host_gff3.tsv', sep='\t', comment = '#')
    hostgenome = pickle.dumps(hostgff)
    hostGFF = HostGenome(name = 'E. coli K12 MG1655 genome', host_id = 3, gff_data = hostgenome)
    db.session.add(hostGFF)
    db.session.commit()

# One dataset row per normalization; only the file and the norm label differ between calls.
t4_plus_meta = dict(name = 'Guegler_2021_T4_plus_toxIN', year = 2021, journal = 'Molecular Cell', description = description,
                    firstauthor = 'Guegler', pubmedID = 33838104, host_id = 3, phage_id = 1, doi = doi, upload_date = '25-08-12')

with app.app_context():
    for norm_label, file_suffix in [('fractional', 'fractional_expression.tsv'),
                                    ('TPM', 'full_TPM.tsv'),
                                    ('TPM_means', 'TPM_means.tsv'),
                                    ('TPM_std', 'TPM_std.tsv'),
                                    ('raw_counts', 'full_raw_counts.tsv')]:
        addSingleDataset(rnaSeqPath = t4_plus_prefix + file_suffix, norm = norm_label, **t4_plus_meta)

In [5]:
# [4] Guegler 2021, T7 infection of the -toxIN strain.
# Adds the T7 phage and its genome; the host is the -toxIN strain already referenced as host_id = 2.

description = 'E. coli not expressing toxIN (pBR322 empty vector; -toxIN) infected with T7 phage (MOI = 5) in biological duplicates. Samples taken before (0) and 2.5, 5, 10, 20, 30 min post-infection (n=2). [GSE161795]'
doi = 'https://doi.org/10.1016/j.molcel.2021.03.027'

# Single source of truth for the file location of this condition.
# Previously the TPM, TPM_means, TPM_std and raw_counts inserts pointed at the 'Guegler_T7_plusToxIN_*' files,
# so the +toxIN measurements were stored under the 'Guegler_2021_T7_minus_toxIN' dataset name.
# Deriving every path from this prefix keeps all five normalizations on the -toxIN files.
t7_minus_prefix = 'Datasets/Guegler_2021/Notebook_T7/Guegler_T7_minusToxIN_'

# Register the T7 phage.
with app.app_context():
    phageAdd = Phage(name = 'T7 phage', description = 'Bacteriophage T7', ncbi_id = 'NC_001604.1', phage_type = 'virulent')
    db.session.add(phageAdd)
    db.session.commit()

# Store the phage annotation table as a pickled DataFrame.
# phage_id = 2 is hard-coded — assumes the Phage row committed above received id 2 (NOTE(review): confirm).
with app.app_context():
    phagegff = pd.read_csv(t7_minus_prefix + 'phage_gff3.tsv', sep='\t', comment = '#')
    phagegenome = pickle.dumps(phagegff)
    phageGFF = PhageGenome(name = 'T7 phage genome', phage_id = 2, gff_data = phagegenome)
    db.session.add(phageGFF)
    db.session.commit()

# One dataset row per normalization; only the file and the norm label differ between calls.
t7_minus_meta = dict(name = 'Guegler_2021_T7_minus_toxIN', year = 2021, journal = 'Molecular Cell', description = description,
                     firstauthor = 'Guegler', pubmedID = 33838104, host_id = 2, phage_id = 2, doi = doi, upload_date = '25-08-12')

with app.app_context():
    for norm_label, file_suffix in [('fractional', 'fractional_expression.tsv'),
                                    ('TPM', 'full_TPM.tsv'),
                                    ('TPM_means', 'TPM_means.tsv'),
                                    ('TPM_std', 'TPM_std.tsv'),
                                    ('raw_counts', 'full_raw_counts.tsv')]:
        addSingleDataset(rnaSeqPath = t7_minus_prefix + file_suffix, norm = norm_label, **t7_minus_meta)

In [6]:
# [5] Guegler 2021, T7 infection of the +toxIN strain.
# No new host or phage rows: reuses host_id = 3 and phage_id = 2.

description = 'E. coli induced to express toxIN (pBR322-toxIN) infected with T7 phage (MOI = 5) in biological duplicates. Samples taken before (0) and 2.5, 5, 10, 20, 30 min post-infection (n=2). [GSE161795]'
doi = 'https://doi.org/10.1016/j.molcel.2021.03.027'

# Shared folder + filename stem of all files belonging to this condition.
t7_plus_prefix = 'Datasets/Guegler_2021/Notebook_T7/Guegler_T7_plusToxIN_'

# Fields that are identical for every normalization of this condition.
t7_plus_meta = dict(name = 'Guegler_2021_T7_plus_toxIN', year = 2021, journal = 'Molecular Cell', description = description,
                    firstauthor = 'Guegler', pubmedID = 33838104, host_id = 3, phage_id = 2, doi = doi, upload_date = '25-08-12')

with app.app_context():
    for norm_label, file_suffix in [('fractional', 'fractional_expression.tsv'),
                                    ('TPM', 'full_TPM.tsv'),
                                    ('TPM_means', 'TPM_means.tsv'),
                                    ('TPM_std', 'TPM_std.tsv'),
                                    ('raw_counts', 'full_raw_counts.tsv')]:
        addSingleDataset(rnaSeqPath = t7_plus_prefix + file_suffix, norm = norm_label, **t7_plus_meta)

In [7]:
# [6] Finstrlova_2022, strain Newman: one full insert (phage K, host, genomes, fractional data) followed by the other normalizations

description = 'Infection of S. aureus strain Newman with Staphylococcus virus K (MOI = 7) sampled in biological triplicates before (0) and 2, 5, 10, 20, 30 minutes post-infection (n=3). [GSE190637]'
doi = 'https://doi.org/10.1128/spectrum.00123-22'

# Shared folder + filename stem of all files belonging to this strain.
newman_prefix = 'Datasets/Finstrlova_2022/Notebook_Newman/Finstrlova_Newman_'

# Publication metadata passed unchanged to every insert of this dataset.
# NOTE(review): pubmedID 3545752 has 7 digits while every other PubMed ID in this notebook has 8 —
# possibly a dropped digit; verify against the PubMed record for the DOI above.
newman_meta = dict(name = 'Finstrlova_2022_Newman', year = 2022, journal = 'Microbiology Spectrum', firstauthor = 'Finstrlova',
                   pubmedID = 3545752, description = description, doi = doi, upload_date = '25-08-12')

# addFullDataset receives the phage, host, genome and dataset fields in a single call.
with app.app_context():
    addFullDataset(
        phageName = 'Kayvirus phage K', phageDes = 'Staphylococcus virus K', phageID = 'NC_005880.2', phageType = 'virulent',
        hostName = 'S. aureus strain Newman', hostDes = 'Staphylococcus aureus strain Newman', hostID = 'NC_009641.1', hostGroup = 'Staphylococcus aureus',
        rnaSeqPath = newman_prefix + 'fractional_expression.tsv', norm = 'fractional',
        phageGenomePath = newman_prefix + 'phage_gff3.tsv', phageGenomeName = 'K virus genome',
        hostGenomePath = newman_prefix + 'host_gff3.tsv', hostGenomeName = 'S. aureus Newman genome',
        **newman_meta)

# Remaining normalizations.
# host_id = 4 / phage_id = 3 are hard-coded — assumes the full insert above produced the fourth Host and third Phage row
# (NOTE(review): confirm).
with app.app_context():
    for norm_label, file_suffix in [('TPM', 'full_TPM.tsv'),
                                    ('TPM_means', 'TPM_means.tsv'),
                                    ('TPM_std', 'TPM_std.tsv'),
                                    ('raw_counts', 'full_raw_counts.tsv')]:
        addSingleDataset(rnaSeqPath = newman_prefix + file_suffix, norm = norm_label,
                         host_id = 4, phage_id = 3, **newman_meta)

In [8]:
# [7] Finstrlova_2022, strain SH1000.
# Adds a new Host row plus its genome, and reuses phage K via phage_id = 3.

description = 'Infection of S. aureus strain SH1000 with Staphylococcus virus K (MOI = 7) sampled in biological triplicates before (0) and 2, 5, 10, 20, 30 minutes post-infection (n=3). [GSE190637]'
doi = 'https://doi.org/10.1128/spectrum.00123-22'

# Shared folder + filename stem of all files belonging to this strain.
sh1000_prefix = 'Datasets/Finstrlova_2022/Notebook_SH1000/Finstrlova_SH1000_'

# Register the SH1000 host strain.
with app.app_context():
    hostAdd = Host(
        name = 'S. aureus SH1000',
        description = 'Staphylococcus aureus strain SH1000',
        ncbi_id = 'NZ_CP059180.1',
        group = 'Staphylococcus aureus',
    )
    db.session.add(hostAdd)
    db.session.commit()

# Store the host annotation table as a pickled DataFrame.
# host_id = 5 is hard-coded — assumes the Host row committed above received id 5 (NOTE(review): confirm).
with app.app_context():
    hostgff = pd.read_csv(sh1000_prefix + 'host_gff3.tsv', sep='\t', comment = '#')
    hostgenome = pickle.dumps(hostgff)
    hostGFF = HostGenome(name = 'S. aureus SH1000 genome', host_id = 5, gff_data = hostgenome)
    db.session.add(hostGFF)
    db.session.commit()

# Fields that are identical for every normalization of this dataset.
# NOTE(review): pubmedID 3545752 has 7 digits while every other PubMed ID in this notebook has 8 —
# possibly a dropped digit; verify against the PubMed record for the DOI above.
sh1000_meta = dict(name = 'Finstrlova_2022_SH1000', year = 2022, journal = 'Microbiology Spectrum', firstauthor = 'Finstrlova',
                   pubmedID = 3545752, description = description, host_id = 5, phage_id = 3, doi = doi, upload_date = '25-08-12')

with app.app_context():
    for norm_label, file_suffix in [('fractional', 'fractional_expression.tsv'),
                                    ('TPM', 'full_TPM.tsv'),
                                    ('TPM_means', 'TPM_means.tsv'),
                                    ('TPM_std', 'TPM_std.tsv'),
                                    ('raw_counts', 'full_raw_counts.tsv')]:
        addSingleDataset(rnaSeqPath = sh1000_prefix + file_suffix, norm = norm_label, **sh1000_meta)

In [9]:
# [8] Kuptsov_2022: one full insert (phage, host, genomes, fractional data) followed by the other normalizations

description = 'Infection of S. aureus strain SA515 with Staphylococcus phage vB_SauM-515A1 (MOI = 10) sampled in biological triplicates at 5, 15, 30 minutes post-infection (n=3). [PRJNA659920]'
doi = 'https://doi.org/10.3390/v14030567'

# Shared folder + filename stem of all files belonging to this study.
kuptsov_prefix = 'Datasets/Kuptsov_2022/Notebook/Kuptsov_'

# Publication metadata passed unchanged to every insert of this study.
kuptsov_meta = dict(name = 'Kuptsov_2022', year = 2022, journal = 'Viruses', firstauthor = 'Kuptsov',
                    pubmedID = 35336974, description = description, doi = doi, upload_date = '25-08-12')

# addFullDataset receives the phage, host, genome and dataset fields in a single call.
with app.app_context():
    addFullDataset(
        phageName = 'vB_SauM-515A1 kayvirus', phageDes = 'Staphylococcus phage vB_SauM-515A1', phageID = 'MN047438.1', phageType = 'virulent',
        hostName = 'S. aureus strain SA515', hostDes = 'Staphylococcus aureus strain SA515', hostID = 'GCA_022352045.1', hostGroup = 'Staphylococcus aureus',
        rnaSeqPath = kuptsov_prefix + 'fractional_expression.tsv', norm = 'fractional',
        phageGenomePath = kuptsov_prefix + 'phage_gff3.tsv', phageGenomeName = 'vB_SauM-515A1 virus genome',
        hostGenomePath = kuptsov_prefix + 'host_gff3.tsv', hostGenomeName = 'S. aureus SA515 genome',
        **kuptsov_meta)

# Remaining normalizations.
# host_id = 6 / phage_id = 4 are hard-coded — assumes the full insert above produced the sixth Host and fourth Phage row
# (NOTE(review): confirm).
with app.app_context():
    for norm_label, file_suffix in [('TPM', 'full_TPM.tsv'),
                                    ('TPM_means', 'TPM_means.tsv'),
                                    ('TPM_std', 'TPM_std.tsv'),
                                    ('raw_counts', 'full_raw_counts.tsv')]:
        addSingleDataset(rnaSeqPath = kuptsov_prefix + file_suffix, norm = norm_label,
                         host_id = 6, phage_id = 4, **kuptsov_meta)

In [10]:
# [9] Leskinen_2016: one full insert (phage, host, genomes, fractional data) followed by the other normalizations

description = 'Fresh cultures (logarithmic phase) of Yersinia enterocolitica YeO3-R1 were infected with bacteriophage R1-37 (MOI = 10). Samples were taken before (0) and 2, 5, 10, 15, 21, 28, 35, 42, 49 min post-infection (n=1). [GSE77068]'
# Fixed: the resolver prefix was doubled ('https://doi.org/https://doi.org/...'), which is not a valid DOI link.
doi = 'https://doi.org/10.3390/v8040111'

# Shared folder + filename stem of all files belonging to this study.
leskinen_prefix = 'Datasets/Leskinen_2016/Notebook/Leskinen_'

# Publication metadata passed unchanged to every insert of this study.
leskinen_meta = dict(name = 'Leskinen_2016', year = 2016, journal = 'Viruses', firstauthor = 'Leskinen',
                     pubmedID = 27110815, description = description, doi = doi, upload_date = '25-08-12')

# addFullDataset receives the phage, host, genome and dataset fields in a single call.
# Fixed: hostGroup was misspelled 'Yersinia enterolitica'; it now matches the spelling used in hostDes.
# NOTE(review): if any other cell registers a host under the misspelled group name, update it as well.
with app.app_context():
    addFullDataset(
        phageName = 'PhiR1-37', phageDes = 'Yersinia phage phiR1-37', phageID = 'AJ972879.2', phageType = 'virulent',
        hostName = 'Y. enterocolitica O:3', hostDes = 'Yersinia enterocolitica O:3', hostID = 'NC_017564.1 + NC_017565.1', hostGroup = 'Yersinia enterocolitica',
        rnaSeqPath = leskinen_prefix + 'fractional_expression.tsv', norm = 'fractional',
        phageGenomePath = leskinen_prefix + 'phage_gff3.tsv', phageGenomeName = 'PhiR1-37 virus genome',
        hostGenomePath = leskinen_prefix + 'host_gff3.tsv', hostGenomeName = 'Y. enterocolitica O:3 genome',
        **leskinen_meta)

# Remaining normalizations.
# host_id = 7 / phage_id = 5 are hard-coded — assumes the full insert above produced the seventh Host and fifth Phage row
# (NOTE(review): confirm).
with app.app_context():
    for norm_label, file_suffix in [('TPM', 'full_TPM.tsv'),
                                    ('TPM_means', 'TPM_means.tsv'),
                                    ('TPM_std', 'TPM_std.tsv'),
                                    ('raw_counts', 'full_raw_counts.tsv')]:
        addSingleDataset(rnaSeqPath = leskinen_prefix + file_suffix, norm = norm_label,
                         host_id = 7, phage_id = 5, **leskinen_meta)

In [11]:
# [10] Brandão_2021, LB medium: one full insert (phage, host, genomes, fractional data) followed by the other normalizations

description = 'P. aeruginosa PAO1 infected with LUZ19 phage in LB medium (MOI = 75). Samples taken before (0) and 5, 10, 15 min post-infection in biological triplicates (n=3). [GSE162278]'
doi = 'https://doi.org/10.1080/15476286.2020.1870844'

# Shared folder + filename stem; note the folder is spelled with 'ã' while the file names use plain 'a'.
brandao_lb_prefix = 'Datasets/Brandão_2021/Notebook/Brandao_LB_'

# Publication metadata passed unchanged to every insert of this dataset.
brandao_lb_meta = dict(name = 'Brandão_2021_LB', year = 2021, journal = 'RNA Biology', firstauthor = 'Brandão',
                       pubmedID = 33448239, description = description, doi = doi, upload_date = '25-08-12')

# addFullDataset receives the phage, host, genome and dataset fields in a single call.
with app.app_context():
    addFullDataset(
        phageName = 'LUZ19', phageDes = 'Pseudomonas phage LUZ19', phageID = 'NC_010326.1', phageType = 'virulent',
        hostName = 'P. aeruginosa PAO1', hostDes = 'Pseudomonas aeruginosa PAO1', hostID = 'NC_002516.2', hostGroup = 'Pseudomonas aeruginosa',
        rnaSeqPath = brandao_lb_prefix + 'fractional_expression.tsv', norm = 'fractional',
        phageGenomePath = brandao_lb_prefix + 'phage_gff3.tsv', phageGenomeName = 'LUZ19 virus genome',
        hostGenomePath = brandao_lb_prefix + 'host_gff3.tsv', hostGenomeName = 'P. aeruginosa PAO1 genome',
        **brandao_lb_meta)

# Remaining normalizations.
# host_id = 8 / phage_id = 6 are hard-coded — assumes the full insert above produced the eighth Host and sixth Phage row
# (NOTE(review): confirm).
with app.app_context():
    for norm_label, file_suffix in [('TPM', 'full_TPM.tsv'),
                                    ('TPM_means', 'TPM_means.tsv'),
                                    ('TPM_std', 'TPM_std.tsv'),
                                    ('raw_counts', 'full_raw_counts.tsv')]:
        addSingleDataset(rnaSeqPath = brandao_lb_prefix + file_suffix, norm = norm_label,
                         host_id = 8, phage_id = 6, **brandao_lb_meta)

In [12]:
# [11] Brandão_2021, MCCM medium.
# No new host or phage rows: reuses host_id = 8 and phage_id = 6.

description = 'P. aeruginosa PAO1 infected with LUZ19 phage in mammalian cell culture medium (MCCM) (MOI = 75). Samples taken before (0) and 5, 10, 15 min post-infection in biological triplicates (n=3). [GSE162278]'
doi = 'https://doi.org/10.1080/15476286.2020.1870844'

# Shared folder + filename stem; note the folder is spelled with 'ã' while the file names use plain 'a'.
brandao_mccm_prefix = 'Datasets/Brandão_2021/Notebook/Brandao_MCCM_'

# Fields that are identical for every normalization of this dataset.
brandao_mccm_meta = dict(name = 'Brandão_2021_MCCM', year = 2021, journal = 'RNA Biology', firstauthor = 'Brandão',
                         pubmedID = 33448239, description = description, host_id = 8, phage_id = 6, doi = doi, upload_date = '25-08-12')

with app.app_context():
    for norm_label, file_suffix in [('fractional', 'fractional_expression.tsv'),
                                    ('TPM', 'full_TPM.tsv'),
                                    ('TPM_means', 'TPM_means.tsv'),
                                    ('TPM_std', 'TPM_std.tsv'),
                                    ('raw_counts', 'full_raw_counts.tsv')]:
        addSingleDataset(rnaSeqPath = brandao_mccm_prefix + file_suffix, norm = norm_label, **brandao_mccm_meta)

In [13]:
# [12] Sprenger_2024, V. cholerae WT with VP882 WT: one full insert (phage, host, genomes, fractional data) followed by the other normalizations

description = 'Lytic activation of Vibrio Phage VP882 WT in Vibrio Cholerae O1 el tor WT. Samples taken before (0) and 30, 60 min post-infection in biological triplicates (n=3). [GSE247769]'
doi = 'https://doi.org/10.1016/j.chom.2024.03.010'

# Shared folder + filename stem of all files belonging to this condition.
vp882_wt_prefix = 'Datasets/Sprenger_2024/Notebook_1/Sprenger_VP882_WT_'

# Publication metadata passed unchanged to every insert of this dataset.
# NOTE(review): 'CellPress' looks like a publisher name rather than a journal title (the DOI above contains 'j.chom');
# confirm the intended journal value and keep it in sync with the other Sprenger_2024 datasets.
vp882_wt_meta = dict(name = 'Sprenger_2024_VC_WT_VP882_WT', year = 2024, journal = 'CellPress', firstauthor = 'Sprenger',
                     pubmedID = 38579715, description = description, doi = doi, upload_date = '25-08-12')

# addFullDataset receives the phage, host, genome and dataset fields in a single call.
with app.app_context():
    addFullDataset(
        phageName = 'VP882 WT', phageDes = 'Vibrio Phage VP882 WT', phageID = 'NC_009016.1', phageType = 'temperate',
        hostName = 'V. Cholerae O1 WT', hostDes = 'Vibrio Cholerae O1 El Tor strain C6706 WT', hostID = 'NC_002505.1 + NC_002506.1', hostGroup = 'Vibrio Cholerae',
        rnaSeqPath = vp882_wt_prefix + 'fractional_expression.tsv', norm = 'fractional',
        phageGenomePath = vp882_wt_prefix + 'phage_gff3.tsv', phageGenomeName = 'VP882 genome',
        hostGenomePath = vp882_wt_prefix + 'host_gff3.tsv', hostGenomeName = 'V. cholerae O1 WT genome',
        **vp882_wt_meta)

# Remaining normalizations.
# host_id = 9 / phage_id = 7 are hard-coded — assumes the full insert above produced the ninth Host and seventh Phage row
# (NOTE(review): confirm).
with app.app_context():
    for norm_label, file_suffix in [('TPM', 'full_TPM.tsv'),
                                    ('TPM_means', 'TPM_means.tsv'),
                                    ('TPM_std', 'TPM_std.tsv'),
                                    ('raw_counts', 'full_raw_counts.tsv')]:
        addSingleDataset(rnaSeqPath = vp882_wt_prefix + file_suffix, norm = norm_label,
                         host_id = 9, phage_id = 7, **vp882_wt_meta)

In [14]:
# [13] Sprenger_2024: V. cholerae WT host with the VP882 delta vpdS phage.
# Adds a new Phage row and its genome annotation; the dataset inserts that follow reference it as phage_id = 8
# and reuse the existing host via host_id = 9.

description = 'Lytic activation of Vibrio Phage VP882 delta vpdS in Vibrio Cholerae O1 el tor WT. Samples were taken before (0) and 30, 60 min post-infection in biological triplicates. [GSE247769]'
doi = 'https://doi.org/10.1016/j.chom.2024.03.010'

# Register the mutant phage. It is given the same ncbi_id ('NC_009016.1') as the 'VP882 WT' phage.
with app.app_context():
    phageAdd = Phage(name = 'VP882 delta vpdS', description = 'Vibrio Phage VP882 delta vpdS', ncbi_id = 'NC_009016.1', phage_type = 'temperate')
    db.session.add(phageAdd)
    db.session.commit()

# Read the tab-separated annotation table (lines starting with '#' are skipped) and store it as a pickled DataFrame.
# phage_id = 8 is hard-coded — assumes the Phage row committed above received id 8 (NOTE(review): confirm).
with app.app_context():
    phagegff = pd.read_csv('Datasets/Sprenger_2024/Notebook_1/Sprenger_VP882_delta_vpdS_phage_gff3.tsv', sep='\t', comment = '#')
    phagegenome = pickle.dumps(phagegff)
    phageGFF = PhageGenome(name = 'VP882 delta vpdS genome', phage_id = 8, gff_data = phagegenome)
    db.session.add(phageGFF)
    db.session.commit()

with app.app_context():
    addSingleDataset(rnaSeqPath = 'Datasets/Sprenger_2024/Notebook_1/Sprenger_VP882_delta_vpdS_fractional_expression.tsv', norm = 'fractional',
               name = 'Sprenger_2024_VC_WT_VP882_delta_vpdS', year = 2024, journal = 'CellPress', firstauthor = 'Sprenger', pubmedID = 38579715, description = description, host_id=9, phage_id=8, doi = doi, upload_date = '25-08-12')
    addSingleDataset(rnaSeqPath = 'Datasets/Sprenger_2024/Notebook_1/Sprenger_VP882_delta_vpdS_full_TPM.tsv', norm = 'TPM',
               name = 'Sprenger_2024_VC_WT_VP882_delta_vpdS', year = 2024, journal = 'CellPress', firstauthor = 'Sprenger', pubmedID = 38579715, description = description, host_id=9, phage_id=8, doi = doi, upload_date = '25-08-12')
    addSingleDataset(rnaSeqPath = 'Datasets/Sprenger_2024/Notebook_1/Sprenger_VP882_delta_vpdS_TPM_means.tsv', norm = 'TPM_means',
               name = 'Sprenger_2024_VC_WT_VP882_delta_vpdS', year = 2024, journal = 'CellPress', firstauthor = 'Sprenger', pubmedID = 38579715, description = description, host_id=9, phage_id=8, doi = doi, upload_date = '25-08-12')
    addSingleDataset(rnaSeqPath = 'Datasets/Sprenger_2024/Notebook_1/Sprenger_VP882_delta_vpdS_TPM_std.tsv', norm = 'TPM_std',
               name = 'Sprenger_2024_VC_WT_VP882_delta_vpdS', year = 2024, journal = 'CellPress', firstauthor = 'Sprenger', pubmedID = 38579715, description = description, host_id=9, phage_id=8, doi = doi, upload_date = '25-08-12')
    addSingleDataset(rnaSeqPath = 'Datasets/Sprenger_2024/Notebook_1/Sprenger_VP882_delta_vpdS_full_raw_counts.tsv', norm = 'raw_counts',
               name = 'Sprenger_2024_VC_WT_VP882_delta_vpdS', year = 2024, journal = 'CellPress', firstauthor = 'Sprenger', pubmedID = 38579715, description = description, host_id=9, phage_id=8, doi = doi, upload_date = '25-08-12')

In [15]:
# [14] Add datasets sprenger_2024 Vibrio Cholerae delta tdh and VP882 WT
#
# Steps: insert the mutant host, attach its GFF3 annotation, then register
# one dataset per normalization.

description = 'Lytic activation of Vibrio Phage VP882 WT in Vibrio Cholerae O1 el tor delta tdh. Samples were taken before (0) and 15, 60, 120 min post-infection in biological triplicates. [GSE247770]'
doi = 'https://doi.org/10.1016/j.chom.2024.03.010'

# host_id used by the Sprenger_2024 VC WT datasets in this notebook.
WT_HOST_ID = 9

with app.app_context():
    # The host accession was previously typed as 'NC_009016.1', which is the
    # accession this notebook registers for phage VP882, not a host genome.
    # Like the other mutant hosts in this notebook, the deletion strain
    # presumably shares the WT reference accession, so it is copied from the
    # WT host row instead of being retyped — verify against the WT entry.
    wt_host = db.session.get(Host, WT_HOST_ID)
    if wt_host is None:
        raise LookupError(f'WT V. cholerae host (id {WT_HOST_ID}) must be added before the delta tdh host')
    hostAdd = Host(
        name = 'V. Cholerae O1 delta tdh',
        description = 'Vibrio Cholerae O1 El Tor strain C6706 delta tdh',
        ncbi_id = wt_host.ncbi_id,
        group = 'Vibrio Cholerae',
    )
    db.session.add(hostAdd)
    db.session.commit()

with app.app_context():
    # The annotation table is stored as a pickled DataFrame.
    hostgff = pd.read_csv('Datasets/Sprenger_2024/Notebook_2/Sprenger_VC_delta_tdh_host_gff3.tsv', sep='\t', comment = '#')
    hostgenome = pickle.dumps(hostgff)
    # host_id = 10 is hard-coded; assumes the host inserted above received id 10 — TODO confirm.
    hostGFF = HostGenome(name = 'V. cholerae O1 delta tdh genome', host_id = 10, gff_data = hostgenome)
    db.session.add(hostGFF)
    db.session.commit()

dataset_kwargs = dict(
    name = 'Sprenger_2024_VC_delta_tdh_VP882_WT', year = 2024, journal = 'CellPress',
    firstauthor = 'Sprenger', pubmedID = 38579715, description = description,
    host_id = 10, phage_id = 7, doi = doi, upload_date = '25-08-12',
)
norm_files = [
    ('Datasets/Sprenger_2024/Notebook_2/Sprenger_VC_delta_tdh_fractional_expression.tsv', 'fractional'),
    ('Datasets/Sprenger_2024/Notebook_2/Sprenger_VC_delta_tdh_full_TPM.tsv', 'TPM'),
    ('Datasets/Sprenger_2024/Notebook_2/Sprenger_VC_delta_tdh_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Sprenger_2024/Notebook_2/Sprenger_VC_delta_tdh_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Sprenger_2024/Notebook_2/Sprenger_VC_delta_tdh_full_raw_counts.tsv', 'raw_counts'),
]

with app.app_context():
    for rnaseq_path, norm_label in norm_files:
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm_label, **dataset_kwargs)

In [16]:
# [15] Add datasets meaden_2021 P. aeruginosa PA14 WT infection
#
# addFullDataset is given the phage, the host, both genome tables and the
# fractional table in one call; the other normalizations follow separately.

description = 'Infection of P. aeruginosa PA14 WT with Pseudomonas phage DMS3 (MOI = 0.5). Samples were taken 35, 60 and 120 minutes post-infection (n=5). [PRJEB31514]'
doi = 'https://doi.org/10.1038/s41396-020-00794-w'

# Publication metadata shared by every table of this dataset.
publication = dict(
    name = 'Meaden_2021_WT_infection', year = 2021, journal = 'ISME',
    firstauthor = 'Meaden', pubmedID = 33011743, description = description,
    doi = doi, upload_date = '25-08-12',
)

with app.app_context():
    addFullDataset(
        phageName = 'DMS3', phageDes = 'Pseudomonas phage DMS3', phageID = 'NC_008717.1', phageType = 'temperate',
        hostName = 'P. aeruginosa PA14 WT', hostDes = 'Pseudomonas aeruginosa PA14 WT', hostID = 'NZ_CP136842.1', hostGroup = 'Pseudomonas aeruginosa',
        rnaSeqPath = 'Datasets/Meaden_2021/Notebook/Meaden_WT_fractional_expression.tsv', norm = 'fractional',
        phageGenomePath = 'Datasets/Meaden_2021/Notebook/Meaden_WT_phage_gff3.tsv', phageGenomeName = 'Pseudomonas phage DMS3 genome',
        hostGenomePath = 'Datasets/Meaden_2021/Notebook/Meaden_WT_host_gff3.tsv', hostGenomeName = 'P. aeruginosa PA14 WT genome',
        **publication,
    )

norm_files = [
    ('Datasets/Meaden_2021/Notebook/Meaden_WT_full_TPM.tsv', 'TPM'),
    ('Datasets/Meaden_2021/Notebook/Meaden_WT_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Meaden_2021/Notebook/Meaden_WT_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Meaden_2021/Notebook/Meaden_WT_full_raw_counts.tsv', 'raw_counts'),
]

with app.app_context():
    # Hard-coded ids; presumably those created by addFullDataset above — verify.
    for rnaseq_path, norm_label in norm_files:
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm_label, host_id = 11, phage_id = 9, **publication)

In [17]:
# [16] Add datasets meaden_2021 P. aeruginosa PA14 BIM infection
#
# Steps: insert the BIM2 host, attach its GFF3 annotation, then register one
# dataset per normalization against phage_id 9.

description = 'Infection of P. aeruginosa PA14 BIM2 (carrying two crispr spacers targeting DMS3vir) with Pseudomonas phage DMS3 (MOI = 0.5). Samples were taken 35, 60 and 120 minutes post-infection (n=5). [PRJEB31514]'
doi = 'https://doi.org/10.1038/s41396-020-00794-w'

with app.app_context():
    hostAdd = Host(
        name = 'P. aeruginosa PA14 BIM2',
        description = 'Pseudomonas aeruginosa PA14 BIM2',
        ncbi_id = 'NZ_CP136842.1',
        group = 'Pseudomonas aeruginosa',
    )
    db.session.add(hostAdd)
    db.session.commit()

with app.app_context():
    # The annotation table is stored as a pickled DataFrame.
    hostgff = pd.read_csv(
        'Datasets/Meaden_2021/Notebook/Meaden_BIM_host_gff3.tsv',
        sep='\t', comment = '#',
    )
    hostgenome = pickle.dumps(hostgff)
    # host_id = 12 is hard-coded; assumes the host inserted above received id 12 — TODO confirm.
    hostGFF = HostGenome(name = 'P. aeruginosa PA14 BIM2 genome', host_id = 12, gff_data = hostgenome)
    db.session.add(hostGFF)
    db.session.commit()

dataset_kwargs = dict(
    name = 'Meaden_2021_BIM2_infection', year = 2021, journal = 'ISME',
    firstauthor = 'Meaden', pubmedID = 33011743, description = description,
    host_id = 12, phage_id = 9, doi = doi, upload_date = '25-08-12',
)
norm_files = [
    ('Datasets/Meaden_2021/Notebook/Meaden_BIM_fractional_expression.tsv', 'fractional'),
    ('Datasets/Meaden_2021/Notebook/Meaden_BIM_full_TPM.tsv', 'TPM'),
    ('Datasets/Meaden_2021/Notebook/Meaden_BIM_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Meaden_2021/Notebook/Meaden_BIM_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Meaden_2021/Notebook/Meaden_BIM_full_raw_counts.tsv', 'raw_counts'),
]

with app.app_context():
    for rnaseq_path, norm_label in norm_files:
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm_label, **dataset_kwargs)

In [18]:
# [17] Add datasets meaden_2021 P. aeruginosa PA14 BIM control
#
# No host or phage rows are inserted here: the tables are registered against
# the hard-coded host_id 12 / phage_id 9, although the description states
# that no phage was added to these samples.

description = 'Uninfected control of P. aeruginosa PA14 BIM2 (carrying two crispr spacers targeting DMS3vir) without addition of phage. Samples were taken 35, 60 and 120 minutes simultaneously to infected sample (n=5). [PRJEB31514]'
doi = 'https://doi.org/10.1038/s41396-020-00794-w'

dataset_kwargs = dict(
    name = 'Meaden_2021_BIM2_control', year = 2021, journal = 'ISME',
    firstauthor = 'Meaden', pubmedID = 33011743, description = description,
    host_id = 12, phage_id = 9, doi = doi, upload_date = '25-08-12',
)
norm_files = [
    ('Datasets/Meaden_2021/Notebook/Meaden_BIM_Ctrl_fractional_expression.tsv', 'fractional'),
    ('Datasets/Meaden_2021/Notebook/Meaden_BIM_Ctrl_full_TPM.tsv', 'TPM'),
    ('Datasets/Meaden_2021/Notebook/Meaden_BIM_Ctrl_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Meaden_2021/Notebook/Meaden_BIM_Ctrl_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Meaden_2021/Notebook/Meaden_BIM_Ctrl_full_raw_counts.tsv', 'raw_counts'),
]

with app.app_context():
    for rnaseq_path, norm_label in norm_files:
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm_label, **dataset_kwargs)

In [19]:
# [18] Add Zhong_2020 datasets
#
# addFullDataset is given the phage, the host, both genome tables and the
# fractional table in one call; the other normalizations follow separately.

description = 'Phage-resistant mutant Pseudomonas aeruginosa strain PAO1(r_8) infected with dsRNA Bacteriophage phiYY (MOI = 10). Samples were taken before (0) and 6, 12, 18 min post-infection in biological triplicates (n=3). [GSE128811]'
doi = 'https://doi.org/10.1016/j.isci.2020.101437'

# Publication metadata shared by every table of this dataset.
publication = dict(
    name = 'Zhong_2020', year = 2020, journal = 'iScience',
    firstauthor = 'Zhong', pubmedID = 32827855, description = description,
    doi = doi, upload_date = '25-08-12',
)

with app.app_context():
    addFullDataset(
        phageName = 'phiYY', phageDes = 'Pseudomonas phage phiYY', phageID = 'KX074201.1 + KX074202.1 + KX074203.1', phageType = 'virulent',
        hostName = 'P. aeruginosa PAO1r_8', hostDes = 'Pseudomonas aeruginosa PAO1r_8', hostID = 'NC_002516.2', hostGroup = 'Pseudomonas aeruginosa',
        rnaSeqPath = 'Datasets/Zhong_2020/Notebook/Zhong_fractional_expression.tsv', norm = 'fractional',
        phageGenomePath = 'Datasets/Zhong_2020/Notebook/Zhong_phage_gff3.tsv', phageGenomeName = 'Pseudomonas phage phiYY genome',
        hostGenomePath = 'Datasets/Zhong_2020/Notebook/Zhong_host_gff3.tsv', hostGenomeName = 'P. aeruginosa PAO1r_8 genome',
        **publication,
    )

norm_files = [
    ('Datasets/Zhong_2020/Notebook/Zhong_full_TPM.tsv', 'TPM'),
    ('Datasets/Zhong_2020/Notebook/Zhong_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Zhong_2020/Notebook/Zhong_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Zhong_2020/Notebook/Zhong_full_raw_counts.tsv', 'raw_counts'),
]

with app.app_context():
    # Hard-coded ids; presumably those created by addFullDataset above — verify.
    for rnaseq_path, norm_label in norm_files:
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm_label, host_id = 13, phage_id = 10, **publication)



In [20]:
# [19] Add Lood_2020 datasets
#
# Steps: insert the phage, attach its GFF3 annotation, then register one
# dataset per normalization against the existing host_id 8.

description = 'Pseudomonas aeruginosa strain PAO1 infected with PA5oct (MOI = 50). Samples were taken before infection at 0 min (n=4) and 5, 15, 25 min post-infection (n=3). [GSE130190]'
doi = 'https://doi.org/10.1111/1462-2920.14979'

with app.app_context():
    phageAdd = Phage(
        name = 'vB_PaeM_PA5oct',
        description = 'Pseudomonas phage vB_PaeM_PA5oct',
        ncbi_id = 'MK797984.1',
        phage_type = 'virulent',
    )
    db.session.add(phageAdd)
    db.session.commit()

with app.app_context():
    # The annotation table is stored as a pickled DataFrame.
    phagegff = pd.read_csv(
        'Datasets/Lood_2020/Notebook/Lood_phage_gff3.tsv',
        sep='\t', comment = '#',
    )
    phagegenome = pickle.dumps(phagegff)
    # phage_id = 11 is hard-coded; assumes the phage inserted above received id 11 — TODO confirm.
    phageGFF = PhageGenome(name = 'vB_PaeM_PA5oct genome', phage_id = 11, gff_data = phagegenome)
    db.session.add(phageGFF)
    db.session.commit()

dataset_kwargs = dict(
    name = 'Lood_2020', year = 2020, journal = 'Environmental Microbiology',
    firstauthor = 'Lood', pubmedID = 32154616, description = description,
    host_id = 8, phage_id = 11, doi = doi, upload_date = '25-08-12',
)
norm_files = [
    ('Datasets/Lood_2020/Notebook/Lood_fractional_expression.tsv', 'fractional'),
    ('Datasets/Lood_2020/Notebook/Lood_full_TPM.tsv', 'TPM'),
    ('Datasets/Lood_2020/Notebook/Lood_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Lood_2020/Notebook/Lood_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Lood_2020/Notebook/Lood_full_raw_counts.tsv', 'raw_counts'),
]

with app.app_context():
    for rnaseq_path, norm_label in norm_files:
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm_label, **dataset_kwargs)

In [21]:
# [20] Add Li_2020 datasets
#
# addFullDataset is given the phage, the host, both genome tables and the
# fractional table in one call; the other normalizations follow separately.

description = 'Infection of Clostridioides difficile Ribotype 078 with phage JD032 (MOI = 10). Samples were taken before (0) and 30, 45, 75, 135 min post-infection in biological triplicates (n=3). [PRJNA559590]'
doi = 'https://doi.org/10.1128/mSystems.00017-20'

# Publication metadata shared by every table of this dataset.
publication = dict(
    name = 'Li_2020', year = 2020, journal = 'mSystems',
    firstauthor = 'Li', pubmedID = 32371470, description = description,
    doi = doi, upload_date = '25-08-12',
)

with app.app_context():
    # hostDes / hostGroup previously spelled the genus "Clostridiodes"; they now
    # use "Clostridioides", matching the description and phageDes in this cell.
    addFullDataset(
        phageName = 'JD032', phageDes = 'Clostridioides phage JD032', phageID = 'MK473382.1', phageType = 'virulent',
        hostName = 'C. difficile RT078', hostDes = 'Clostridioides difficile RT078 strain TW11', hostID = 'CP045224.1 + CP045225.1', hostGroup = 'Clostridioides difficile',
        rnaSeqPath = 'Datasets/Li_2020/Notebook/Li_fractional_expression.tsv', norm = 'fractional',
        phageGenomePath = 'Datasets/Li_2020/Notebook/Li_phage_gff3.tsv', phageGenomeName = 'Clostridioides phage JD032 genome',
        hostGenomePath = 'Datasets/Li_2020/Notebook/Li_host_gff3.tsv', hostGenomeName = 'C. difficile RT078 genome',
        **publication,
    )

norm_files = [
    ('Datasets/Li_2020/Notebook/Li_full_TPM.tsv', 'TPM'),
    ('Datasets/Li_2020/Notebook/Li_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Li_2020/Notebook/Li_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Li_2020/Notebook/Li_full_raw_counts.tsv', 'raw_counts'),
]

with app.app_context():
    # Hard-coded ids; presumably those created by addFullDataset above — verify.
    for rnaseq_path, norm_label in norm_files:
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm_label, host_id = 14, phage_id = 12, **publication)

In [22]:
# [21] Add Yang_2019 datasets Acinetobacter baumannii AB1 infection
#
# addFullDataset is given the phage, the host, both genome tables and the
# fractional table in one call; the other normalizations follow separately.

description = 'Infection of drug resistant Acinetobacter baumannii AB1 isolated from hospital patient with phage phiAbp1 (MOI = 10). Samples were taken 5, 15, 30 min post-infection in biological triplicates (n=3). [GSE117396]'
doi = 'https://doi.org/10.1128/msystems.00068-19'

# Publication metadata shared by every table of this dataset.
publication = dict(
    name = 'Yang_2019_infection', year = 2019, journal = 'mSystems',
    firstauthor = 'Yang', pubmedID = 31020041, description = description,
    doi = doi, upload_date = '25-08-12',
)

with app.app_context():
    addFullDataset(
        phageName = 'phiAbp1', phageDes = 'Acinetobacter phage phiAbp1', phageID = 'NC_021316.1', phageType = 'virulent',
        hostName = 'A. baumannii AB1', hostDes = 'Acinetobacter baumannii AB1 strain ATCC 17978-VU', hostID = 'NZ_CP018664.1', hostGroup = 'Acinetobacter baumannii',
        rnaSeqPath = 'Datasets/Yang_2019/Notebook/Yang_fractional_expression.tsv', norm = 'fractional',
        # NOTE(review): the genome name says "Abp1" while the phage is named "phiAbp1" — confirm which is intended.
        phageGenomePath = 'Datasets/Yang_2019/Notebook/Yang_phage_gff3.tsv', phageGenomeName = 'Acinetobacter phage Abp1 genome',
        hostGenomePath = 'Datasets/Yang_2019/Notebook/Yang_host_gff3.tsv', hostGenomeName = 'A. baumannii AB1 genome',
        **publication,
    )

norm_files = [
    ('Datasets/Yang_2019/Notebook/Yang_full_TPM.tsv', 'TPM'),
    ('Datasets/Yang_2019/Notebook/Yang_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Yang_2019/Notebook/Yang_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Yang_2019/Notebook/Yang_full_raw_counts.tsv', 'raw_counts'),
]

with app.app_context():
    # Hard-coded ids; presumably those created by addFullDataset above — verify.
    for rnaseq_path, norm_label in norm_files:
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm_label, host_id = 15, phage_id = 13, **publication)

In [23]:
# [22] Add Yang_2019 datasets Acinetobacter baumannii AB1 uninfected control
#
# No host or phage rows are inserted here: the tables are registered against
# the hard-coded host_id 15 / phage_id 13, although the description states
# that no phage was added to these samples.

description = 'Uninfected control of drug resistant Acinetobacter baumannii AB1 isolated from hospital patient without addition of phage. Samples were collected at 5, 15, 30 min simultaneously to infected sample in biological triplicates (n=3). [GSE117396]'
doi = 'https://doi.org/10.1128/msystems.00068-19'

dataset_kwargs = dict(
    name = 'Yang_2019_control', year = 2019, journal = 'mSystems',
    firstauthor = 'Yang', pubmedID = 31020041, description = description,
    host_id = 15, phage_id = 13, doi = doi, upload_date = '25-08-12',
)
norm_files = [
    ('Datasets/Yang_2019/Notebook/Yang_ctrl_fractional_expression.tsv', 'fractional'),
    ('Datasets/Yang_2019/Notebook/Yang_ctrl_full_TPM.tsv', 'TPM'),
    ('Datasets/Yang_2019/Notebook/Yang_ctrl_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Yang_2019/Notebook/Yang_ctrl_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Yang_2019/Notebook/Yang_ctrl_full_raw_counts.tsv', 'raw_counts'),
]

with app.app_context():
    for rnaseq_path, norm_label in norm_files:
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm_label, **dataset_kwargs)

In [24]:
# [23] Add Ceyssens_2014 datasets (directional sequencing)
#
# Steps: insert the phage, attach its GFF3 annotation, then register one
# dataset per normalization against the existing host_id 8.

description = 'Infection of Pseudomonas aeruginosa PAO1 with bacteriophage phiKZ (MOI = 5). Samples were taken before (0) and 5, 15, 35 min post-infection in biological duplicates (n=2). Samples were analyzed by directional RNA sequencing (5x10^6 reads/sample). [GSE58494]'
doi = 'https://doi.org/10.1128/jvi.01347-14'

with app.app_context():
    phageAdd = Phage(
        name = 'phiKZ',
        description = 'Pseudomonas phage phiKZ',
        ncbi_id = 'NC_004629.1',
        phage_type = 'virulent',
    )
    db.session.add(phageAdd)
    db.session.commit()

with app.app_context():
    # The annotation table is stored as a pickled DataFrame.
    phagegff = pd.read_csv(
        'Datasets/Ceyssens_2014/Notebook/Ceyssens_directional_phage_gff3.tsv',
        sep='\t', comment = '#',
    )
    phagegenome = pickle.dumps(phagegff)
    # phage_id = 14 is hard-coded; assumes the phage inserted above received id 14 — TODO confirm.
    phageGFF = PhageGenome(name = 'phiKZ genome', phage_id = 14, gff_data = phagegenome)
    db.session.add(phageGFF)
    db.session.commit()

dataset_kwargs = dict(
    name = 'Ceyssens_2014_directional', year = 2014, journal = 'Journal of Virology',
    firstauthor = 'Ceyssens', pubmedID = 24965474, description = description,
    host_id = 8, phage_id = 14, doi = doi, upload_date = '25-08-12',
)
norm_files = [
    ('Datasets/Ceyssens_2014/Notebook/Ceyssens_directional_fractional_expression.tsv', 'fractional'),
    ('Datasets/Ceyssens_2014/Notebook/Ceyssens_directional_full_TPM.tsv', 'TPM'),
    ('Datasets/Ceyssens_2014/Notebook/Ceyssens_directional_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Ceyssens_2014/Notebook/Ceyssens_directional_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Ceyssens_2014/Notebook/Ceyssens_directional_full_raw_counts.tsv', 'raw_counts'),
]

with app.app_context():
    for rnaseq_path, norm_label in norm_files:
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm_label, **dataset_kwargs)

In [25]:
# [24] Add Ceyssens_2014 datasets (non-directional sequencing)
#
# Registered against the same hard-coded host_id 8 / phage_id 14 as the
# directional datasets; no host or phage rows are inserted here.
# NOTE(review): the read depth is written "5x10e7" here but "5x10^6" in the
# directional description — confirm the intended notation and value.

description = 'Infection of Pseudomonas aeruginosa PAO1 with bacteriophage phiKZ (MOI = 5). Samples were taken 10 and 35 min post-infection in biological duplicates (n=2). Samples were analyzed by deeper (nondirectional) RNA sequencing (5x10e7 reads/sample). [GSE58494]'
doi = 'https://doi.org/10.1128/jvi.01347-14'

dataset_kwargs = dict(
    name = 'Ceyssens_2014_non-directional', year = 2014, journal = 'Journal of Virology',
    firstauthor = 'Ceyssens', pubmedID = 24965474, description = description,
    host_id = 8, phage_id = 14, doi = doi, upload_date = '25-08-12',
)
norm_files = [
    ('Datasets/Ceyssens_2014/Notebook/Ceyssens_non-directional_fractional_expression.tsv', 'fractional'),
    ('Datasets/Ceyssens_2014/Notebook/Ceyssens_non-directional_full_TPM.tsv', 'TPM'),
    ('Datasets/Ceyssens_2014/Notebook/Ceyssens_non-directional_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Ceyssens_2014/Notebook/Ceyssens_non-directional_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Ceyssens_2014/Notebook/Ceyssens_non-directional_full_raw_counts.tsv', 'raw_counts'),
]

with app.app_context():
    for rnaseq_path, norm_label in norm_files:
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm_label, **dataset_kwargs)

# Datasets Round 2

In [26]:
# [25] Add Kongari_2024 dataset (wt)
#
# addFullDataset is given the phage, the host, both genome tables and the
# fractional table in one call; the other normalizations follow separately.

# Genus spelling corrected from "Staphyloccocus" to "Staphylococcus".
description = 'Infection of clinical isolate Staphylococcus aureus USA300 strain NRS384 with Staphylococcus phage K (MOI = 5). Samples were taken before (0) and 2, 5, 10, 20, 30, 40 min post infection (n=4). [GSE253516]'
doi = 'https://doi.org/10.3390/v16111773'

# Publication metadata shared by every table of this dataset.
publication = dict(
    name = 'Kongari_2024_WT', year = 2024, journal = 'Viruses',
    firstauthor = 'Kongari', pubmedID = 39599887, description = description,
    doi = doi, upload_date = '25-08-12',
)

with app.app_context():
    addFullDataset(
        phageName = 'Phage K', phageDes = 'Staphylococcus Phage K', phageID = 'NC_005880.2', phageType = 'virulent',
        hostName = 'S. aureus USA300 NRS384', hostDes = 'Staphylococcus aureus USA300 clinical isolate strain NRS384', hostID = 'CP027476.1', hostGroup = 'Staphylococcus aureus',
        rnaSeqPath = 'Datasets/Kongari_2024/notebook/Kongari_WT_fractional_expression.tsv', norm = 'fractional',
        phageGenomePath = 'Datasets/Kongari_2024/notebook/Kongari_WT_phage_gff3.tsv', phageGenomeName = 'Staphylococcus phage K genome',
        hostGenomePath = 'Datasets/Kongari_2024/notebook/Kongari_WT_host_gff3.tsv', hostGenomeName = 'S. aureus USA300 NRS384 genome',
        **publication,
    )

norm_files = [
    ('Datasets/Kongari_2024/notebook/Kongari_WT_full_TPM.tsv', 'TPM'),
    ('Datasets/Kongari_2024/notebook/Kongari_WT_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Kongari_2024/notebook/Kongari_WT_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Kongari_2024/notebook/Kongari_WT_full_raw_counts.tsv', 'raw_counts'),
]

with app.app_context():
    # Hard-coded ids; presumably those created by addFullDataset above — verify.
    for rnaseq_path, norm_label in norm_files:
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm_label, host_id = 16, phage_id = 15, **publication)

In [27]:
# [26] Add Kongari_2024 dataset (rpoC G17D)
#
# Creates the rpoC G17D host row, stores its pickled GFF annotation table, and
# registers the five expression tables against that host and phage_id = 15
# (presumably phage K from the Kongari_2024_WT cell -- confirm on a fresh database).

description = 'Infection of clinical isolate Staphylococcus aureus USA300 strain NRS384 rpoC G17D mutant with Staphylococcus phage K (MOI = 5). Samples were taken before (0) and 2, 5, 10, 20, 30, 40 min post infection (n=4). [GSE253516]'
doi = 'https://doi.org/10.3390/v16111773'

with app.app_context():
    hostAdd = Host(name = 'S. aureus USA300 NRS384 rpoC G17D', description = 'Staphylococcus aureus USA300 clinical isolate strain NRS384 rpoC G17D', ncbi_id = 'CP027476.1', group = 'Staphylococcus aureus')
    db.session.add(hostAdd)
    db.session.commit()
    # Use the key the database actually generated instead of assuming the next
    # auto-increment value. Read it while the app context (and session) is open.
    # Assumes the model's primary key attribute is `id` -- TODO confirm in app.py.
    rpoc_host_id = hostAdd.id

with app.app_context():
    hostgff = pd.read_csv('Datasets/Kongari_2024/notebook/Kongari_rpoC_G17D_host_gff3.tsv', sep='\t', comment = '#')
    hostgenome = pickle.dumps(hostgff)
    hostGFF = HostGenome(name = 'S. aureus USA300 NRS384 rpoC G17D genome', host_id = rpoc_host_id, gff_data = hostgenome)
    db.session.add(hostGFF)
    db.session.commit()

# Metadata shared by every normalization of this dataset.
meta_kwargs = dict(
    name = 'Kongari_2024_rpoC-G17D', year = 2024, journal = 'Viruses', firstauthor = 'Kongari',
    pubmedID = 39599887, description = description, doi = doi, upload_date = '25-08-12',
    host_id = rpoc_host_id, phage_id = 15,
)
table_prefix = 'Datasets/Kongari_2024/notebook/Kongari_rpoC_G17D_'

with app.app_context():
    # (file stem, normalization label), kept in the original registration order.
    for table_stem, norm_label in [
        ('fractional_expression', 'fractional'),
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label, **meta_kwargs)


In [28]:
# [27] Add Gerovac_1_2024 dataset
#
# No new host, phage or genome rows are created here; the tables are attached to
# the existing host_id = 8 and phage_id = 14.

description = 'Infection of exponentially growing Pseudomonas aeruginosa PAO1 in M9 (+Glucose) with Pseudomonas phage phiKZ (MOI = 15). Samples were taken before (0) and 2, 4, 6, 8, 10 min post infection in biological duplicates (n=2). Only early infection phase is captured, thus no classification of phage genes (early, middle, late) is performed. [GSE223979]'
doi = 'https://doi.org/10.1038/s41564-024-01616-x'

# Metadata shared by every normalization of this dataset.
meta_kwargs = dict(
    name = 'Gerovac_1_2024', year = 2024, journal = 'Nature microbiology', firstauthor = 'Gerovac',
    pubmedID = 38443577, description = description, doi = doi, upload_date = '25-08-12',
    host_id = 8, phage_id = 14,
)
table_prefix = 'Datasets/Gerovac_2024_1/notebook/Gerovac_'

with app.app_context():
    # (file stem, normalization label), kept in the original registration order.
    for table_stem, norm_label in [
        ('fractional_expression', 'fractional'),
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label, **meta_kwargs)

In [29]:
# [28] Add Gerovac_2_2024_ChmA-KD dataset
#
# Creates a dedicated phage row for the ChmA knockdown condition, stores its
# pickled GFF annotation table, and registers the five expression tables against
# that phage and the existing host_id = 8.

description = 'Infection of Pseudomonas aeruginosa PAO1 ChmA knockdown (pretreatment with chmA transcript ASO targeting ribosomal binding site and start codon) with Pseudomonas phage phiKZ (MOI = 5). Samples were taken 10, 15, 20, 25, 35 min post infection in biological duplicates (n=2). [GSE269401]'
doi = 'https://doi.org/10.1101/2024.07.31.605949'

with app.app_context():
    phageAdd = Phage(name = 'phiKZ ChmA-KD', description = 'Pseudomonas phage phiKZ ChmA-KD', ncbi_id = 'NC_004629.1', phage_type = 'virulent')
    db.session.add(phageAdd)
    db.session.commit()
    # Use the key the database actually generated instead of assuming the next
    # auto-increment value. Read it while the app context (and session) is open.
    # Assumes the model's primary key attribute is `id` -- TODO confirm in app.py.
    chma_kd_phage_id = phageAdd.id

with app.app_context():
    phagegff = pd.read_csv('Datasets/Gerovac_2024_2/notebook/Gerovac_ChmA-KD_phage_gff3.tsv', sep='\t', comment = '#')
    phagegenome = pickle.dumps(phagegff)
    phageGFF = PhageGenome(name = 'phiKZ ChmA-KD genome', phage_id = chma_kd_phage_id, gff_data = phagegenome)
    db.session.add(phageGFF)
    db.session.commit()

# Metadata shared by every normalization of this dataset.
# NOTE(review): the WT cell of this study registers under the same name
# 'Gerovac_2_2024'; other multi-condition studies in this notebook use distinct
# names per condition. Confirm the shared name is intended.
meta_kwargs = dict(
    name = 'Gerovac_2_2024', year = 2024, journal = 'BioRxiv', firstauthor = 'Gerovac',
    pubmedID = "preprint", description = description, doi = doi, upload_date = '25-08-12',
    host_id = 8, phage_id = chma_kd_phage_id,
)
table_prefix = 'Datasets/Gerovac_2024_2/notebook/Gerovac_ChmA-KD_'

with app.app_context():
    # (file stem, normalization label), kept in the original registration order.
    for table_stem, norm_label in [
        ('fractional_expression', 'fractional'),
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label, **meta_kwargs)


In [30]:
# [29] Add Gerovac_2_2024_WT dataset
#
# Wild-type condition of the same study; the tables are attached to the existing
# host_id = 8 and phage_id = 14.

description = 'Infection of Pseudomonas aeruginosa PAO1 with Pseudomonas phage phiKZ (MOI = 5). Samples were taken 10, 15, 20, 25, 35 min post infection in biological duplicates (n=2). [GSE269401]'
doi = 'https://doi.org/10.1101/2024.07.31.605949'

# Metadata shared by every normalization of this dataset.
# NOTE(review): the ChmA-KD cell of this study registers under the same name
# 'Gerovac_2_2024'; other multi-condition studies in this notebook use distinct
# names per condition. Confirm the shared name is intended.
meta_kwargs = dict(
    name = 'Gerovac_2_2024', year = 2024, journal = 'BioRxiv', firstauthor = 'Gerovac',
    pubmedID = "preprint", description = description, doi = doi, upload_date = '25-08-12',
    host_id = 8, phage_id = 14,
)
table_prefix = 'Datasets/Gerovac_2024_2/notebook/Gerovac_WT_'

with app.app_context():
    # (file stem, normalization label), kept in the original registration order.
    for table_stem, norm_label in [
        ('fractional_expression', 'fractional'),
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label, **meta_kwargs)

In [31]:
# [30] Add Silva_exponential dataset
#
# addFullDataset receives the phage, host, both genome tables and the
# 'fractional' expression table; the other normalizations are added afterwards.

description = 'Infection of exponential cultures of S. epidermidis 9142 with Staphylococcus phage SEP1 (MOI=5). Samples were taken before (0) and 5, 15, 30 min post infection in biological triplicates (n=3). [GSE254200]'
doi = 'https://doi.org/10.1128/msystems.00263-24'

# Publication metadata passed unchanged to every call in this cell.
publication_kwargs = dict(
    name = 'Silva_2024_exponential', year = 2024, journal = 'mSystems', firstauthor = 'Silva',
    pubmedID = 38904376, description = description, doi = doi, upload_date = '25-08-12',
)
table_prefix = 'Datasets/Silva_2024/notebook/Silva_exponential_'

with app.app_context():
    addFullDataset(
        phageName = 'SEP1',
        phageDes = 'Staphylococcus phage SEP1',
        phageID = 'NC_041928.1',
        phageType = 'virulent',
        hostName = 'S. epidermidis 9142',
        hostDes = 'Staphylococcus epidermidis 9142',
        hostID = 'CP000029.1',
        hostGroup = 'Staphylococcus epidermidis',
        rnaSeqPath = table_prefix + 'fractional_expression.tsv',
        norm = 'fractional',
        phageGenomePath = table_prefix + 'phage_gff3.tsv',
        phageGenomeName = 'Staphylococcus phage SEP1 genome',
        hostGenomePath = table_prefix + 'host_gff3.tsv',
        hostGenomeName = 'S. epidermidis 9142 genome',
        **publication_kwargs,
    )

with app.app_context():
    # host_id = 18 / phage_id = 17 are presumably the rows created by
    # addFullDataset above -- confirm when running against a fresh database.
    for table_stem, norm_label in [
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label,
                         host_id = 18, phage_id = 17, **publication_kwargs)

In [32]:
# [31] Add Silva_stationary dataset
#
# Stationary-phase condition; the tables are attached to the existing
# host_id = 18 and phage_id = 17.

description = 'Infection of stationary cultures of S. epidermidis 9142 with Staphylococcus phage SEP1 (MOI=5). Samples were taken before (0) and 5, 15, 30 min post infection in biological triplicates (n=3). [GSE254200]'
doi = 'https://doi.org/10.1128/msystems.00263-24'

# Metadata shared by every normalization of this dataset.
meta_kwargs = dict(
    name = 'Silva_2024_stationary', year = 2024, journal = 'mSystems', firstauthor = 'Silva',
    pubmedID = 38904376, description = description, doi = doi, upload_date = '25-08-12',
    host_id = 18, phage_id = 17,
)
table_prefix = 'Datasets/Silva_2024/notebook/Silva_stationary_'

with app.app_context():
    # (file stem, normalization label), kept in the original registration order.
    for table_stem, norm_label in [
        ('fractional_expression', 'fractional'),
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label, **meta_kwargs)

In [33]:
# [32] Add McLaughlin dataset
#
# addFullDataset receives the phage, host, both genome tables and the
# 'fractional' expression table; the other normalizations are added afterwards.

description = 'Infection of Caulobacter Crescentus CB15 with Caulobacter phage phiCbK (MOI = 10). Samples were taken before (0) and 15, 30, 45, 60, 75, 90 min post infection in biological triplicates (n=3). [GSE241056]'
doi = 'https://doi.org/10.1371/journal.pgen.1011048'

# Publication metadata passed unchanged to every call in this cell.
publication_kwargs = dict(
    name = 'McLaughlin_2023', year = 2023, journal = 'PLOS Genetics', firstauthor = 'McLaughlin',
    pubmedID = 37972151, description = description, doi = doi, upload_date = '25-08-12',
)
table_prefix = 'Datasets/McLaughlin_2023/notebook/McLaughlin_'

with app.app_context():
    addFullDataset(
        phageName = 'phiCbK',
        phageDes = 'Caulobacter phage phiCbK',
        phageID = 'JX100813.1',
        phageType = 'virulent',
        hostName = 'C. crescentus CB15',
        hostDes = 'Caulobacter crescentus CB15',
        hostID = 'CP001340.1',
        hostGroup = 'Caulobacter crescentus',
        rnaSeqPath = table_prefix + 'fractional_expression.tsv',
        norm = 'fractional',
        phageGenomePath = table_prefix + 'phage_gff3.tsv',
        phageGenomeName = 'Caulobacter phage phiCbK genome',
        hostGenomePath = table_prefix + 'host_gff3.tsv',
        hostGenomeName = 'C. crescentus CB15 genome',
        **publication_kwargs,
    )

with app.app_context():
    # host_id = 19 / phage_id = 18 are presumably the rows created by
    # addFullDataset above -- confirm when running against a fresh database.
    for table_stem, norm_label in [
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label,
                         host_id = 19, phage_id = 18, **publication_kwargs)

In [34]:
# [33] Add Bürkle_mono-infection dataset
#
# Creates the JG024 phage row, stores its pickled GFF annotation table, and
# registers the five expression tables against that phage and the existing
# host_id = 8.

description = 'Infection of Pseudomonas aeruginosa PAO1 with Pseudomonas phage JG024. Samples were taken 8 and 24 min post-infection in biological duplicates (n=2). Phage genes were not classified into infection phases due to limited temporal resolution of sampling. [GSE271537]'
doi = 'https://doi.org/10.1093/ismejo/wraf065'

with app.app_context():
    phageAdd = Phage(name = 'JG024', description = 'Pseudomonas phage JG024', ncbi_id = 'NC_017674.1', phage_type = 'virulent')
    db.session.add(phageAdd)
    db.session.commit()
    # Use the key the database actually generated instead of assuming the next
    # auto-increment value. Read it while the app context (and session) is open.
    # Assumes the model's primary key attribute is `id` -- TODO confirm in app.py.
    jg024_phage_id = phageAdd.id

with app.app_context():
    phagegff = pd.read_csv('Datasets/Bürkle_2025/notebook/Bürkle_JG024_phage_gff3.tsv', sep='\t', comment = '#')
    phagegenome = pickle.dumps(phagegff)
    phageGFF = PhageGenome(name = 'JG024 genome', phage_id = jg024_phage_id, gff_data = phagegenome)
    db.session.add(phageGFF)
    db.session.commit()

# Metadata shared by every normalization of this dataset.
meta_kwargs = dict(
    name = 'Bürkle_2025_mono-infection', year = 2025, journal = 'ISME', firstauthor = 'Bürkle',
    pubmedID = 40188480, description = description, doi = doi, upload_date = '25-08-12',
    host_id = 8, phage_id = jg024_phage_id,
)
table_prefix = 'Datasets/Bürkle_2025/notebook/Bürkle_JG024_'

with app.app_context():
    # (file stem, normalization label), kept in the original registration order.
    for table_stem, norm_label in [
        ('fractional_expression', 'fractional'),
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label, **meta_kwargs)


In [35]:
# [34] Add Bürkle co-infection dataset
#
# Creates one phage row representing the JG024 + JG005 cocktail, stores the
# pickled GFF annotation table read from the cocktail file, and registers the
# five expression tables against that row and the existing host_id = 8.

description = 'Co-infection of Pseudomonas aeruginosa PAO1 with Pseudomonas phage JG024 and JG005. Samples were taken 8 and 24 min post-infection in biological duplicates (n=2). Phage genes were not classified into infection phases due to limited temporal resolution of sampling. [GSE271537]'
doi = 'https://doi.org/10.1093/ismejo/wraf065'

with app.app_context():
    phageAdd = Phage(name = 'Phage cocktail (JG024 + JG005)', description = 'Phage cocktail (Pseudomonas phage JG024 + JG005)', ncbi_id = 'NC_017674.1 + PP712940.1', phage_type = 'virulent')
    db.session.add(phageAdd)
    db.session.commit()
    # Use the key the database actually generated instead of assuming the next
    # auto-increment value. Read it while the app context (and session) is open.
    # Assumes the model's primary key attribute is `id` -- TODO confirm in app.py.
    cocktail_phage_id = phageAdd.id

with app.app_context():
    phagegff = pd.read_csv('Datasets/Bürkle_2025/notebook/Bürkle_phage-cocktail_phage_gff3.tsv', sep='\t', comment = '#')
    phagegenome = pickle.dumps(phagegff)
    phageGFF = PhageGenome(name = 'Phage cocktail (JG024 + JG005) genomes', phage_id = cocktail_phage_id, gff_data = phagegenome)
    db.session.add(phageGFF)
    db.session.commit()

# Metadata shared by every normalization of this dataset.
meta_kwargs = dict(
    name = 'Bürkle_2025_co-infection', year = 2025, journal = 'ISME', firstauthor = 'Bürkle',
    pubmedID = 40188480, description = description, doi = doi, upload_date = '25-08-12',
    host_id = 8, phage_id = cocktail_phage_id,
)
table_prefix = 'Datasets/Bürkle_2025/notebook/Bürkle_phage-cocktail_'

with app.app_context():
    # (file stem, normalization label), kept in the original registration order.
    for table_stem, norm_label in [
        ('fractional_expression', 'fractional'),
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label, **meta_kwargs)

In [36]:
# [35] Add Buttimer dataset
#
# addFullDataset receives the phage, host, both genome tables and the
# 'fractional' expression table; the other normalizations are added afterwards.

# Spelling corrected in the stored description: "methicillin-resistant" and
# "Staphylococcus phage".
description = 'Infection of methicillin-resistant Staphylococcus aureus (MRSA) E1185(IV)ST12 with Staphylococcus phage SAM1 which was isolated from Fersisi commercial phage cocktail (MOI=10). Samples were taken 15, 35, 45 min post-infection in biological triplicates (n=3). [GSE192733]'
doi = 'https://doi.org/10.3390/v14030626'

# Publication metadata passed unchanged to every call in this cell.
publication_kwargs = dict(
    name = 'Buttimer_2022', year = 2022, journal = 'Viruses', firstauthor = 'Buttimer',
    pubmedID = 35337034, description = description, doi = doi, upload_date = '25-08-12',
)
table_prefix = 'Datasets/Buttimer_2022/notebook/Buttimer_'

with app.app_context():
    addFullDataset(
        phageName = 'SAM1',
        phageDes = 'Staphylococcus phage SAM1',
        phageID = 'MT338525.1',
        phageType = 'virulent',
        hostName = 'S. aureus E1185_IV_ST12',
        hostDes = 'Staphylococcus aureus strain E1185_IV_ST12',
        hostID = 'CP089586.1',
        hostGroup = 'Staphylococcus aureus',
        rnaSeqPath = table_prefix + 'fractional_expression.tsv',
        norm = 'fractional',
        phageGenomePath = table_prefix + 'phage_gff3.tsv',
        phageGenomeName = 'Staphylococcus phage SAM1 genome',
        hostGenomePath = table_prefix + 'host_gff3.tsv',
        hostGenomeName = 'S. aureus E1185_IV_ST12 genome',
        **publication_kwargs,
    )

with app.app_context():
    # host_id = 20 / phage_id = 21 are presumably the rows created by
    # addFullDataset above -- confirm when running against a fresh database.
    for table_stem, norm_label in [
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label,
                         host_id = 20, phage_id = 21, **publication_kwargs)

In [37]:
# [36] Add Huang control dataset
#
# addFullDataset receives the phage, host, both genome tables and the
# 'fractional' expression table; the other normalizations are added afterwards.

description = 'Uninfected control of picocyanobacteria Synechococcus strain WH7803 without addition of phage. Samples were collected simultaneously to infected samples at 15min, 1h, 3h, 5h and 7h (n=1). [GSE150732]'
doi = 'https://doi.org/10.1002/mbo3.1150'

# Publication metadata passed unchanged to every call in this cell.
# The dataset name is spelled 'Huang' so that it agrees with firstauthor and
# with the Datasets/Huang_2021 directory.
publication_kwargs = dict(
    name = 'Huang_2021_control', year = 2021, journal = 'Microbiology Open', firstauthor = 'Huang',
    pubmedID = 33377630, description = description, doi = doi, upload_date = '25-08-12',
)
table_prefix = 'Datasets/Huang_2021/notebook/Huang_ctrl_'

with app.app_context():
    addFullDataset(
        phageName = 'S-SBP1',
        phageDes = 'Synechococcus phage S-SBP1',
        phageID = 'MT424636.1',
        phageType = 'virulent',
        hostName = 'Synechococcus WH7803',
        hostDes = 'Synechococcus sp. WH7803',
        hostID = 'CT971583.1',
        hostGroup = 'Synechococcus species',
        rnaSeqPath = table_prefix + 'fractional_expression.tsv',
        norm = 'fractional',
        phageGenomePath = table_prefix + 'phage_gff3.tsv',
        phageGenomeName = 'Synechococcus phage S-SBP1 genome',
        hostGenomePath = table_prefix + 'host_gff3.tsv',
        hostGenomeName = 'Synechococcus sp. WH7803 genome',
        **publication_kwargs,
    )

with app.app_context():
    # host_id = 21 / phage_id = 22 are presumably the rows created by
    # addFullDataset above -- confirm when running against a fresh database.
    for table_stem, norm_label in [
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label,
                         host_id = 21, phage_id = 22, **publication_kwargs)

In [38]:
# [37] Add Huang infection dataset
#
# Infected condition of the same study; the tables are attached to the existing
# host_id = 21 and phage_id = 22.

description = 'Infection of picocyanobacteria Synechococcus strain WH7803 with Synechococcus phage S-SBP1 (MOI=2). Samples were taken 15min, 1h, 3h, 5h and 7h post-infection (n=1). [GSE150732]'
doi = 'https://doi.org/10.1002/mbo3.1150'

# Metadata shared by every normalization of this dataset.
# The dataset name is spelled 'Huang' so that it agrees with firstauthor and
# with the Datasets/Huang_2021 directory.
meta_kwargs = dict(
    name = 'Huang_2021_infection', year = 2021, journal = 'Microbiology Open', firstauthor = 'Huang',
    pubmedID = 33377630, description = description, doi = doi, upload_date = '25-08-12',
    host_id = 21, phage_id = 22,
)
table_prefix = 'Datasets/Huang_2021/notebook/Huang_infection_'

with app.app_context():
    # (file stem, normalization label), kept in the original registration order.
    for table_stem, norm_label in [
        ('fractional_expression', 'fractional'),
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label, **meta_kwargs)

In [39]:
# [38] Add Furi dataset
#
# addFullDataset receives the phage, host, both genome tables and the
# 'fractional' expression table; the other normalizations are added afterwards.

description = 'Infection of Streptococcus pneumoniae strain D39 with Streptococcus phage SpSL1 (MOI=0.2). Samples were taken 10, 50, 90 min post-infection in biological triplicates (n=3). [GSE132611]'
doi = 'https://doi.org/10.1128/jb.00370-19'

# Publication metadata passed unchanged to every call in this cell.
# NOTE(review): journal is 'mSphere' but the DOI prefix is 'jb.' -- verify the
# journal name against the publication record.
publication_kwargs = dict(
    name = 'Furi_2019', year = 2019, journal = 'mSphere', firstauthor = 'Furi',
    pubmedID = 31285240, description = description, doi = doi, upload_date = '25-08-12',
)
table_prefix = 'Datasets/Furi_2019/notebook/Furi_'

with app.app_context():
    addFullDataset(
        phageName = 'SpSL1',
        phageDes = 'Streptococcus phage SpSL1',
        phageID = 'KM882824.1',
        phageType = 'temperate',
        hostName = 'S. pneumoniae strain D39',
        hostDes = 'Streptococcus pneumoniae strain D39',
        hostID = 'NC_008533.2',
        hostGroup = 'Streptococcus pneumoniae',
        rnaSeqPath = table_prefix + 'fractional_expression.tsv',
        norm = 'fractional',
        phageGenomePath = table_prefix + 'phage_gff3.tsv',
        phageGenomeName = 'Streptococcus phage SpSL1 genome',
        hostGenomePath = table_prefix + 'host_gff3.tsv',
        hostGenomeName = 'S. pneumoniae strain D39 genome',
        **publication_kwargs,
    )

with app.app_context():
    # host_id = 22 / phage_id = 23 are presumably the rows created by
    # addFullDataset above -- confirm when running against a fresh database.
    for table_stem, norm_label in [
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label,
                         host_id = 22, phage_id = 23, **publication_kwargs)

In [40]:
# [39] Add Doron WH7803 dataset
#
# Creates the Syn9 phage row, stores its pickled GFF annotation table, and
# registers the five expression tables against that phage and the existing
# host_id = 21.

# Spelling corrected in the stored description: "Samples".
description = 'Infection of Synechococcus sp. WH7803 with T4-like Synechococcus phage syn9. Samples were taken 5, 30, 60, 120 min post-infection in biological duplicates (n=2). [GSE74921]'
doi = 'https://doi.org/10.1038/ismej.2015.210'

with app.app_context():
    phageAdd = Phage(name = 'Syn9', description = 'Synechococcus phage Syn9', ncbi_id = 'NC_008296.2', phage_type = 'virulent')
    db.session.add(phageAdd)
    db.session.commit()
    # Use the key the database actually generated instead of assuming the next
    # auto-increment value. Read it while the app context (and session) is open.
    # Assumes the model's primary key attribute is `id` -- TODO confirm in app.py.
    syn9_phage_id = phageAdd.id

with app.app_context():
    phagegff = pd.read_csv('Datasets/Doron_2016/notebook/Doron_WH7803_phage_gff3.tsv', sep='\t', comment = '#')
    phagegenome = pickle.dumps(phagegff)
    # Genome name spelling corrected: "Synechococcus".
    phageGFF = PhageGenome(name = 'Synechococcus phage Syn9 genome', phage_id = syn9_phage_id, gff_data = phagegenome)
    db.session.add(phageGFF)
    db.session.commit()

# Metadata shared by every normalization of this dataset.
meta_kwargs = dict(
    name = 'Doron_2016_WH7803', year = 2016, journal = 'ISME', firstauthor = 'Doron',
    pubmedID = 26623542, description = description, doi = doi, upload_date = '25-08-12',
    host_id = 21, phage_id = syn9_phage_id,
)
table_prefix = 'Datasets/Doron_2016/notebook/Doron_WH7803_'

with app.app_context():
    # (file stem, normalization label), kept in the original registration order.
    for table_stem, norm_label in [
        ('fractional_expression', 'fractional'),
        ('full_TPM', 'TPM'),
        ('TPM_means', 'TPM_means'),
        ('TPM_std', 'TPM_std'),
        ('full_raw_counts', 'raw_counts'),
    ]:
        addSingleDataset(rnaSeqPath = table_prefix + table_stem + '.tsv', norm = norm_label, **meta_kwargs)


In [41]:
# [40] Add Doron WH8102 dataset
# Registers host Synechococcus WH8102 together with its genome annotation, then
# uploads every normalization of the WH8102 infection time course.

description = 'Infection of Synechococcus sp. WH8102 with T4-like Synechococcus phage syn9. Samples were taken 5, 30, 60, 120 min post-infection in biological duplicates (n=2). [GSE74921]'
doi = 'https://doi.org/10.1038/ismej.2015.210'

with app.app_context():
    hostAdd = Host(name = 'Synechococcus WH8102', description = 'Synechococcus sp. WH8102', ncbi_id = 'NC_005070', group = 'Synechococcus species')
    db.session.add(hostAdd)
    db.session.commit()
    # Take the key from the row that was just inserted instead of hard-coding 23,
    # so the genome and datasets below cannot end up attached to a different
    # host when the table contents differ from what this notebook expects.
    # Read it inside the app context, while the session is still available.
    # NOTE(review): assumes the Host primary-key attribute is `id` - confirm in the app models.
    wh8102_host_id = hostAdd.id

with app.app_context():
    hostgff = pd.read_csv('Datasets/Doron_2016/notebook/Doron_WH8102_host_gff3.tsv', sep='\t', comment = '#')
    # The annotation table is stored as a pickled DataFrame blob.
    hostgenome = pickle.dumps(hostgff)
    hostGFF = HostGenome(name = 'Synechococcus sp. WH8102 genome', host_id = wh8102_host_id, gff_data = hostgenome)
    db.session.add(hostGFF)
    db.session.commit()

# One (file, normalization) pair per upload; the order matches the original sequence of calls.
wh8102_uploads = (
    ('Datasets/Doron_2016/notebook/Doron_WH8102_fractional_expression.tsv', 'fractional'),
    ('Datasets/Doron_2016/notebook/Doron_WH8102_full_TPM.tsv', 'TPM'),
    ('Datasets/Doron_2016/notebook/Doron_WH8102_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Doron_2016/notebook/Doron_WH8102_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Doron_2016/notebook/Doron_WH8102_full_raw_counts.tsv', 'raw_counts'),
)

with app.app_context():
    for rnaseq_path, norm in wh8102_uploads:
        # phage_id 24 is the id the WH7803 cell uses for the Syn9 genome; it stays
        # hard-coded because that phage row is created outside this cell.
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm,
                         name = 'Doron_2016_WH8102', year = 2016, journal = 'ISME', firstauthor = 'Doron',
                         pubmedID = 26623542, description = description,
                         host_id = wh8102_host_id, phage_id = 24, doi = doi, upload_date = '25-08-12')


In [42]:
# [41] Add Doron WH8109 dataset
# Registers host Synechococcus WH8109 together with its genome annotation, then
# uploads every normalization of the WH8109 infection time course.

description = 'Infection of Synechococcus sp. WH8109 with T4-like Synechococcus phage syn9. Samples were taken 5, 30, 60, 120 min post-infection in biological duplicates (n=2). [GSE74921]'
doi = 'https://doi.org/10.1038/ismej.2015.210'

with app.app_context():
    hostAdd = Host(name = 'Synechococcus WH8109', description = 'Synechococcus sp. WH8109', ncbi_id = 'CP006882', group = 'Synechococcus species')
    db.session.add(hostAdd)
    db.session.commit()
    # Take the key from the row that was just inserted instead of hard-coding 24,
    # so the genome and datasets below cannot end up attached to a different
    # host when the table contents differ from what this notebook expects.
    # Read it inside the app context, while the session is still available.
    # NOTE(review): assumes the Host primary-key attribute is `id` - confirm in the app models.
    wh8109_host_id = hostAdd.id

with app.app_context():
    hostgff = pd.read_csv('Datasets/Doron_2016/notebook/Doron_WH8109_host_gff3.tsv', sep='\t', comment = '#')
    # The annotation table is stored as a pickled DataFrame blob.
    hostgenome = pickle.dumps(hostgff)
    hostGFF = HostGenome(name = 'Synechococcus sp. WH8109 genome', host_id = wh8109_host_id, gff_data = hostgenome)
    db.session.add(hostGFF)
    db.session.commit()

# One (file, normalization) pair per upload; the order matches the original sequence of calls.
wh8109_uploads = (
    ('Datasets/Doron_2016/notebook/Doron_WH8109_fractional_expression.tsv', 'fractional'),
    ('Datasets/Doron_2016/notebook/Doron_WH8109_full_TPM.tsv', 'TPM'),
    ('Datasets/Doron_2016/notebook/Doron_WH8109_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Doron_2016/notebook/Doron_WH8109_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Doron_2016/notebook/Doron_WH8109_full_raw_counts.tsv', 'raw_counts'),
)

with app.app_context():
    for rnaseq_path, norm in wh8109_uploads:
        # phage_id 24 is the id the WH7803 cell uses for the Syn9 genome; it stays
        # hard-coded because that phage row is created outside this cell.
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm,
                         name = 'Doron_2016_WH8109', year = 2016, journal = 'ISME', firstauthor = 'Doron',
                         pubmedID = 26623542, description = description,
                         host_id = wh8109_host_id, phage_id = 24, doi = doi, upload_date = '25-08-12')


In [43]:
# [42] Add Halleran dataset
# addFullDataset is given the phage, host, both genome annotations and the
# fractional-expression table in one call; the remaining normalizations are
# then uploaded one by one with addSingleDataset.

description = 'Cluster A4 mycobacteriophage infecting Mycobacterium smegmatis (model organism for Mycobacterium tuberculosis) at MOI=10. Samples were taken before (0) and 5, 15, 30, 60, 120 min post-infection in biological duplicates (n=2). [GSE60592]'
doi = 'https://doi.org/10.1371/journal.pone.0141100'

with app.app_context():
    addFullDataset(phageName='Kampy', phageDes = 'A4 Mycobacterium phage Kampy', phageID = 'KJ510414.1', phageType = 'virulent',
               hostName = 'M. smegmatis strain mc2 155', hostDes = 'Mycobacterium smegmatis strain mc2 155', hostID = 'NC_018289.1', hostGroup = 'Mycobacterium smegmatis',
               rnaSeqPath = 'Datasets/Halleran_2015/notebook/Halleran_fractional_expression.tsv', norm = 'fractional',
               name = 'Halleran_2015', year = 2015, journal = 'PLOS One', firstauthor = 'Halleran', pubmedID = 26513661, description = description, doi = doi, upload_date = '25-08-12',
               phageGenomePath = 'Datasets/Halleran_2015/notebook/Halleran_phage_gff3.tsv', phageGenomeName = 'A4 Mycobacterium phage Kampy genome',
               hostGenomePath = 'Datasets/Halleran_2015/notebook/Halleran_host_gff3.tsv', hostGenomeName = 'M. smegmatis strain mc2 155 genome')

# One (file, normalization) pair per upload; the order matches the original sequence of calls.
halleran_uploads = (
    ('Datasets/Halleran_2015/notebook/Halleran_full_TPM.tsv', 'TPM'),
    ('Datasets/Halleran_2015/notebook/Halleran_TPM_means.tsv', 'TPM_means'),
    ('Datasets/Halleran_2015/notebook/Halleran_TPM_std.tsv', 'TPM_std'),
    ('Datasets/Halleran_2015/notebook/Halleran_full_raw_counts.tsv', 'raw_counts'),
)

with app.app_context():
    for rnaseq_path, norm in halleran_uploads:
        # NOTE(review): ids 25/25 are hard-coded and presumably match the host and
        # phage rows created by addFullDataset above - verify against the database.
        addSingleDataset(rnaSeqPath = rnaseq_path, norm = norm,
                         name = 'Halleran_2015', year = 2015, journal = 'PLOS One', firstauthor = 'Halleran',
                         pubmedID = 26513661, description = description,
                         host_id = 25, phage_id = 25, doi = doi, upload_date = '25-08-12')