# Import Packages

In [1]:
# built-in
import os
from os import path

# third-party (install required)
import pandas as pd
from pymodulon.io import load_json_model

# Load Data

## Define Data Paths

In [2]:
# Directories holding the input tables, given relative to this notebook's
# working directory. Each one is joined with file names in the cells below.
phage_path = '../data/phage'
precise1k_path = '../data/precise1k'

## Load Data, Metadata, and QC Stats

In [7]:
# --- Expression matrices -----------------------------------------------------
# Every table in this cell uses its first column as the index.
precise1k_log_tpm = pd.read_csv(
    path.join(precise1k_path, 'log_tpm.csv'),
    index_col=0,
)
phage_log_tpm = pd.read_csv(
    path.join(phage_path, 'log_tpm.csv'),
    index_col=0,
)

# --- Sample metadata ---------------------------------------------------------
precise1k_metadata = pd.read_csv(
    path.join(precise1k_path, 'metadata.csv'),
    index_col=0,
)
# The phage metadata file is tab-separated and, per the original author's note,
# also contains all of the P2.0 samples. Keep only the rows whose project_id is
# 'phage', then rename the id columns (presumably to match the PRECISE-1K
# metadata column names -- confirm against metadata.csv).
phage_metadata = (
    pd.read_csv(path.join(phage_path, 'metadata.tsv'), sep='\t', index_col=0)
    .loc[lambda df: df['project_id'] == 'phage']
    .rename(columns={'project_id': 'project', 'condition_id': 'condition'})
)

# --- QC statistics -----------------------------------------------------------
# NOTE(review): the previous comment here read "precise2 has EEP attached;
# remove these so we don't duplicate", but no rows are removed from either QC
# table in this cell. Confirm whether a filter is still needed.
precise1k_qc_stats = pd.read_csv(
    path.join(precise1k_path, 'multiqc_stats.csv'),
    index_col=0,
)
phage_qc_stats = pd.read_csv(
    path.join(phage_path, 'multiqc_stats.tsv'),
    sep='\t',
    index_col=0,
)

# Merge Data

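Merge the PRECISE-1K and phage tables: join the expression matrices on their shared index, and append the phage rows to the metadata and QC stats.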

In [15]:
# Combine the phage tables with the PRECISE-1K tables.
#
# The save step writes the merged tables back over the PRECISE-1K input files,
# so on a re-run the phage samples are already present on the PRECISE-1K side.
# Without a guard, DataFrame.merge would rename the overlapping sample columns
# with '_x'/'_y' suffixes and pd.concat would append duplicate rows. Dropping
# phage samples that are already present keeps this cell safe to re-run; when
# there is no overlap the result is identical to a plain merge/concat.

# Expression: keep only phage columns not already in PRECISE-1K, then join on
# the shared index. merge defaults to an inner join, so only index labels
# present in both tables are kept.
new_phage_log_tpm = phage_log_tpm.loc[
    :, ~phage_log_tpm.columns.isin(precise1k_log_tpm.columns)
]
log_tpm_1k = precise1k_log_tpm.merge(
    new_phage_log_tpm,
    left_index=True, right_index=True
)

# Metadata: append only the phage rows whose index label is new.
new_phage_metadata = phage_metadata.loc[
    ~phage_metadata.index.isin(precise1k_metadata.index)
]
metadata_1k = pd.concat([
    precise1k_metadata,
    new_phage_metadata
])

# QC stats: same rule as the metadata.
new_phage_qc_stats = phage_qc_stats.loc[
    ~phage_qc_stats.index.isin(precise1k_qc_stats.index)
]
multiqc_stats_1k = pd.concat([
    precise1k_qc_stats,
    new_phage_qc_stats
])

# Save Data

In [16]:
# Write each merged table into the PRECISE-1K directory.
# NOTE: these are the same file names the PRECISE-1K inputs were loaded from,
# so this cell overwrites those input files in place.
merged_tables = {
    'log_tpm.csv': log_tpm_1k,
    'metadata.csv': metadata_1k,
    'multiqc_stats.csv': multiqc_stats_1k,
}
for out_name, table in merged_tables.items():
    table.to_csv(path.join(precise1k_path, out_name))