In [None]:
import os

from pepars.fileio import fileio

from protfarm.workspace import Workspace as ws
from protfarm.workspace import Database as db
from protfarm.workspace import FASTQ_File as FASTQ_File
from protfarm.workspace import Library

In [None]:
# Root directory that holds every protein engineering sequencing experiment
DATA_PATH = os.path.join(os.pardir, "example_data")

# Name of this experiment; each experiment lives in its own subdirectory of the data path.
# An experiment groups together samples that all share the same variant region.
EXPERIMENT_NAME = "demo"

# Sample name -> list of FASTQ file names belonging to that sample.
# Fill this in by hand, or build it from an Excel sheet.
SAMPLE_FASTQ_FILES = {
    "sample_1": [
        "sample_1.fastq.gz",
    ],
    "sample_2": [
        "sample_2.fastq.gz",
    ],
}

In [None]:
# Point the protfarm workspace at the data directory, then select the experiment inside it.
# If this experiment doesn't exist already, an empty one will be created.
# NOTE(review): the data path is set before the experiment is selected - presumably the
# experiment is resolved relative to the data path; confirm before reordering these calls.
ws.set_data_path(DATA_PATH)
ws.set_experiment(EXPERIMENT_NAME)

In [None]:
# Remote FASTQ files used to seed the experiment, as (source URL, local file name) pairs
REMOTE_FILES = [
    (
        "https://caltech.box.com/shared/static/5a1zi1pawtn1x15tupr1pub01wqa5kfg.gz",
        "sample_1.fastq.gz",
    ),
    (
        "https://caltech.box.com/shared/static/fvu4uq3bjuur2hufjlzit0ijt3m1ji2i.gz",
        "sample_2.fastq.gz",
    ),
]

for remote_file_URL, local_file_name in REMOTE_FILES:
    # Each FASTQ file is stored under the workspace's raw data folder. The download
    # helper only fetches a file when it does not already exist locally.
    fileio.download_remote_file(remote_file_URL, ws.get_raw_data_path(local_file_name))

# Select the experiment again so the workspace picks up the newly added FASTQ files
ws.set_experiment(EXPERIMENT_NAME)

In [None]:
# Create a library for each sample and associate its FASTQ files with it
for sample_name, FASTQ_file_names in SAMPLE_FASTQ_FILES.items():

    try:
        library = Library(sample_name)
    except Exception:
        # Constructing the library failed - presumably because it was already registered
        # by an earlier run of this notebook (TODO: confirm which exception protfarm
        # raises in that case and narrow this handler). Fall back to looking up the
        # existing library so the cell can be re-run.
        library = db.get_library(sample_name)

    # Attach every FASTQ file listed for this sample to its library
    for FASTQ_file_name in FASTQ_file_names:
        library.add_file(FASTQ_file_name)

In [None]:
# Sanity check: list every registered sample together with the FASTQ files attached to it,
# one "name: files" line per sample
sample_summaries = (
    "%s: %s" % (sample.name, sample.fastq_files)
    for sample in db.get_samples()
)
print(*sample_summaries, sep="\n")