In [24]:
import csv
import io
import os
import re
import shutil
import subprocess
import sys

import numpy as np
import pandas as pd
import requests
import yaml
from snakemake.exceptions import print_exception, WorkflowError

# Bootstrap location of the project checkout that contains config.yaml
# (".." if we're running from the scripts directory or similar).
# NOTE(review): machine-specific absolute path; anyone running this elsewhere must edit it.
relative_dir = "/vortexfs1/omics/alexander/akrinos/euknique"
with open(os.path.join(relative_dir, "config.yaml"), "r") as configfile:
    # safe_load only builds plain Python objects. Calling yaml.load() without a
    # Loader is deprecated (PyYAML >= 5.1) and raises TypeError on PyYAML >= 6.
    config = yaml.safe_load(configfile)

# From here on the project home comes from the config file itself.
relative_dir = config["homedir"]

# Directories and table locations taken from the config file.
INPUTDIR = config["inputdir"]
SUBDIRECTDIR = config["subdirectory"]
REFERENCEDIR = config["referencedir"]
DATADIR = config["datadir"]

# Sample identifiers from the MMETSP sheet: keep the text before the first "C",
# then the text before the first "_".
# NOTE(review): this path is relative to the current working directory, not relative_dir.
samples_avail = pd.read_csv("../data/forNCBI_MMETSP.csv")
sample_names = [name.split("C")[0].split("_")[0] for name in samples_avail.SAMPLE_NAME]
    
##### CREATE SUBDIRECTORY TABLE #####

# We want to get the location of all the files we're interested in using:
# one row per file prefix (the file name up to "_R") found in each sample
# directory under INPUTDIR, together with the organism codes in that prefix.


def _organism_index(file_prefix):
    """Return the sorted, "_"-joined organism codes found in a file prefix.

    The part of the prefix before "_S" is stripped of digits and split on "_";
    empty pieces and a bare "S" (left over from an S000X token) are dropped and
    duplicates are removed.  For example "PN3_TP3_AX1_HT1_071119_S7" gives
    "AX_HT_PN_TP", and "AX3_071119_2_S6" gives "AB_AX".
    """
    stem = file_prefix.split("_S")[0]
    tokens = "".join(ch for ch in stem if not ch.isdigit()).split("_")
    # The 'S' filter only matters if you have the S000X in the file name.
    codes = {token for token in tokens if token and token != "S"}
    # AX3 indicates that we have an infected form of amoebaphyra.  This is
    # checked for every file, not just the first one listed in the directory.
    if "AX3" in file_prefix.split("_"):
        codes.add("AB")
    return "_".join(sorted(codes))


subdirectory_columns = ["Directory", "File", "Index"]
sampledirs = os.listdir(INPUTDIR)

sample_frames = []
for s in sorted(sampledirs):  # sorted so the table is the same on every run
    sample_path = os.path.join(INPUTDIR, s)
    if not os.path.isdir(sample_path):
        # Stray files next to the sample directories are not samples.
        continue
    files = sorted({p.split("_R")[0] for p in os.listdir(sample_path)})
    if not files:
        continue
    sample_frames.append(pd.DataFrame({"Directory": [s] * len(files),
                                       "File": files,
                                       "Index": [_organism_index(f) for f in files]},
                                      columns=subdirectory_columns))

# Concatenate once instead of growing the frame inside the loop
# (DataFrame.append was removed in pandas 2.0).  Row labels are left as the
# per-directory 0..n-1 labels, which is what the appended frames carried before.
if sample_frames:
    subdirectory_table = pd.concat(sample_frames)
else:
    subdirectory_table = pd.DataFrame(columns=subdirectory_columns)

print(subdirectory_table)
subdirectory_table.to_csv(path_or_buf=os.path.join(relative_dir, SUBDIRECTDIR), sep="\t")

  Directory                       File        Index
0     S0007  PN3_TP3_AX1_HT1_071119_S7  AX_HT_PN_TP
0     S0006            AX3_071119_2_S6        AB_AX
0     S0001            AX3_071119_1_S1        AB_AX
0     S0008          PN3_PN2_071119_S8           PN
0     S0003          TP1_TP2_071119_S3           TP
0     S0002          AX1_HT1_071119_S2        AX_HT
0     S0004          AX2_HT2_071119_S4        AX_HT
0     S0005          AX4_PN1_071119_S5        AX_PN


  


In [12]:
# Show the parsed configuration so the keys read by the following cells can be checked.
print(config)

{'homedir': '/vortexfs1/omics/alexander/akrinos/euknique', 'inputdir': '/vortexfs1/omics/alexander/data/single-cell/2019-08-singlecell/WH_Pilot', 'outputdir': '/vortexfs1/omics/alexander/data/single-cell/alevin-WHPilot-11182019-cb0.2', 'scratch': '/vortexfs1/scratch/akrinos/drop-seq', 'indexdir': 'data/indices/', 'datadir': 'data/', 'referencedir': '/vortexfs1/omics/alexander/data/mmetsp/', 'configlist': 'alex,thaps,pn,het,amoeb', 'smallnamelist': 'AX,TP,PN,HT,AB', 'makesalmon': 1, 'maketg': 1, 'alex': 'Alexandrium-fundyense_MMETSP0347,Alexandrium-fundyense_MMETSP0196', 'ehux': 'Emiliania-huxleyi-374', 'thaps': 'Thalassiosira-sp._MMETSP1071', 'pn': 'Pseudo-nitzschia-pungens_MMETSP1060', 'het': 'Heterocapsa-triquestra_MMETSP0448', 'amoeb': 'Amoebophrya_MMETSP0795', 'subdirectory': 'data/subdirectory_table.tsv', 'date': '071119'}


In [32]:

# config["configlist"] holds the names of the organisms we want references for.
# Each of those names is itself a key in the config file whose value lists the
# reference(s) to use for that organism, written as <organism>_<MMETSP id>.
org_list = config["configlist"].split(",")
# Two-letter organism codes, matched to org_list by position.
short_names = config["smallnamelist"].split(",")

# Build three parallel lists with one entry per organism: its short code, the
# list of its MMETSP ids and the list of its organism names.  The last two are
# lists because an organism can have several transcriptomes/species.
list_orgs_short = []
MMETSP_names = []
list_orgs = []
for position, org_key in enumerate(org_list):
    split_entries = [entry.split("_") for entry in config[org_key].split(",")]
    mmetsp_ids = [parts[1] for parts in split_entries]
    organism_names = [parts[0] for parts in split_entries]

    list_orgs_short.append(short_names[position])
    MMETSP_names.append(mmetsp_ids)
    list_orgs.append(organism_names)
print(MMETSP_names)

[['MMETSP0347', 'MMETSP0196'], ['MMETSP1071'], ['MMETSP1060'], ['MMETSP0448'], ['MMETSP0795']]


In [39]:

# For each organism, copy every one of its reference FASTA files into the
# project data directory under an <organism>_<MMETSP id>_nt.fasta name, then
# concatenate them into a single <short code>_combined_nt.fasta file.
# The paths of the combined files are collected in files_written.


def _concatenate_files(source_paths, destination):
    """Write the contents of ``source_paths``, in order, to ``destination``.

    A newline is inserted after any non-empty source that does not end in one,
    so the first header of the next file is never glued onto a sequence line.
    Raises OSError (e.g. FileNotFoundError) if a source cannot be read.
    """
    with open(destination, "wb") as combined:
        for source in source_paths:
            with open(source, "rb") as handle:
                shutil.copyfileobj(handle, combined)
                if handle.tell() > 0:
                    handle.seek(-1, os.SEEK_END)
                    if handle.read(1) != b"\n":
                        combined.write(b"\n")


species_dir_name = os.path.join(relative_dir, DATADIR)
files_written = []
for gg, mmetsp_ids in enumerate(MMETSP_names):
    file_names = []
    for mmetsp_id in mmetsp_ids:
        print(mmetsp_id)
        if mmetsp_id not in sample_names:
            # The current sample is not in the MMETSP list: fall back to the
            # local assembly.  Anchored at the project data directory like the
            # other DATADIR paths in this cell instead of the working directory.
            file_names.append(os.path.join(species_dir_name, "Acatassembly.fasta"))
        else:
            file_names.append(os.path.join(REFERENCEDIR, mmetsp_id + "_clean.fasta"))

    # shutil instead of shelling out to cp/cat: a missing or unreadable
    # reference now raises instead of being silently skipped, and paths with
    # spaces or shell metacharacters are handled correctly.
    for curr_file, org_name, mmetsp_id in zip(file_names, list_orgs[gg], mmetsp_ids):
        to_write = os.path.join(species_dir_name,
                                org_name.replace(" ", "") + "_" + mmetsp_id + "_nt.fasta")
        shutil.copyfile(curr_file, to_write)

    to_write = os.path.join(species_dir_name,
                            list_orgs_short[gg].replace(" ", "") + "_" + "combined" + "_nt.fasta")
    _concatenate_files(file_names, to_write)
    print(to_write)
    files_written.append(to_write) # need to extend if using list option

MMETSP0347
MMETSP0196
/vortexfs1/omics/alexander/akrinos/euknique/data/AX_combined_nt.fasta
MMETSP1071
/vortexfs1/omics/alexander/akrinos/euknique/data/TP_combined_nt.fasta
MMETSP1060
/vortexfs1/omics/alexander/akrinos/euknique/data/PN_combined_nt.fasta
MMETSP0448
/vortexfs1/omics/alexander/akrinos/euknique/data/HT_combined_nt.fasta
MMETSP0795
/vortexfs1/omics/alexander/akrinos/euknique/data/AB_combined_nt.fasta


In [29]:
# Iterate through the combined files of transcriptomes we just wrote and, for
# each one, write a two-column, tab-separated transcript-to-gene map named
# tgMap_<short code>.tsv into the project data directory.


def _fasta_transcript_names(fasta_path):
    """Yield the name of every record in a FASTA file.

    The name is the text after the leading ">" of a header line, up to the
    first space.
    """
    with open(fasta_path, "r") as fasta:
        for line in fasta:
            if line.startswith(">"):
                yield line[1:].rstrip("\n").split(" ", 1)[0]


counter = 0  # total number of transcript rows written across all organisms
for ff, combined_fasta in enumerate(files_written):
    short_name = list_orgs_short[ff]

    # Write next to the combined FASTA files.  The temporary file and the final
    # file are in the same directory, so the result no longer depends on the
    # notebook's working directory.
    file_loc = os.path.join(relative_dir, DATADIR, "tgMap_" + short_name + ".tsv")
    partial_loc = file_loc + ".tmp"

    # newline="" plus an explicit "\n" terminator: csv.writer would otherwise
    # end every row with "\r\n".
    with open(partial_loc, "wt", newline="") as tgMap_file:
        transcript_to_gene = csv.writer(tgMap_file, delimiter="\t", lineterminator="\n")
        for transcript in _fasta_transcript_names(combined_fasta):
            fields = transcript.split("|")
            if len(fields) < 4:
                raise ValueError("Expected at least four '|'-separated fields in FASTA header "
                                 + repr(transcript) + " from " + str(combined_fasta))
            # The gene name is <short code>-DN<fourth header field>.
            # NOTE(review): when an organism combines several transcriptomes,
            # equal numbers from different assemblies get the same gene name -
            # confirm that this is intended.
            transcript_to_gene.writerow([transcript, short_name + "-" + "DN" + fields[3]])
            counter = counter + 1

    # Only move the finished file into place once it has been written completely.
    os.replace(partial_loc, file_loc)

print("Done!")

['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|3167', 'AX-DN3167']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|3176', 'AX-DN3176']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|3178', 'AX-DN3178']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|3183', 'AX-DN3183']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|3191', 'AX-DN3191']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|3194', 'AX-DN3194']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|3184', 'AX-DN3184']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|60', 'AX-DN60']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|67', 'AX-DN67']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|61', 'AX-DN61']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|85', 'AX-DN85']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|79', 'AX-DN79']
['gnl|Alexan

['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|32250', 'AX-DN32250']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|32252', 'AX-DN32252']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|32254', 'AX-DN32254']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|32034', 'AX-DN32034']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|32264', 'AX-DN32264']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|32294', 'AX-DN32294']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|32297', 'AX-DN32297']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|32303', 'AX-DN32303']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|32305', 'AX-DN32305']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|32394', 'AX-DN32394']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347-20130606|23181', 'AX-DN23181']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0347

['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|8474', 'AX-DN8474']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|716', 'AX-DN716']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|715', 'AX-DN715']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|720', 'AX-DN720']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|721', 'AX-DN721']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|713', 'AX-DN713']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|710', 'AX-DN710']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|723', 'AX-DN723']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|726', 'AX-DN726']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|729', 'AX-DN729']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|731', 'AX-DN731']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|734', 'AX-DN734']
['gnl|Alexandr

['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|6146', 'AX-DN6146']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|6164', 'AX-DN6164']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|6172', 'AX-DN6172']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|6178', 'AX-DN6178']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|6185', 'AX-DN6185']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|6190', 'AX-DN6190']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|6198', 'AX-DN6198']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|6201', 'AX-DN6201']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|6209', 'AX-DN6209']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|6007', 'AX-DN6007']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|6010', 'AX-DN6010']
['gnl|Alexandrium_fundyense_Strain_CCMP1719|MMETSP0196-20130402|6019', 'AX-D

['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|32880', 'TP-DN32880']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|32881', 'TP-DN32881']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|32878', 'TP-DN32878']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|32949', 'TP-DN32949']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|32950', 'TP-DN32950']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|32951', 'TP-DN32951']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|32696', 'TP-DN32696']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|32697', 'TP-DN32697']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|33102', 'TP-DN33102']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|33103', 'TP-DN33103']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|33104', 'TP-DN33104']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|33177', 'TP-DN33177']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|33179', 'TP-DN33179']

['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|30141', 'TP-DN30141']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|30146', 'TP-DN30146']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|30161', 'TP-DN30161']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|30162', 'TP-DN30162']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|30163', 'TP-DN30163']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|30164', 'TP-DN30164']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|30174', 'TP-DN30174']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|30286', 'TP-DN30286']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|30287', 'TP-DN30287']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|30014', 'TP-DN30014']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|30016', 'TP-DN30016']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|30017', 'TP-DN30017']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|30019', 'TP-DN30019']

['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|24800', 'TP-DN24800']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|24802', 'TP-DN24802']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|24805', 'TP-DN24805']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|24810', 'TP-DN24810']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|24818', 'TP-DN24818']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|24847', 'TP-DN24847']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|24849', 'TP-DN24849']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|24851', 'TP-DN24851']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|24617', 'TP-DN24617']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|24618', 'TP-DN24618']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|24619', 'TP-DN24619']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|24621', 'TP-DN24621']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|24622', 'TP-DN24622']

['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|26489', 'TP-DN26489']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|26490', 'TP-DN26490']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|26146', 'TP-DN26146']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|26219', 'TP-DN26219']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|26221', 'TP-DN26221']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|26223', 'TP-DN26223']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|26224', 'TP-DN26224']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|26225', 'TP-DN26225']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|26321', 'TP-DN26321']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|26320', 'TP-DN26320']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|26322', 'TP-DN26322']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|26323', 'TP-DN26323']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|26420', 'TP-DN26420']

['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|27616', 'TP-DN27616']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|27628', 'TP-DN27628']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|27404', 'TP-DN27404']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|27405', 'TP-DN27405']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|27406', 'TP-DN27406']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|27496', 'TP-DN27496']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|27497', 'TP-DN27497']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|27500', 'TP-DN27500']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|27501', 'TP-DN27501']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|27530', 'TP-DN27530']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|27562', 'TP-DN27562']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|27563', 'TP-DN27563']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|27565', 'TP-DN27565']

['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|12964', 'TP-DN12964']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|12965', 'TP-DN12965']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|12967', 'TP-DN12967']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|12968', 'TP-DN12968']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|13113', 'TP-DN13113']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|13114', 'TP-DN13114']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|13166', 'TP-DN13166']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|13167', 'TP-DN13167']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|13168', 'TP-DN13168']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|13164', 'TP-DN13164']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|13232', 'TP-DN13232']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|13233', 'TP-DN13233']
['gnl|Thalassiosira_sp_Strain_NH16|MMETSP1071-20121207|13231', 'TP-DN13231']

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|4679', 'PN-DN4679']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|4678', 'PN-DN4678']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|4688', 'PN-DN4688']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|4776', 'PN-DN4776']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|4777', 'PN-DN4777']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|4453', 'PN-DN4453']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|4451', 'PN-DN4451']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|4734', 'PN-DN4734']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|4574', 'PN-DN4574']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|4581', 'PN-DN4581']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|4587', 'PN-DN4587']

['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|53590', 'PN-DN53590']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|53596', 'PN-DN53596']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|53593', 'PN-DN53593']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|55149', 'PN-DN55149']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|55154', 'PN-DN55154']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|55210', 'PN-DN55210']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|55201', 'PN-DN55201']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|55223', 'PN-DN55223']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|55252', 'PN-DN55252']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|55253', 'PN-DN55253']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228

['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|70360', 'PN-DN70360']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|70361', 'PN-DN70361']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|70409', 'PN-DN70409']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|70609', 'PN-DN70609']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|70611', 'PN-DN70611']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|39962', 'PN-DN39962']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|39980', 'PN-DN39980']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|40022', 'PN-DN40022']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|40060', 'PN-DN40060']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228|40064', 'PN-DN40064']
['gnl|Pseudo-nitzschia_pungens_Strain_cf_cingulata|MMETSP1060-20121228

['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|907', 'HT-DN907']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|913', 'HT-DN913']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|920', 'HT-DN920']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|922', 'HT-DN922']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|925', 'HT-DN925']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|273', 'HT-DN273']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|276', 'HT-DN276']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|280', 'HT-DN280']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|282', 'HT-DN282']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|283', 'HT-DN283']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|285', 'HT-DN285']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|286', 'HT-DN286']
['gn

['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27368', 'HT-DN27368']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27376', 'HT-DN27376']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27450', 'HT-DN27450']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27452', 'HT-DN27452']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27453', 'HT-DN27453']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27455', 'HT-DN27455']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27458', 'HT-DN27458']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|28116', 'HT-DN28116']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|28269', 'HT-DN28269']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|28270', 'HT-DN28270']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|28275', 'HT-DN28275']
['gnl|Heterocapsa_triquestra_Strain_CCMP_44

['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|59040', 'HT-DN59040']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|59055', 'HT-DN59055']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|59065', 'HT-DN59065']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|59100', 'HT-DN59100']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|59126', 'HT-DN59126']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|59127', 'HT-DN59127']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|59171', 'HT-DN59171']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|59178', 'HT-DN59178']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|59186', 'HT-DN59186']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|59192', 'HT-DN59192']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|59196', 'HT-DN59196']
['gnl|Heterocapsa_triquestra_Strain_CCMP_44

['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27748', 'HT-DN27748']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27753', 'HT-DN27753']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27751', 'HT-DN27751']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27328', 'HT-DN27328']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27329', 'HT-DN27329']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27334', 'HT-DN27334']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27333', 'HT-DN27333']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|27337', 'HT-DN27337']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|28243', 'HT-DN28243']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|28244', 'HT-DN28244']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|28242', 'HT-DN28242']
['gnl|Heterocapsa_triquestra_Strain_CCMP_44

['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|8911', 'HT-DN8911']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|9077', 'HT-DN9077']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|9075', 'HT-DN9075']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|9086', 'HT-DN9086']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|9085', 'HT-DN9085']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|9089', 'HT-DN9089']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|9156', 'HT-DN9156']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|9159', 'HT-DN9159']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|9158', 'HT-DN9158']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|9160', 'HT-DN9160']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|9989', 'HT-DN9989']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|

['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|6777', 'HT-DN6777']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|6786', 'HT-DN6786']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|6790', 'HT-DN6790']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|6795', 'HT-DN6795']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|6796', 'HT-DN6796']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|6797', 'HT-DN6797']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|6965', 'HT-DN6965']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|6971', 'HT-DN6971']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|6968', 'HT-DN6968']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|6972', 'HT-DN6972']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|6974', 'HT-DN6974']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|173486', 'HT-DN173486']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|172541', 'HT-DN172541']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|172544', 'HT-DN172544']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|172552', 'HT-DN172552']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|172557', 'HT-DN172557']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|173130', 'HT-DN173130']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|173131', 'HT-DN173131']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|173132', 'HT-DN173132']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|173138', 'HT-DN173138']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|173741', 'HT-DN173741']
['gnl|Heterocapsa_triquestra_Strain_CCMP_448|MMETSP0448-20130528|173750', 'HT-DN173750']
['gnl|Heterocapsa_tri

['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|16227', 'AB-DN16227']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|16228', 'AB-DN16228']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|16448', 'AB-DN16448']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|16449', 'AB-DN16449']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|16279', 'AB-DN16279']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|16333', 'AB-DN16333']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|16334', 'AB-DN16334']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|16318', 'AB-DN16318']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|16319', 'AB-DN16319']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|16320', 'AB-DN16320']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|16373', 'AB-DN16373']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|16376', 'AB-DN16376']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|16378', 'AB-DN16378']

['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1280', 'AB-DN1280']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1281', 'AB-DN1281']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1333', 'AB-DN1333']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1334', 'AB-DN1334']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1335', 'AB-DN1335']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1336', 'AB-DN1336']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1405', 'AB-DN1405']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1407', 'AB-DN1407']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1408', 'AB-DN1408']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1667', 'AB-DN1667']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1679', 'AB-DN1679']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1499', 'AB-DN1499']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1509', 'AB-DN1509']
['gnl|Amoebophrya_sp_Stra

['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1949', 'AB-DN1949']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1950', 'AB-DN1950']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1951', 'AB-DN1951']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1953', 'AB-DN1953']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1744', 'AB-DN1744']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1746', 'AB-DN1746']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1747', 'AB-DN1747']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1748', 'AB-DN1748']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|1807', 'AB-DN1807']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|2085', 'AB-DN2085']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|2163', 'AB-DN2163']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|2165', 'AB-DN2165']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|2166', 'AB-DN2166']
['gnl|Amoebophrya_sp_Stra

['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|9807', 'AB-DN9807']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|9808', 'AB-DN9808']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|10269', 'AB-DN10269']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|9824', 'AB-DN9824']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|9825', 'AB-DN9825']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|9890', 'AB-DN9890']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|9891', 'AB-DN9891']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|9892', 'AB-DN9892']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|9780', 'AB-DN9780']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|9843', 'AB-DN9843']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|9844', 'AB-DN9844']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|9848', 'AB-DN9848']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|9911', 'AB-DN9911']
['gnl|Amoebophrya_sp_St

['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|12910', 'AB-DN12910']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|12911', 'AB-DN12911']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|12914', 'AB-DN12914']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|12304', 'AB-DN12304']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|12374', 'AB-DN12374']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|12376', 'AB-DN12376']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|12377', 'AB-DN12377']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|12435', 'AB-DN12435']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|12437', 'AB-DN12437']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|12440', 'AB-DN12440']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|12602', 'AB-DN12602']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|12603', 'AB-DN12603']
['gnl|Amoebophrya_sp_Strain_Ameob2|MMETSP0795-20121207|12884', 'AB-DN12884']