## Purpose:
- This notebook will track the semi-manually logged failures at each step
- Advice: Parse the snakemake output rather than logging failures "by hand", which can be error-prone
- The cells in this notebook *should* be safe to rerun

## General Save/Load of dropped log

In [64]:
import json

def dropped(new_fails, log_path="../logs/dropped.json"):
    """Merge newly failed clusterIDs into a single JSON log file.

    The log is manually curated by the pipeline runner; downstream steps
    are meant to drop the logged clusterIDs from analysis.

    Parameters
    ----------
    new_fails : dict
        Mapping of failure reason -> list of clusterIDs. It is merged into
        the existing log with ``dict.update``, so a reason already present
        in the log has its list replaced (re-running with the same input
        leaves the log unchanged).
    log_path : str
        Path of the JSON log. The file is created if it does not exist;
        its parent directory must already exist.

    Returns
    -------
    None
        The merged mapping is printed and written to ``log_path``.
    """
    # Read the existing log BEFORE opening for writing: opening with "w"/"w+"
    # truncates the file, which would discard everything logged so far.
    try:
        with open(log_path, "r") as f:
            current_fails = json.load(f)
        print(f"Adding to failed log {log_path}")
    except (FileNotFoundError, json.JSONDecodeError):
        # Missing or empty/unparseable file: start a fresh log.
        print("Failure log not yet initated... initiating now")
        current_fails = dict()

    print(current_fails)
    current_fails.update(new_fails)
    print(current_fails)

    # Only now overwrite the file with the merged content.
    with open(log_path, "w") as f:
        json.dump(current_fails, f)

## Clustalo Failures have been logged here

In [65]:
from collections import defaultdict

# Snakemake log of the run in which certain clusters necessarily failed clustalo
clustalo_fails_log = "../.snakemake/log/2020-07-21T113519.799452.snakemake.log" 
reason = "Clustalo-fails"

# reason -> list of clusterIDs; the key only appears once a failure is found
failed_clusterIDs = defaultdict(list)

with open(clustalo_fails_log, "r") as log_handle:
    for log_line in log_handle:
        if "clusterID" not in log_line:
            continue
        # Take the last whitespace-separated token and keep the text after
        # its first "=" (presumably "clusterID=<id>" -- verify against the log)
        last_token = log_line.split()[-1]
        clusterID = last_token.split("=")[1]
        failed_clusterIDs[reason].append(clusterID)

# Persist the collected failures to the shared dropped log
dropped(new_fails=failed_clusterIDs)

Failure log not yet initated... initiating now
{}
{'Clustalo-fails': ['1qzv_B', '1jnv_Y', '1fe1_F', '1ivi_A', '1fka_N', '1d8s_A', '1qtj_A', '1d8s_D', '1nik_G', '1tnv_C', '1c51_D', '1fe1_I', '1fe1_B', '1izl_X', '1fe1_D', '1c51_L', '1qzv_C', '1izl_G', '1fe1_G', '1fka_Q', '1hr3_A', '1c51_K', '1c51_F', '1qzv_H', '1qzv_K', '1qzv_G', '1qzv_D', '1fe1_H', '1fka_B', '1qzv_4', '1qzv_J', '1fka_K', '1fe1_C', '1fft_D', '1fka_L', '1pyh_A', '1fka_P', '1eg0_H', '1fe1_A', '1qzv_E', '1c51_E', '1kga_A', '1fe1_E', '1bcc_I', '1fka_J', '1ffx_E', '1izl_U', '1fka_T', '1c51_C']}


In [66]:
# Scratch check: pair up two equal-length sequences element by element
a = ["a", "b", "c"]
b = list(range(1, 4))
c = [(letter, number) for letter, number in zip(a, b)]

In [67]:
# Display the zipped (letter, number) pairs built in the previous cell
c

[('a', 1), ('b', 2), ('c', 3)]