In [1]:
import sys
sys.path.append('../')
import crisprtree
from crisprtree import utils
from crisprtree import estimators
from crisprtree import annotators

In [2]:
from Bio import SeqIO
from Bio.Seq import reverse_complement
import gzip
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sbn
from scipy.stats import hmean

In [3]:
# Load the first GenBank record from the assembly file; any later records in
# the file are ignored by this notebook.
with open('data/GCA_002855745.1_ASM285574v1_genomic.gbff') as handle:
    # Pull only the first record from the parser instead of materialising every
    # record in the file as a list just to keep index 0.
    genome = next(SeqIO.parse(handle, 'genbank'), None)

if genome is None:
    raise ValueError('No GenBank records found in the genome file')

In [4]:
# Scoring pipeline built from crisprtree's CFDEstimator. Later cells call
# estimator.predict_proba(...) on the off-target search results to obtain the
# per-hit 'Score' column.
estimator = estimators.CFDEstimator.build_pipeline()

In [None]:
# Build the gRNA library: for every CDS feature, score each candidate
# protospacer by its highest-scoring hit elsewhere in the genome, then keep the
# few candidates whose worst off-target score is lowest.
MAX_MISMATCHES = 5  # second positional argument to utils.cas_offinder (presumably a mismatch limit -- TODO confirm)
GRNAS_PER_GENE = 5  # same count the bare Series.head() default used previously
LIBRARY_COLUMNS = ['Product', 'Tag', 'Protospacer',
                   'Location', 'Strand', 'Off Target Score']

library_grnas = []

for feat in genome.features:
    if feat.type != 'CDS':
        continue

    product = feat.qualifiers['product'][0]
    tag = feat.qualifiers['locus_tag'][0]
    gene_record = feat.extract(genome)
    possible_targets = utils.extract_possible_targets(gene_record)

    # Search everything except the gene's own span, presumably so that the
    # intended on-target site is not reported as an off-target hit.
    genome_minus_gene = genome[:feat.location.start] + genome[feat.location.end:]

    possible_binding = utils.cas_offinder(possible_targets, MAX_MISMATCHES,
                                          seqs=[genome_minus_gene])
    possible_binding['Score'] = estimator.predict_proba(possible_binding.values)

    # Worst (maximum) off-target score per gRNA, ascending so the safest
    # candidates come first.
    # NOTE(review): a gRNA that has no rows in possible_binding never appears
    # in this ranking -- confirm whether hit-free gRNAs should be included.
    offtarget_scores = possible_binding.groupby('gRNA')['Score'].max().sort_values()

    for protospacer, off_score in offtarget_scores.head(GRNAS_PER_GENE).items():
        # Locate the protospacer on the forward strand first, then fall back
        # to its reverse complement.
        location = genome.seq.find(protospacer)
        strand = '+'
        if location == -1:
            location = genome.seq.find(reverse_complement(protospacer))
            # Not found on either strand: leave the strand unset rather than
            # reporting a '-' strand alongside a -1 location.
            strand = '-' if location != -1 else None

        library_grnas.append({'Product': product,
                              'Tag': tag,
                              'Protospacer': protospacer,
                              'Location': location,
                              'Strand': strand,
                              'Off Target Score': off_score})

# Explicit columns keep the frame's schema stable even when no gRNAs were
# collected, so later groupby calls on these columns do not hit a KeyError.
library_df = pd.DataFrame(library_grnas, columns=LIBRARY_COLUMNS)

In [None]:
# Lowest off-target score among the gRNAs kept for each (Product, Tag) pair.
best_score = (
    library_df
    .groupby(['Product', 'Tag'])['Off Target Score']
    .agg('min')
)

# Genes whose best available gRNA still has an off-target score of at least 0.5.
untargetable_products = best_score.loc[best_score.ge(0.5)]
untargetable_products