# Setup Conda Environment

First, make sure you have a working `conda` installation. 

Next, run the code block below to create a new environment containing all the dependencies. Once the command finishes, close the notebook, activate the new environment with `conda activate lrr_annot`, and start the notebook up again.

In [1]:
# Create the `lrr_annot` environment from lrr_annot.yml unless `conda info --envs` already lists it.
# The leading `!` hands the whole line to the system shell, so the command in the
# `else` branch must be plain `conda ...` (a second `!` there is not a valid command).
!if conda info --envs | grep -q "lrr_annot"; then echo "Environment 'lrr_annot' already exists."; else conda env create -n lrr_annot -f "lrr_annot.yml"; fi

Environment 'lrr_annot' already exists.


Make sure that the output of the code block below says `active environment : lrr_annot`!

In [2]:
# Print only the "active environment" line of `conda info` to confirm which environment is in use.
!conda info | grep -E "active environment"

     active environment : lrr_annot


In [3]:
from methods import * # import all methods

## Select dataset

Use the dialog below to select a folder of PDB files you'd like to analyze.

In [4]:
import ipywidgets as widgets
from ipyfilechooser import FileChooser

# Show a file-chooser widget for picking the folder of PDB files to analyze.
fc = FileChooser('') # file chooser widget
display(fc)

FileChooser(path='/Users/amc/Desktop/final', filename='', title='', show_hidden=False, select_desc='Select', c…

In [5]:
# Read the selection made in the file chooser.
# Later cells build paths as f'{pdb_dir}{protid}.pdb', i.e. by plain string concatenation.
pdb_dir = fc.value

In [6]:
# List the .pdb files in the selected folder.
filenames = get_files_with_ext(pdb_dir, '.pdb')
# Each protein is identified by its file name with the extension stripped.
protlist = [stem for stem, _ in map(os.path.splitext, filenames)]
print('Protein identifiers:\n\n', protlist)

Protein identifiers:

 ['1064PERA', '1355D3IA', '984MN8A', '202IFGA', '172ELLA', '352Z63A', '633TSRE', '242O6SA', '21IO0A', '814ECNA', '994MN8B', '423B2DA', '1074PKIG', '834EZGA', '764B8CD', '613SB4A', '673VQ2A', '121XKUA', '362Z66A', '663V44A', '51OGQA', '733ZYOA', '413A79B', '1124Q62A', '583RG1A', '443CIGA', '1495Y30A', '1636FG8A', '683WN4A', '503J0AA', '1686GYUA', '1596DBGA', '794CNMA', '1365GR8A', '342Z62A', '954KXFB', '914H09A', '884G8AA', '372Z7XA', '653UN9A', '1324Z79A', '111XEUA', '31JL5A', '1606EJXA', '1455WLSA', '1515YQ5A', '232O6RA', '1054PEQB', '1204TZHA', '1334Z8GA', '1726K2DB', '693WO9A', '894GLPA', '1776R1HA', '1646FG8B', '1044PBVA', '1676GFFI', '1024OW2A', '1254V2EA', '1545Z8XA', '804CP6A', '974LXRA', '1766OBPC', '453E4GA', '1194RWTC', '1385HZLB', '101WWLA', '392Z80A', '1746NPYA', '904GT6A', '1264XA9A', '1294Y61B', '11H6UA', '1014OJUA', '1274XGOA', '784CILA', '1284XSQA', '1345B0NA', '754AY9X', '824ECOA', '874FS7A', '1174R6GA', '553OGKB', '1525YUDA', '1244UFRA', '593RGZA

In [7]:
# Display the selected PDB folder to confirm the choice.
pdb_dir

'/Users/amc/git/Boyan/lrr-annot/lrrpredictor_pdbs/'

## Caching and Plotting

Select a directory in which to cache the analysis data if you'd like to avoid recomputing the analyses later. Then select a directory in which to save the plots.

In [8]:
# No cache directory until one is picked in the chooser shown below.
cache_dir = None
print('Choose directory to cache analyses:')
fc2 = FileChooser('')  # chooser widget for the cache directory
display(fc2)

Choose directory to cache analyses:


FileChooser(path='/Users/amc/Desktop/final', filename='', title='', show_hidden=False, select_desc='Select', c…

In [9]:
# Read the cache directory picked in the chooser and display it.
# NOTE(review): presumably this stays None when nothing was selected — confirm against ipyfilechooser.
cache_dir = fc2.value
cache_dir

'/Users/amc/Desktop/final/cache/'

In [10]:
# No plot directory until one is picked in the chooser shown below.
plot_dir = None
print('Choose directory to save plots:')
fc3 = FileChooser('')  # chooser widget for the plot directory
display(fc3)

Choose directory to save plots:


FileChooser(path='/Users/amc/Desktop/final', filename='', title='', show_hidden=False, select_desc='Select', c…

In [11]:
# Read the plot directory picked in the chooser and display it.
# NOTE(review): presumably this stays None when nothing was selected — confirm against ipyfilechooser.
plot_dir = fc3.value
plot_dir

'/Users/amc/Desktop/final/plots/'

In [12]:
import os
import pickle

from tqdm import tqdm

# Run the regression analysis for every protein and save one plot per protein.
# The analysis always runs; caching is optional and only happens when a cache
# directory was chosen, so the result dictionaries exist either way.
# NOTE(review): the saved output of this cell shows
# "ValueError: min() arg is an empty sequence" partway through the loop; where it
# is raised is not visible from this cell — check the failing PDB file.
cutoff_d = {}      # protein id -> regression parameters cast to int, as a tuple
std_d = {}         # protein id -> value returned by plot_regression
regression_d = {}  # protein id -> (winding, m, parameters) from get_regression
for protid in tqdm(protlist):
    # os.path.join works whether or not the chosen directory ends with a separator.
    preX = get_backbone_from_pdb(os.path.join(pdb_dir, f'{protid}.pdb'))
    winding, m, parameters = get_regression(preX)
    regression_d[protid] = (winding, m, parameters)

    stddev = plot_regression(winding, parameters, m, save = True, filename = os.path.join(plot_dir, protid))
    std_d[protid] = stddev
    cutoff_d[protid] = tuple(parameters.astype('int'))

# Write each dictionary to <cache_dir>/<name>.pickle when a cache directory is set.
if cache_dir is not None:
    for dict_name, table in (('cutoff_d', cutoff_d), ('std_d', std_d), ('regression_d', regression_d)):
        with open(os.path.join(cache_dir, f'{dict_name}.pickle'), 'wb') as handle:
            pickle.dump(table, handle)

 41%|█████████████████████████████████▋                                                | 73/178 [00:49<01:10,  1.48it/s]


ValueError: min() arg is an empty sequence

In [15]:
# NOTE(review): scratch cell — `slopes` is not assigned in any cell shown in this notebook,
# and the saved output of this cell is a NameError.
slopes

NameError: name 'slopes' is not defined

## Generating plots from cached data

In [23]:
# Start from three separate, empty result dictionaries keyed by protein id.
cutoff_d, std_d, regression_d = {}, {}, {}

In [22]:
def analyze(*args, **kwargs):
    """Button callback: run the regression for every protein, then cache the results.

    For each id in the notebook-level ``protlist`` the backbone is read from
    ``<pdb_dir>/<protid>.pdb`` and ``regression_d[protid]`` is set to the
    ``(winding, m, parameters)`` triple returned by ``get_regression``.

    When ``cache_dir`` is not None, ``cutoff_d``, ``std_d`` and ``regression_d``
    are then pickled to ``<cache_dir>/<name>.pickle``. ``cutoff_d`` and ``std_d``
    are written as they currently stand; this function does not fill them.

    Any positional or keyword arguments are accepted and ignored, so the
    function can be registered directly with ``Button.on_click``.
    """
    import os  # local import keeps this cell independent of other cells' imports

    # Always analyze; previously nothing was computed once a cache directory was set.
    for protid in tqdm(protlist):
        preX = get_backbone_from_pdb(os.path.join(pdb_dir, f'{protid}.pdb'))
        winding, m, parameters = get_regression(preX)
        regression_d[protid] = (winding, m, parameters)

    if cache_dir is not None:
        for dict_name, table in (('cutoff_d', cutoff_d), ('std_d', std_d), ('regression_d', regression_d)):
            # os.path.join works whether or not cache_dir ends with a separator.
            with open(os.path.join(cache_dir, f'{dict_name}.pickle'), 'wb') as handle:
                pickle.dump(table, handle)

def make_plots(*args, **kwargs):
    """Button-style callback: plot every regression held in ``regression_d``.

    For each ``protid -> (winding, m, parameters)`` entry, ``plot_regression``
    is called with ``save=True`` and ``filename=<plot_dir>/<protid>``. Its return
    value is stored in ``std_d[protid]`` and the parameters, cast to int and
    converted to a tuple, in ``cutoff_d[protid]``.

    Any positional or keyword arguments are accepted and ignored.
    """
    import os  # local import keeps this cell independent of other cells' imports

    # .items() yields (key, value) pairs; the value is unpacked in the order it
    # was stored by the analysis: (winding, m, parameters).
    for protid, (winding, m, parameters) in regression_d.items():
        stddev = plot_regression(winding, parameters, m, save = True, filename = os.path.join(plot_dir, protid))
        std_d[protid] = stddev
        cutoff_d[protid] = tuple(parameters.astype('int'))
            
# Offer a "load from cache" button only when a cache directory has been chosen.
# NOTE(review): `load_from_cache` is not defined in the cells visible here —
# confirm it exists (e.g. in `methods`) before relying on this button.
if cache_dir is not None:
    cache_button = widgets.Button(
        description = 'Load cached analysis',
        tooltip = 'Load dictionaries from cache.',
        button_style = '', # 'success', 'info', 'warning', 'danger' or ''
        disabled = False
    )

    cache_button.on_click(load_from_cache)
    display(cache_button)
            
# Button that triggers `analyze`; its label mentions caching only when a cache directory is set.
analyze_button = widgets.Button(
    description = 'Analyze and cache' if cache_dir is not None else 'Analyze',
    tooltip = 'Analyze the proteins in the selected folder.',
    button_style = '', # 'success', 'info', 'warning', 'danger' or ''
    disabled = False
)

analyze_button.on_click(analyze)
display(analyze_button)


Button(description='Analyze', style=ButtonStyle(), tooltip='Analyze the proteins in the selected folder.')

In [71]:
def analyze(protlist):
    """Regress and plot each protein in ``protlist``; return ``(cutoff_d, std_d)``.

    For every id, the backbone is read from ``pdb/<protid>.pdb``, fitted with
    ``get_regression`` and plotted to ``plot/regression/<protid>``.

    Returns a pair of dictionaries keyed by protein id:
    ``cutoff_d`` holds the regression parameters cast to int as a tuple, and
    ``std_d`` holds the value returned by ``plot_regression``.

    When the notebook-level ``cache_dir`` is not None, both dictionaries are
    also pickled under ``pickles/``.

    NOTE(review): the notebook also defines an ``analyze(*args, **kwargs)``
    button callback in an earlier cell; this definition rebinds the same name.
    """
    cutoff_d = {}
    std_d = {}
    for protid in protlist:
        preX = get_backbone_from_pdb(f'pdb/{protid}.pdb')
        winding, m, parameters = get_regression(preX)
        with warnings.catch_warnings():
            # RuntimeWarnings raised while plotting and casting are silenced.
            warnings.simplefilter("ignore", category=RuntimeWarning)
            stddev = plot_regression(winding, parameters, m, save = True, filename = f'plot/regression/{protid}')
            std_d[protid] = stddev
            cutoff_d[protid] = tuple(parameters.astype('int'))

    if cache_dir is not None:
        with open('pickles/cutoff_d.pickle', 'wb') as handle:
            pickle.dump(cutoff_d, handle)
        with open('pickles/std_d.pickle', 'wb') as handle:
            pickle.dump(std_d, handle)

    # Previously returned the misspelled name `cuttof_d`, which raised NameError.
    return cutoff_d, std_d

def test(*args, **kwargs):
    """Debug helper: print the positional and keyword arguments received, then return 0."""
    received = (args, kwargs)
    print(*received)
    return 0



In [46]:
# NOTE(review): scratch cell — `x` is not assigned in any cell shown in this notebook.
x