## Find Candidate Building Blocks

In [1]:
## Run once cell

%load_ext autoreload
%autoreload 2

import os
os.chdir('..')

In [2]:
import sys

import numpy as np
import pandas as pd

from helpers.edges import connect_edges, CascadingEdges
from helpers.cascades import Cascade, MakeCascade, Metrics, MetricsPair, MakeMetricsPair

In [3]:
# Load the master table and its edge list.
# The path is relative, so it only resolves after the `os.chdir('..')` in the
# first cell has been executed exactly once.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
path_to_data = "data/final/"
df = pd.read_pickle(path_to_data + "master_dataframe.pkl")
edges = pd.read_pickle(path_to_data + "master_edges.pkl")
# Sanity check: table shape and number of edges.
df.shape, len(edges)

((251038, 7), 235229)

In [4]:
# Build the CascadingEdges helper over the loaded edge list.
# Calling the instance with a node index returns a list that is indexed by
# position later in this notebook (see get_parent_quadrants).
cascading_edges = CascadingEdges(edges)

In [5]:
from typing import List
from functools import reduce
from moleculib.assembly.datum import AssemblyDatum
from moleculib.protein.datum import ProteinDatum
from moleculib.graphics.py3Dmol import plot_py3dmol_grid

from moleculib.protein.transform import (
    ProteinCrop,
    TokenizeSequenceBoundaries,
    ProteinPad,
    MaybeMirror,
    BackboneOnly,
    DescribeChemistry
)

from helpers.edges import CascadingEdges


# Crop and pad sizes share this value so every datum ends up the same length.
max_chain_len = 253  # max length for denim-energy model
# Ordered pipeline: `transform()` below applies these steps first to last,
# feeding each step's output into the next one.
protein_transform = [
    ProteinCrop(crop_size=max_chain_len),
    TokenizeSequenceBoundaries(),
    MaybeMirror(hand='left'),
    ProteinPad(pad_size=max_chain_len, random_position=False),
    BackboneOnly(filter=True),
    DescribeChemistry(),
]


# Given a list of PDB ids, pull them from moleculib and visualize


def transform(datum):
    """Run `datum` through every step of `protein_transform`, in list order.

    Each step's `.transform(...)` result becomes the input of the next step;
    the output of the last step is returned. With an empty pipeline the input
    is returned unchanged.
    """
    current = datum
    for step in protein_transform:
        current = step.transform(current)
    return current

class FetchPDBids:
    """Fetch PDB ids as AssemblyDatums and arrange their proteins in grids.

    Usage: construct with a list of PDB ids, call the instance to fetch, then
    use `togrid()` to get rows of datums suitable for a grid plot.
    """

    def __init__(self, pdb_ids: List[str]):
        # Ids are lower-cased before being handed to the fetcher.
        self.pdb_ids = [pdb_id.lower() for pdb_id in pdb_ids]
        # Flat list of every entry in each assembly's `protein_data`.
        self.datums = []
        self.transformed = []  # list of transformed ProteinDatums

    def __call__(self):
        """Fetch every id in `self.pdb_ids` and (re)populate `self.datums`.

        Previously fetched datums and the transformed cache are cleared first,
        so re-running the cell does not duplicate entries or leave
        `self.transformed` out of sync with `self.datums`.
        """
        self.datums = []
        self.transformed = []
        print(f"Fetching {len(self.pdb_ids)} PDB IDs...", end=" ")
        for pdb_id in self.pdb_ids:
            assembly = AssemblyDatum.fetch_pdb_id(pdb_id)
            self.datums.extend(assembly.protein_data)
            print(f"{pdb_id}, ", end="")
        print("\nDone")

    def togrid(self, k=None, num_columns=3, use_transformed=False):
        """Return the first `k` datums as rows of at most `num_columns` items.

        k: number of datums to include; defaults to all fetched datums.
        use_transformed: when True, use the transformed datums, computing them
            on first use if they have not been computed yet.
        """
        if k is None:
            k = len(self.datums)
        if use_transformed:
            if not self.transformed:
                self.transform()
            selected = self.transformed[:k]
        else:
            selected = self.datums[:k]
        return self.make_grid(selected, num_columns)

    @staticmethod
    def make_grid(datums: List["ProteinDatum"], num_columns=3):
        """Split `datums` into consecutive chunks of length `num_columns`.

        The last chunk may be shorter. An empty input gives an empty list.
        """
        return [datums[i:i + num_columns] for i in range(0, len(datums), num_columns)]

    def transform(self):
        """Apply the module-level `transform` function to every fetched datum."""
        self.transformed = [transform(datum) for datum in self.datums]


def find_pdb(pdb_id, level=None):
    """Return the rows of `df` whose `pdb_id` column contains `pdb_id`.

    Matching is case-insensitive. When `level` is given, only rows whose
    `level` column equals it are kept.
    """
    row_mask = df['pdb_id'].str.contains(pdb_id, case=False)
    if level is not None:
        row_mask = row_mask & (df['level'] == level)
    return df[row_mask]



In [6]:
dna_restriction_enzymes = ['2OXV', '2E52', '1BAM', '2IXS', '1PVI']
beta_helix = ['2jp7', '1prp', '3nxq', '1gca', '1pcl', '1xiq', '2pqe', '1kzq', '4mzu', '1wpc', '1fnu', '4g6r', '4jj2', '3hno', '1lxa', '6ria', '1hg9', '1dcq', '1cb7', '3a1m', '4zu7', '1acc', '1l5j', '6rib', '2jer', '1air', '2d40', '2fla', '1qte', '2kl8', '1dbv', '2obg', '7jvi', '2z0q', '1yox', '1f6w', '3i48', '3zds', '4puq', '1qre', '6e5c', '1cts', '1hin', '2qnz', '3ub3', '1idj', '3obw', '1dab', '3uxh', '4osd', '4aq6', '4aq2', '4fl6', '2ln3', '1znp']

# beta_helix[13:21][7] selects the same element as beta_helix[20];
# level=2 keeps only rows whose `level` column equals 2.
find_pdb(beta_helix[13:21][7], level=2)

Unnamed: 0,pdb_id,level,level_idx,scalar_rep,datum,pos,color
225714,4zu7B,2,0,"[0.96311736, -0.69944006, 2.0866554, -1.382588...",(((<moleculib.protein.datum.ProteinDatum objec...,"[-48.1258430480957, 25.67034149169922]","rgb(71, 76, 125)"
225715,4zu7B,2,1,"[-0.026798755, -0.4516107, 0.85225177, -1.3008...",(((<moleculib.protein.datum.ProteinDatum objec...,"[-20.903160095214844, -76.32533264160156]","rgb(17, 140, 166)"
225716,4zu7B,2,2,"[-0.05677605, -0.2647879, 0.7420596, -1.167206...",(((<moleculib.protein.datum.ProteinDatum objec...,"[25.574487686157227, -34.32466125488281]","rgb(71, 137, 183)"
225717,4zu7B,2,3,"[0.13369997, -0.1757687, 0.83641, -0.83099073,...",(((<moleculib.protein.datum.ProteinDatum objec...,"[35.3138427734375, -14.516938209533691]","rgb(134, 84, 86)"
225718,4zu7B,2,4,"[0.007192686, -0.39348882, 0.96239954, -1.2252...",(((<moleculib.protein.datum.ProteinDatum objec...,"[-9.865707397460938, 59.42068862915039]","rgb(173, 142, 100)"
...,...,...,...,...,...,...,...
226655,4zu7A,2,30,"[0.24816532, -0.25786173, 0.9149501, -0.874849...",(((<moleculib.protein.datum.ProteinDatum objec...,"[38.342926025390625, -68.9327392578125]","rgb(99, 109, 195)"
226656,4zu7A,2,31,"[0.32757628, -0.33693287, 1.0332254, -0.782243...",(((<moleculib.protein.datum.ProteinDatum objec...,"[-99.51786804199219, -66.23067474365234]","rgb(194, 22, 137)"
226657,4zu7A,2,32,"[0.25219983, -0.21873125, 1.041678, -0.8012804...",(((<moleculib.protein.datum.ProteinDatum objec...,"[61.05895233154297, -87.08849334716797]","rgb(125, 181, 192)"
226658,4zu7A,2,33,"[-0.18148027, -0.16324824, 0.7396309, -0.76507...",(((<moleculib.protein.datum.ProteinDatum objec...,"[68.59159088134766, -77.25275421142578]","rgb(109, 162, 194)"


### Search for candidates

Search for candidates using two thresholds: one applied at the bottom level and one applied at the upper level.

In [7]:
from helpers.neighborhood import GetNeighbors, NeighborMetrics, MakeNeighborMetrics
from helpers.candidates import MakeCandidate

# get_neighbors = GetNeighbors(df)
# query_index = 249972
# dists, neighbors = get_neighbors(query_index, radius=0.00426)
# # dists, neighbors = get_neighbors(query_index, n_neighbors=4)
# print(dists)
# neighbors


# Build a candidate for node index 249972 and evaluate it.
# NOTE(review): the meaning of `n_neighbors_threshold` and
# `divergence_threshold` is defined in helpers.candidates — confirm there.
make_candidate = MakeCandidate(df, edges, 249972)
candidate = make_candidate(n_neighbors_threshold=10)
# Last expression of the cell: its return value is shown as the cell output.
candidate.eval(divergence_threshold=0.0002)
# neighbors = candidate_eval.search_candidates(0.00426, divergence_threshold=0.02)




True

In [8]:
from helpers.utils import d
import random

def generate_random_aa_sequence(length):
    """Return `length` randomly chosen symbols from `d`'s values, joined.

    The entries 'UNK', 'MASK' and 'PAD' are removed from the pool before
    sampling. Sampling is with replacement (`random.choices`).
    """
    excluded = ['UNK', 'MASK', 'PAD']
    alphabet = [symbol for symbol in d.values() if symbol not in excluded]
    picks = random.choices(alphabet, k=length)
    return ''.join(picks)

# random_sequence = generate_random_aa_sequence()
# print("Random amino acid sequence:", random_sequence)
# print("Random scaffold using this random motif:", scaffolded_motif(random_sequence))


# ubi = "MQIFVKTLTG KTITLEVEPS DTIENVKAKI QDKEGIPPDQ QRLIFAGKQL EDGRTLSDYN IQKESTLHLV LRLRGG"
# Scaffold sequence used as the default host for motif insertion.
ubiquitin_scaffold = "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG"
# Layout after insertion:
# MQIFVKTLT-[Motif]-GKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG

def scaffolded_motif(motif, scaffold=ubiquitin_scaffold):
    """Insert `motif` into `scaffold` after its first nine characters.

    Prints the motif length as a side effect and returns the combined string.
    """
    print(f"Length of motif: {len(motif)}")
    head, tail = scaffold[:9], scaffold[9:]
    return "{}{}{}".format(head, motif, tail)

# Motif to insert into the scaffold.
MOTIF = "QDQTEKELFESYI"

# Print the scaffolded motif next to a control: a random sequence of the same
# length inserted at the same position. The control is unseeded, so it changes
# on every run.
print(f"Scaffolded motif: {scaffolded_motif(MOTIF)}")
print(f"Random: {scaffolded_motif(generate_random_aa_sequence(len(MOTIF)))}")



Length of motif: 13
Scaffolded motif: MQIFVKTLTQDQTEKELFESYIGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG
Length of motif: 13
Random: MQIFVKTLTDNYWLVSWGASKAGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG


In [9]:
from moleculib.protein.datum import ProteinDatum
from moleculib.protein.alphabet import all_residues
from helpers.utils import aa_map, residue_map

def datum_to_sequence(datum):
    """Map every entry of `datum.residue_token` through `all_residues`.

    Returns a list with one `all_residues[token]` entry per token, in order.
    """
    sequence = []
    for token in datum.residue_token:
        sequence.append(all_residues[token])
    return sequence

# Fetch PDB entry 1c5e (requested in "pdb" format) and print its residue
# tokens mapped through `residue_map` from helpers.utils.
full_protein = ProteinDatum.fetch_pdb_id("1c5e", format="pdb")
print(residue_map(full_protein.residue_token))

['SER', 'ASP', 'PRO', 'ALA', 'HIS', 'THR', 'ALA', 'THR', 'ALA', 'PRO', 'GLY', 'GLY', 'LEU', 'SER', 'ALA', 'LYS', 'ALA', 'PRO', 'ALA', 'MET', 'THR', 'PRO', 'LEU', 'MET', 'LEU', 'ASP', 'THR', 'SER', 'SER', 'ARG', 'LYS', 'LEU', 'VAL', 'ALA', 'TRP', 'ASP', 'GLY', 'THR', 'THR', 'ASP', 'GLY', 'ALA', 'ALA', 'VAL', 'GLY', 'ILE', 'LEU', 'ALA', 'VAL', 'ALA', 'ALA', 'ASP', 'GLN', 'THR', 'SER', 'THR', 'THR', 'LEU', 'THR', 'PHE', 'TYR', 'LYS', 'SER', 'GLY', 'THR', 'PHE', 'ARG', 'TYR', 'GLU', 'ASP', 'VAL', 'LEU', 'TRP', 'PRO', 'GLU', 'ALA', 'ALA', 'SER', 'ASP', 'GLU', 'THR', 'LYS', 'LYS', 'ARG', 'THR', 'ALA', 'PHE', 'ALA', 'GLY', 'THR', 'ALA', 'ILE', 'SER', 'ILE', 'VAL', 'SER', 'ASP', 'PRO', 'ALA', 'HIS', 'THR', 'ALA', 'THR', 'ALA', 'PRO', 'GLY', 'GLY', 'LEU', 'SER', 'ALA', 'LYS', 'ALA', 'PRO', 'ALA', 'MET', 'THR', 'PRO', 'LEU', 'MET', 'LEU', 'ASP', 'THR', 'SER', 'SER', 'ARG', 'LYS', 'LEU', 'VAL', 'ALA', 'TRP', 'ASP', 'GLY', 'THR', 'THR', 'ASP', 'GLY', 'ALA', 'ALA', 'VAL', 'GLY', 'ILE', 'LEU', 'ALA'

#### Specific Neighbor Search

In [10]:

# Query node index 168070 for 5 neighbors; the call returns a 3-tuple, of
# which only `neighbor_metrics` is used in this cell.
neighbor_metrics, distances, top_vectors =  MakeNeighborMetrics(df, edges, 168070)(n_neighbors=5)
# print(neighbor_metrics)
neighbor_metrics.plot()

Query: 1dqpA. part sequence: NGVNRY


Neighbor 0 at index 167855, PDB ID: 1dqpB. part sequence: NGVNRY -- Alignment: 6.0, RMSD: 0.0128, cosine: 0.000007


Neighbor 1 at index 32400, PDB ID: 1a3aC. part sequence: LLAGRK -- Alignment: 2.0, RMSD: 0.1558, cosine: 0.000275


Neighbor 2 at index 162197, PDB ID: 1a2xA. part sequence: MMEGVQ -- Alignment: 2.0, RMSD: 0.3158, cosine: 0.000327


Neighbor 3 at index 48481, PDB ID: 1ewyC. part sequence: KEEDLY -- Alignment: 1.0, RMSD: 0.6314, cosine: 0.000544


Neighbor 4 at index 188167, PDB ID: 1au1A. part sequence: TGYLRN -- Alignment: 2.0, RMSD: 0.2898, cosine: 0.000569


In [11]:
from helpers.utils import aa_map
from helpers.utils import aa_map, residue_map
from helpers.cascades import MakeMetricsPair

make_cascades = CascadingEdges(edges)

# ubi = "MQIFVKTLTG KTITLEVEPS DTIENVKAKI QDKEGIPPDQ QRLIFAGKQL EDGRTLSDYN IQKESTLHLV LRLRGG"
# Scaffold sequence used as the default host for motif insertion.
ubiquitin_scaffold = "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG"
# Layout after insertion:
# MQIFVKTLT-[Motif]-GKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG

def scaffolded_motif(motif, scaffold=ubiquitin_scaffold):
    """Return `scaffold` with `motif` inserted after its first nine characters.

    The motif lands between 0-based positions 8 and 9 of the scaffold.
    """
    insert_at = 9
    return "{}{}{}".format(scaffold[:insert_at], motif, scaffold[insert_at:])


# Node indices of the two cascades to compare.
u = 225055
v = 226655

# Cascade (list of node indices) for each starting node.
us, vs = make_cascades(u), make_cascades(v)
print(us, vs)
# Build the pairwise metrics object for the two cascades.
metrics_pair = MakeMetricsPair(df, us, vs)()
# cas1, cas2 = metrics_pair

# Two views plus the index lists requested via return_indices=True.
view1, view2, indices1, indices2 = metrics_pair.plot_cascade_pair(
    metrics_pair.cascade1, metrics_pair.cascade2, return_indices=True
)


print(f"Results for ids: {u}, {v}")
# print(metrics_pair)
print("First column sequences (short): ")
# aa_map takes a list of sequences; wrap the single sequence and unwrap the result.
short_seq1 = aa_map([metrics_pair.cascade1.sequences[0]])[0]
short_seq2 = aa_map([metrics_pair.cascade2.sequences[0]])[0]
print(short_seq1)
print(short_seq2)


print(f"Indices1: {indices1}")
print(f"Indices2: {indices2}")
view1.show()
view2.show()

# NOTE(review): the lines below recompute and reprint the same two sequences
# that were already printed above in this cell.
print(f"Results for ids: {u}, {v}")
# print(metrics_pair)
# print("First column sequences (short): ")
short_seq1 = aa_map([metrics_pair.cascade1.sequences[0]])[0]
short_seq2 = aa_map([metrics_pair.cascade2.sequences[0]])[0]
print(short_seq1)
print(short_seq2)
# print("\nScaffolded motifs:")
# print(scaffolded_motif(short_seq1))
# print(scaffolded_motif(short_seq2))

[225055, 225073, 225083] [226655, 226675, 226685]
Results for ids: 225055, 226655
First column sequences (short): 
DLNIE
YIRNYDFYIDEAK
Indices1: [[0, 1, 6, 7, 8], [0, 1, 6, 7, 8]]
Indices2: [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [2, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]]


Results for ids: 225055, 226655
DLNIE
YIRNYDFYIDEAK


In [12]:
from helpers.utils import aa_map, residue_map
from helpers.cascades import MakeMetricsPair

make_cascades = CascadingEdges(edges)

# ubi = "MQIFVKTLTG KTITLEVEPS DTIENVKAKI QDKEGIPPDQ QRLIFAGKQL EDGRTLSDYN IQKESTLHLV LRLRGG"
# Scaffold sequence used as the default host for motif insertion.
ubiquitin_scaffold = "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG"
# Layout after insertion:
# MQIFVKTLT-[Motif]-GKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG

def scaffolded_motif(motif, scaffold=ubiquitin_scaffold):
    """Splice `motif` into `scaffold` right after the scaffold's ninth character."""
    prefix = scaffold[:9]
    suffix = scaffold[9:]
    return f"{prefix}{motif}{suffix}"


# def do_surgery(motif: str, other_pdb):


# fetcher = FetchPDBids(list_of_pdbs)
# fetcher()
# fetcher.transform()
# print(f"Number of fetched datums: {len(fetcher.datums)}")



# aa_map([residue_map(fetcher.datums[0].residue_token)])[0]



# Node indices of the two cascades to compare; alternative pairs are kept
# commented out below.
u = 250321
v = 250320

# u = 220118
# v = 8132

# u = 240428
# v = 138485

# Cascade (list of node indices) for each starting node.
us, vs = make_cascades(u), make_cascades(v)
print(us, vs)
# Build the pairwise metrics object for the two cascades.
metrics_pair = MakeMetricsPair(df, us, vs)()
# cas1, cas2 = metrics_pair

# Two views plus the index lists requested via return_indices=True.
view1, view2, indices1, indices2 = metrics_pair.plot_cascade_pair(
    metrics_pair.cascade1, metrics_pair.cascade2, return_indices=True
)


print(f"Results for ids: {u}, {v}")
# print(metrics_pair)
print("First column sequences (short): ")
# aa_map takes a list of sequences; wrap the single sequence and unwrap the result.
short_seq1 = aa_map([metrics_pair.cascade1.sequences[0]])[0]
short_seq2 = aa_map([metrics_pair.cascade2.sequences[0]])[0]
print(short_seq1)
print(short_seq2)


print(f"Indices1: {indices1}")
print(f"Indices2: {indices2}")
view1.show()
view2.show()


[250321, 250354, 250371] [250320, 250354, 250371]
Results for ids: 250321, 250320
First column sequences (short): 
EPHPEGGFYHQTF
ELGLEPHPEGGFY
Indices1: [[0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [8, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]]
Indices2: [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]]


In [13]:
# df.groupby("level").size()

def sample_for_level(df, level, n_samples=1, random_state=None):
    """Draw `n_samples` random rows from the rows of `df` at a given level.

    Parameters
    ----------
    df : DataFrame with a "level" column.
    level : value of the "level" column to sample from.
    n_samples : number of rows to draw (without replacement).
    random_state : optional seed / generator forwarded to `DataFrame.sample`
        so that a draw can be reproduced. The default (None) keeps the
        original unseeded behavior.

    Raises
    ------
    KeyError
        If no row has the requested level (raised by `get_group`).
    ValueError
        If `n_samples` exceeds the number of rows at that level.
    """
    level_rows = df.groupby("level").get_group(level)
    return level_rows.sample(n=n_samples, random_state=random_state)

# For levels 1 through 4, print the shape of `scalar_rep` for one randomly
# sampled row of that level.
for level in range(1, 5):
    print(sample_for_level(df, level, 1)['scalar_rep'].values[0].shape)



(33,)
(46,)
(64,)
(89,)


In [14]:
# display(neighbor_metrics.metrics)
# Print the first element of every entry in `neighbor_metrics.metrics`
# (one line per neighbor).
for metric in neighbor_metrics.metrics:
    print(metric[0])

Metrics(distance=0.012842354205857701, alignment=6.0, hamming=0, cosine=6.551335725024643e-06)
Metrics(distance=0.15576868414222936, alignment=2.0, hamming=5, cosine=0.00027499027992183844)
Metrics(distance=0.3157634273554836, alignment=2.0, hamming=6, cosine=0.00032662936014216104)
Metrics(distance=0.6314175307326731, alignment=1.0, hamming=5, cosine=0.000544399093473702)
Metrics(distance=0.28981955241129814, alignment=2.0, hamming=4, cosine=0.0005691370006712937)


In [15]:
neighbor_metrics.plot()

Query: 1dqpA. part sequence: NGVNRY


Neighbor 0 at index 167855, PDB ID: 1dqpB. part sequence: NGVNRY -- Alignment: 6.0, RMSD: 0.0128, cosine: 0.000007


Neighbor 1 at index 32400, PDB ID: 1a3aC. part sequence: LLAGRK -- Alignment: 2.0, RMSD: 0.1558, cosine: 0.000275


Neighbor 2 at index 162197, PDB ID: 1a2xA. part sequence: MMEGVQ -- Alignment: 2.0, RMSD: 0.3158, cosine: 0.000327


Neighbor 3 at index 48481, PDB ID: 1ewyC. part sequence: KEEDLY -- Alignment: 1.0, RMSD: 0.6314, cosine: 0.000544


Neighbor 4 at index 188167, PDB ID: 1au1A. part sequence: TGYLRN -- Alignment: 2.0, RMSD: 0.2898, cosine: 0.000569


Try to structure by the distance of the top nodes. 

### Pairwise Comparisons

Look at pairs of indices and inspect their parents.

In [None]:
from typing import List
from moleculib.graphics.py3Dmol import plot_py3dmol_grid


def find_pdb(pdb_id, level=None):
    """Select rows of `df` whose `pdb_id` column contains `pdb_id`.

    The match ignores case. Passing `level` additionally restricts the result
    to rows whose `level` column equals that value.
    """
    matches_id = df['pdb_id'].str.contains(pdb_id, case=False)
    if level is None:
        selection = matches_id
    else:
        selection = matches_id & (df['level'] == level)
    return df[selection]

def make_grid(datums: List[ProteinDatum], num_columns=3):
    """Chunk `datums` into consecutive rows of at most `num_columns` items.

    The final row may be shorter; an empty input yields an empty list.
    """
    rows = []
    for start in range(0, len(datums), num_columns):
        rows.append(datums[start:start + num_columns])
    return rows



class NeighborSearch(MakeNeighborMetrics):
    """Reusable neighbor query bound to one table and one edge list.

    The parent class is (re)initialised on every call with the queried index,
    so a single instance can serve many different query indices.
    """

    def __init__(self, df, edges):
        # Only store the inputs here; the parent initialiser needs a query
        # index and is therefore deferred to __call__.
        self.df, self.edges = df, edges

    def __call__(self, index, n_neighbors=7):
        """Run the parent's neighbor query for `index`; return only the metrics."""
        super().__init__(self.df, self.edges, index)
        self.u = index
        neighbor_metrics, _distances, _top_vectors = super().__call__(
            n_neighbors=n_neighbors
        )
        return neighbor_metrics

neighbor_search = NeighborSearch(df, edges)


#### TERM Pair

In [None]:
# Two PDB ids to compare.
term_pair = ['2kl8', '2ln3']

# Level-4 rows for each id.
term1 = find_pdb(term_pair[0], level=4)
term2 = find_pdb(term_pair[1], level=4)
print(term1.index[:3])
print(term2.index[:3])

# Pick one row of term1 at random. The draw is unseeded, so the query node
# changes between runs.
term1_index = term1.index[np.random.randint(len(term1))]
neighbor_metrics, distances, top_vectors =  MakeNeighborMetrics(df, edges, term1_index)(n_neighbors=7)
# print(neighbor_metrics)
neighbor_metrics.plot()

# NOTE(review): NeighborSearch defaults to n_neighbors=7, so this looks like a
# repeat of the query and plot just above — confirm whether both are needed.
neighbor_search(term1_index).plot()

#### Beta Helices

In [None]:

# pertactin_pdbid = '1dab'
# NOTE(review): '1cb8' is not in the `beta_helix` list defined earlier in the
# notebook ('1cb7' is) — confirm this is the intended id.
pertactin_pdbid = '1cb8'
not_pertactin_pdbid = '1hin'

# Level-2 rows for each id.
pertactin = find_pdb(pertactin_pdbid, level=2)
not_pertactin = find_pdb(not_pertactin_pdbid, level=2)
print(pertactin.index[:3])
print(not_pertactin.index[:3])

# Query neighbors of the first matching row; `.index[0]` fails if
# `find_pdb` returned no rows.
neighbor_search(pertactin.index[0]).plot()

In [None]:


MOTIF = "KKYFMSSVRRMPL"
scaffolded_motif(MOTIF)

# grid1 = make_grid(term1['datum'].values)
# plot_py3dmol_grid(grid1).show()
# print("Grid 2")
# grid2 = make_grid(term2['datum'].values)
# plot_py3dmol_grid(grid2)



#### Single Pair

#### Many Pairs

In [None]:


def get_pair(u, v):
    """Build the cascades of `u` and `v` and return their metrics pair.

    A fresh CascadingEdges helper is created from the module-level `edges`,
    both cascades are printed, and the result of calling
    `MakeMetricsPair(df, us, vs)` is returned.
    """
    cascade_of = CascadingEdges(edges)
    us = cascade_of(u)
    vs = cascade_of(v)
    print(us, vs)
    build_pair = MakeMetricsPair(df, us, vs)
    return build_pair()

def process_uv_pairs(pairs, return_results=False):
    """Plot the cascade pair for every `(u, v)` in `pairs`.

    For each pair the metrics pair is built with `get_pair`, both views are
    shown, and `(view1, view2, indices1, indices2)` is collected.

    Parameters
    ----------
    pairs : iterable of `(u, v)` node-index tuples.
    return_results : when True, also return the collected per-pair tuples.

    Returns
    -------
    The metrics pair of the LAST processed pair, or None when `pairs` is
    empty (previously this raised UnboundLocalError). With
    `return_results=True` a `(last_metrics_pair, results)` tuple is returned.
    """
    metrics_pair = None  # stays None when there is nothing to process
    results = []
    for u, v in pairs:
        metrics_pair = get_pair(u, v)
        view1, view2, indices1, indices2 = metrics_pair.plot_cascade_pair(
            metrics_pair.cascade1, metrics_pair.cascade2, return_indices=True
        )
        view1.show()
        view2.show()
        results.append((view1, view2, indices1, indices2))
    if return_results:
        return metrics_pair, results
    return metrics_pair

# pair_lst = [(250101, 232844), (232845, 232847), (232843, 232846)]
pair_lst = [(247724, 178579)]
metrics_pair = process_uv_pairs(pair_lst)


In [None]:
print(metrics_pair)



In [None]:
import matplotlib.pyplot as plt
from scipy.spatial import distance as ssd
from tqdm import tqdm


# child, parent listing of thresholds
thresholds = [0.4, 0.5]

# Level 1 interesting quadrants
top_left = []
top_right = []
bottom_left = []
bottom_right = []

def get_parent_quadrants(df, u_index, v_index, level_bot, level_top):
    """Classify a node pair by child-level and parent-level cosine distance.

    The child distance is the cosine distance between the `scalar_rep` of
    rows `u_index` and `v_index`. The parents are the entries at position
    `level_top - level_bot` of each node's cascade (`cascading_edges(index)`),
    and the parent distance is the cosine distance between their
    `scalar_rep`s.

    Side effect: the tuple `(parent1_index, parent2_index)` is appended to one
    of the module-level lists `top_left`, `top_right`, `bottom_left`,
    `bottom_right`. "left" means child distance < thresholds[0]; "bottom"
    means parent distance < thresholds[1]. A distance exactly equal to its
    threshold counts as "right"/"top" (previously such pairs were not
    classified at all). If either distance is NaN, nothing is appended and
    "SOMETHING HAPPENED" is printed.

    Returns
    -------
    (parent1_index, parent2_index, parent_distance, child_distance), or
    (None, None, None, None) when the parents cannot be resolved or compared
    (ValueError, IndexError or KeyError during lookup/distance computation).
    """
    child_vec_u = np.stack(df.loc[u_index]['scalar_rep'])
    child_vec_v = np.stack(df.loc[v_index]['scalar_rep'])
    child_distance = ssd.cosine(child_vec_u, child_vec_v)

    levels_up = level_top - level_bot
    try:
        point1_parent_indices = cascading_edges(u_index)[levels_up]
        point2_parent_indices = cascading_edges(v_index)[levels_up]
        # Cascade entries live in the same index space as u_index / v_index
        # (the first cascade entry is the node itself), so look parents up by
        # label exactly like the children above.
        point1_parent = np.stack(df.loc[point1_parent_indices]['scalar_rep'])
        point2_parent = np.stack(df.loc[point2_parent_indices]['scalar_rep'])
        parent_distance = ssd.cosine(point1_parent, point2_parent)
    except (ValueError, IndexError, KeyError):
        return None, None, None, None

    parent_pair = (point1_parent_indices, point2_parent_indices)
    if np.isnan(child_distance) or np.isnan(parent_distance):
        # e.g. a zero vector makes the cosine distance undefined
        print("SOMETHING HAPPENED")
    else:
        children_close = child_distance < thresholds[0]
        parents_close = parent_distance < thresholds[1]
        if children_close and parents_close:
            bottom_left.append(parent_pair)
        elif children_close:
            top_left.append(parent_pair)
        elif parents_close:
            bottom_right.append(parent_pair)
        else:
            top_right.append(parent_pair)

    return point1_parent_indices, point2_parent_indices, parent_distance, child_distance

def cascade_lvl1(df, level_top=4, n_iter=100):
    """Sample random level-1 pairs and collect distances of top-left pairs.

    For `n_iter` iterations two distinct level-1 rows are drawn at random and
    classified with `get_parent_quadrants` (which also records the pair's
    parents in the module-level quadrant lists). Whenever that call adds an
    entry to `top_left`, the pair's parent and child distances are recorded.

    The distances are taken from the classification call itself. The earlier
    version re-ran `get_parent_quadrants` on the entries of `top_left`, which
    are parent indices rather than level-1 nodes, so the returned lists did
    not describe the sampled pairs and the quadrant lists were mutated a
    second time.

    Parameters
    ----------
    df : DataFrame with `level` and `scalar_rep` columns.
    level_top : level whose nodes act as parents.
    n_iter : number of random pairs to draw.

    Returns
    -------
    (parent_distances, child_distances): two lists of equal length, one entry
    per sampled pair that landed in the top-left quadrant during this call.
    """
    level_bot = 1

    parent_distances = []
    child_distances = []

    level_df = df[df['level'] == level_bot]
    for _ in tqdm(range(n_iter)):
        u_index, v_index = np.random.choice(level_df.index, 2, replace=False)

        # Detect top-left membership by watching the list grow, so this
        # function always agrees with get_parent_quadrants' own classification.
        n_top_left_before = len(top_left)
        _, _, parent_distance, child_distance = get_parent_quadrants(
            df, u_index, v_index, level_bot, level_top
        )
        if parent_distance is None:
            # parents could not be resolved for this pair
            continue
        if len(top_left) > n_top_left_before:
            parent_distances.append(parent_distance)
            child_distances.append(child_distance)

    return parent_distances, child_distances


