# A simple bottleneck

Very simple code that :
- make_random_edges : generate a random graph given a number of edge and nodes
- find_unique_edges : filter the list of edges to only keep an instance of each (i.e. remove duplicate edges)
- contains : verify if an edge is part of a list

In [1]:
import random

def make_random_edges(n_edges=100, n_nodes=10):
    random.seed(42)
    edges = [[random.randint(0, n_nodes), random.randint(0, n_nodes)] for e in range(n_edges)]
    return edges

def find_unique_edges(edges):
    edges = list(edges)
    unique_edges = []
    while edges:
        edge = edges.pop()
        if not contains(edges, edge):
            unique_edges.append(edge)
    return unique_edges

def contains(edges, edge):
    for e in edges:
        if sorted(e) == sorted(edge):
            return True
    return False


Some profiling functions to ease the process

In [2]:
from functools import wraps
from cProfile import Profile
from tempfile import NamedTemporaryFile
import pstats

_time_profiles = {}

def profile_time(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        profile = Profile()
        ret = profile.runcall(func, *args, **kwargs)
        _time_profiles[(wrapper, ) + args] = profile
        
        return ret 
    return wrapper

def profile_stats(profile):
    temp_stats = NamedTemporaryFile()
    profile.dump_stats(temp_stats.name)
    return pstats.Stats(temp_stats.name)

Simple profiling with text outputs  

In [3]:
@profile_time
def remove_duplicate_edges(n_edges=2000):
    edges = make_random_edges(n_edges, 200)
    unique_edges = find_unique_edges(edges)
    return unique_edges

unique_edges = remove_duplicate_edges(2000)

profile = _time_profiles[(remove_duplicate_edges, 2000)]

stats = profile_stats(profile)
stats.strip_dirs()
stats.sort_stats('time')
stats.print_stats()

Mon Nov  3 16:55:12 2025    /tmp/tmpxz84w9xn

         3942601 function calls in 1.320 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
  3903622    0.714    0.000    0.714    0.000 {built-in method builtins.sorted}
     2000    0.590    0.000    1.304    0.001 2053488929.py:17(contains)
     4000    0.006    0.000    0.007    0.000 random.py:239(_randbelow_with_getrandbits)
     4000    0.004    0.000    0.011    0.000 random.py:292(randrange)
        1    0.002    0.002    1.307    1.307 2053488929.py:8(find_unique_edges)
        1    0.001    0.001    0.013    0.013 2053488929.py:5(<listcomp>)
     4000    0.001    0.000    0.012    0.000 random.py:366(randint)
    12000    0.001    0.000    0.001    0.000 {built-in method _operator.index}
     5051    0.001    0.000    0.001    0.000 {method 'getrandbits' of '_random.Random' objects}
     1919    0.000    0.000    0.000    0.000 {method 'append' of 'list' objects}
     4

<pstats.Stats at 0x7f6599aa7760>

Advance profiling with visualization

In [4]:
%reload_ext snakeviz

ModuleNotFoundError: No module named 'snakeviz'

In [None]:
from snakeviz.ipymagic import open_snakeviz_and_display_in_notebook

def display_stats(profile):
    temp_stats = NamedTemporaryFile(delete=False)
    profile.dump_stats(temp_stats.name)
    return open_snakeviz_and_display_in_notebook(temp_stats.name)

In [None]:
@profile_time
def remove_duplicate_edges(n_edges=2000):
    edges = make_random_edges(n_edges, 200)
    unique_edges = find_unique_edges(edges)
    return unique_edges

unique_edges = remove_duplicate_edges(2000)

profile = _time_profiles[(remove_duplicate_edges, 2000)]
display_stats(profile)

Propose a new implementation and profile it