In [5]:
# Sets to tuple method
similar_ids = { 
    123: [458, 812, 765], 
    458: [123, 812, 765], 
    812: [123, 458], 
    765: [123, 458], 
    999: [100], 
    100: [999] 
}

# Initialize set data object, Sets have unique elements 
# if try to add an element that already exist will auto-removed from set because have same hash
result_set=set()

# Get all possible pairs
for article_from_i, articles_to_i in similar_ids.items():
    for article_to_j in articles_to_i:
        # Add every article from and article to pair by a set and get default ordered 
        # by set creation. and finally create a tuple because set is a unhashable type
        result_set.add(tuple({article_from_i, article_to_j}))

result_set




{(100, 999), (123, 765), (123, 812), (458, 123), (458, 765), (458, 812)}

In [15]:
# order low to high method
similar_ids = { 
    123: [458, 812, 765], 
    458: [123, 812, 765], 
    812: [123, 458], 
    765: [123, 458], 
    999: [100], 
    100: [999] 
}

# Initialize set data object, Sets have unique elements 
# if try to add an element that already exist will auto-removed from set because have same hash
result_set=set()

# Get all possible pairs
for article_from_i, articles_to_i in similar_ids.items():
    for article_to_j in articles_to_i:
        values=(
            (article_from_i, article_to_j) 
            if article_from_i <= article_to_j 
            else 
            (article_to_j, article_from_i)
        )
        # Add every article from and article to pair by evaluate id value 
        # if article_from_i is lesser than article_to_j (article_from_i, article_to_j) 
        # otherwise (article_to_j, article_from_i)
        result_set.add(values)

result_set




{(100, 999), (123, 458), (123, 765), (123, 812), (458, 765), (458, 812)}

In [1]:
# Online Mode Load required functions
import requests
import ast


def get_similar_articles_ids(article_id:int):
    # Part of result of HTML where shows full id query (scrapping)
    # Example
    # <meta name="log_displayeduids" content="33400058,33781287,33128197,34369712,33272178,32889088,33034824,33779135,34356617,32997322" />
    begin_query_str="<meta name=\"log_displayeduids\" content=\""
    end_query_str="\" />"

    url=f"https://pubmed.ncbi.nlm.nih.gov/?linkname=pubmed_pubmed&from_uid={article_id}"
    request_obj=requests.get(url=url)
    
    request_str=request_obj.text
    partial_result=request_str[request_str.index(begin_query_str)+len(begin_query_str):]
    ids_str=partial_result[:partial_result.index(end_query_str)]
    return ast.literal_eval(ids_str)


def extract_unique_ordered_pairs(related_ids):
    # Initialize set data object, Sets have unique elements 
    # if try to add an element that already exist will auto-removed from set because have same hash
    result_set=set()

    # Get all possible pairs
    for article_from_i, articles_to_i in related_ids.items():
        for article_to_j in articles_to_i:
            values=(
                (article_from_i, article_to_j) 
                if article_from_i <= article_to_j 
                else 
                (article_to_j, article_from_i)
            )
            # Add every article from and article to pair by evaluate id value 
            # if article_from_i is lesser than article_to_j (article_from_i, article_to_j) 
            # otherwise (article_to_j, article_from_i)
            result_set.add(values)
    return result_set

print("OK")


OK


In [3]:
# Example "online"
# Id of article that will get all another similar articles
main_article_id=33400058

similar_ids_list=get_similar_articles_ids(main_article_id)

# Initialize dict
similar_ids={
    main_article_id:similar_ids_list,
}

# Search nested articles
nested_level = 3

similar_ids_iter=iter(similar_ids_list)
for i in range(nested_level):
    similar_id_i=next(similar_ids_iter)
    # Force to get next value
    if (similar_id_i==main_article_id):
        similar_id_i=next(similar_ids_iter)
    
    similar_ids_i_list=get_similar_articles_ids(similar_id_i)
    similar_ids[similar_id_i]=similar_ids_i_list


# get unique pairs
extract_unique_ordered_pairs(similar_ids)


{(32307245, 33128197),
 (32336723, 33128197),
 (32416679, 33781287),
 (32791241, 34369712),
 (32830930, 33781287),
 (32889088, 33400058),
 (32997322, 33128197),
 (32997322, 33400058),
 (33034824, 33400058),
 (33112236, 33781287),
 (33128197, 33128197),
 (33128197, 33305554),
 (33128197, 33308510),
 (33128197, 33319627),
 (33128197, 33336780),
 (33128197, 33400058),
 (33128197, 33555378),
 (33128197, 33687358),
 (33128197, 34369712),
 (33132205, 33781287),
 (33249077, 33781287),
 (33272178, 33400058),
 (33272178, 34369712),
 (33319627, 34369712),
 (33362758, 34369712),
 (33400058, 33400058),
 (33400058, 33779135),
 (33400058, 33781287),
 (33400058, 34356617),
 (33400058, 34369712),
 (33676349, 34369712),
 (33781287, 33781287),
 (33781287, 33917481),
 (33781287, 34311539),
 (33781287, 34378968),
 (33781287, 35632703),
 (34276652, 34369712),
 (34356617, 34369712),
 (34369712, 34369712)}