In [1]:
from fuzzywuzzy import fuzz
from libgenparser.parser import LibgenParser
# Single shared parser instance; the cells below call its search_author()
# and resolve_download_link() methods.
libgen = LibgenParser()

In [2]:
# Raw search: results of the author query, before any language/extension
# filtering or de-duplication is applied.
books = libgen.search_author("Marcus Aurelius")
# Number of raw results. Later cells read each result as a dict with keys
# such as 'Title', 'Language', 'Extension' and 'MD5'.
len(books)

25

In [3]:
# Language filter (English and Spanish) and extension filter (epub, pdf, mobi).
# A record is kept only when it passes both checks.
books_filtered = [
    book
    for book in books
    if book['Language'] in ('English', 'Spanish')
    and book['Extension'] in ('pdf', 'epub', 'mobi')
]
len(books_filtered)

21

In [4]:
# Duplicate filter on title, extension and language.
# Titles are compared with fuzzy string matching (fuzz.token_set_ratio); a
# score above 80 within the same language/extension counts as a duplicate.

# Seed the result with the first record. Slicing (instead of indexing [0])
# yields an empty list rather than raising IndexError when the
# language/extension filter left no records at all.
books_filtered_unique = books_filtered[:1]

def has_matching_language_extension(dict1, dict2):
    """Tell whether two book records share both language and file extension.

    Args:
        dict1: Book record with 'Language' and 'Extension' keys.
        dict2: Book record with 'Language' and 'Extension' keys.

    Returns:
        True when both fields are equal in the two records, False otherwise.
        'Language' is compared first; 'Extension' is only looked up when the
        languages already match.
    """
    return all(dict1[field] == dict2[field] for field in ('Language', 'Extension'))

def is_similar_dict(new_dict, existing_dicts, threshold=80):
    """Tell whether ``new_dict`` is a near-duplicate of any record already kept.

    Two records count as duplicates when they have the same language and
    extension and the fuzzy ``token_set_ratio`` score of their lower-cased
    titles is strictly greater than ``threshold``.

    Args:
        new_dict: Candidate book record ('Title', 'Language', 'Extension' keys).
        existing_dicts: Iterable of records that were already accepted.
        threshold: Score the title similarity must exceed. Defaults to 80,
            the value previously hard-coded here.

    Returns:
        True if at least one record in ``existing_dicts`` matches, else False
        (always False for an empty ``existing_dicts``).
    """
    def _normalized_title(record):
        # The parser emits None for missing fields (see 'Year'/'Pages' in the
        # printed records); compare an absent title as '' instead of crashing
        # on None.lower().
        return (record['Title'] or '').lower()

    return any(
        # Cheap exact check first; the fuzzy score is only computed for
        # records that already share language and extension.
        has_matching_language_extension(new_dict, d)
        and fuzz.token_set_ratio(_normalized_title(new_dict), _normalized_title(d)) > threshold
        for d in existing_dicts
    )

# Greedy pass over the records after the first one: each record is compared
# only against the ones kept so far, so the earliest occurrence of a title
# (per language/extension) is the one that survives.
for d in books_filtered[1:]:
    if is_similar_dict(d, books_filtered_unique):
        continue  # near-duplicate of a record that was already kept
    books_filtered_unique.append(d)

# Show the de-duplicated records, one dict per line
for d in books_filtered_unique:
    print(d)


{'Thumb': 'http://libgen.rs/covers/359000/D66190381CF9FD46CF56735AF1348EC6-g.jpg', 'Download_link': 'http://libgen.rs/get?&md5=D66190381CF9FD46CF56735AF1348EC6', 'MD5': 'D66190381CF9FD46CF56735AF1348EC6', 'Title': 'Meditations', 'Author': 'Marcus Aurelius', 'Year': None, 'Language': 'English', 'Pages': None, 'ID': '359874', 'Size': '357 Kb (365936)', 'Extension': 'epub'}
{'Thumb': 'http://libgen.rs/covers/359000/d970445ca4d558e8f7f7157c0dcd7e83-g.jpg', 'Download_link': 'http://libgen.rs/get?&md5=D970445CA4D558E8F7F7157C0DCD7E83', 'MD5': 'D970445CA4D558E8F7F7157C0DCD7E83', 'Title': 'Meditations', 'Author': 'Marcus Aurelius', 'Year': None, 'Language': 'English', 'Pages': '16', 'ID': '359875', 'Size': '118 Kb (120991)', 'Extension': 'pdf'}
{'Thumb': 'http://libgen.rs/covers/359000/1569AB2401F57CCA752831C05E216D7F-g.jpg', 'Download_link': 'http://libgen.rs/get?&md5=1569AB2401F57CCA752831C05E216D7F', 'MD5': '1569AB2401F57CCA752831C05E216D7F', 'Title': 'Selbstbetrachtungen', 'Author': 'Marcu

In [5]:
# Resolve a downloadable link for each de-duplicated book from its MD5 hash
# (one resolve_download_link call per record, in list order).
download_urls = [
    libgen.resolve_download_link(md5=book['MD5'])
    for book in books_filtered_unique
]
download_urls

['https://cloudflare-ipfs.com/ipfs/bafykbzacecda3pjf527oggxravhumsy7rrzteszg6j2lximxuipvdkvu5fcbs?filename=Marcus%20Aurelius%20-%20Meditations.epub',
 'https://cloudflare-ipfs.com/ipfs/bafk2bzacedefgkn47ojdljq7aj7ues2wdp5fxzxyma6ivxlauaahs537ls6uc?filename=Marcus%20Aurelius%20-%20Meditations.pdf',
 'https://cloudflare-ipfs.com/ipfs/bafk2bzaceamg6mqbnh5cll7rqikmqyadphe6v3v5nojh73qpi7qzqsicu2rxe?filename=Marcus%20Aurelius%20-%20Selbstbetrachtungen.epub',
 'https://cloudflare-ipfs.com/ipfs/bafk2bzacedx6aw3izvscviddbll57izbkmravsj3wq3jxi4ttmryozol5n4xy?filename=Marcus%20Aurelius%20-%20Thoughts.epub',
 'https://cloudflare-ipfs.com/ipfs/bafykbzacecsrxmuv5gwot3u7ssa4qtpzksxdubgpmeecgck4jxakhlszz5p4i?filename=Marcus%20Aurelius_%20Lucian_%20Justin%20Martyr_%20Walter%20Pater%20-%20Marcus%20Aurelius%20and%20His%20Times.%20The%20Transition%20from%20Paganism%20to%20Christianity%20%20-Pub.%20for%20the%20Classics%20Club%20by%20W.%20J.%20Black%20%281945%29.pdf',
 'https://cloudflare-ipfs.com/ipfs/bafy