In [18]:
from fuzzywuzzy import fuzz
from libgenparser.parser import LibgenParser
libgen = LibgenParser()

In [29]:
# Raw search
books = libgen.search_author("Marcus Aurelius")
len(books)

25

In [20]:
# Filtro de lenguaje y extension
books_filtered = [d for d in search_result if d['Language'] in ['English', 'Spanish'] and d['Extension'] in ['pdf', 'epub', 'mobi']]
len(books_filtered)

21

In [57]:
# Filtro de duplicados en titulo, extension y lenguaje
# fuzzy string matching using Levenshtein distance (threshold = 80)

# Filtro de duplicados en titulo, extension y lenguaje
# fuzzy string matching using Levenshtein distance (threshold = 80)

unique_books = {}

for book in books_filtered:
    key = (book['Title'], book['Language'], book['Extension'])
    print(key)

    # Check if a book with the same key exists
    if key in unique_books:
        existing_book = unique_books[key]

        # Compare titles if extension and language match
        if existing_book['Extension'] == book['Extension'] and existing_book['Language'] == book['Language']:
            print(f"{existing_book['Title']} is similar to {existing_book['Title']}")
            similarity_score = fuzz.token_set_ratio(existing_book['Title'], book['Title'])
            if similarity_score >= 80:
                # Keep the existing book (higher similarity) and discard the current book
                continue
    else:
        unique_books[key] = book

result = list(unique_books.values())
len(result)

('Meditations', 'English', 'epub')


KeyError: ('Meditations', 'English', 'epub')

In [61]:
# Filtro de duplicados en titulo, extension y lenguaje
# fuzzy string matching using Levenshtein distance (threshold = 80)

unique_books = {}

for book in books_filtered:
    key = (book['Title'], book['Language'], book['Extension'])
    most_similar_key = None
    highest_similarity = 0

    # Find the most similar existing key
    for existing_key in unique_books:
        existing_title, existing_language, existing_extension = existing_key
        similarity_score = fuzz.token_set_ratio(existing_title, key[0])

        if similarity_score > highest_similarity:
            highest_similarity = similarity_score
            most_similar_key = existing_key

    if most_similar_key is not None:
        existing_book = unique_books[most_similar_key]

        # Compare titles if extension and language match
        if existing_book['Extension'] == key[2] and existing_book['Language'] == key[1]:
            if highest_similarity >= 80:
                # Keep the existing book (higher similarity) and discard the current book
                continue

    unique_books[key] = book

result = list(unique_books.values())
len(result)
result

[{'Thumb': 'http://libgen.rs/covers/359000/D66190381CF9FD46CF56735AF1348EC6-g.jpg',
  'Download_link': 'http://libgen.rs/get?&md5=D66190381CF9FD46CF56735AF1348EC6',
  'MD5': 'D66190381CF9FD46CF56735AF1348EC6',
  'Title': 'Meditations',
  'Author': 'Marcus Aurelius',
  'Year': None,
  'Language': 'English',
  'Pages': None,
  'ID': '359874',
  'Size': '357 Kb (365936)',
  'Extension': 'epub'},
 {'Thumb': 'http://libgen.rs/covers/359000/d970445ca4d558e8f7f7157c0dcd7e83-g.jpg',
  'Download_link': 'http://libgen.rs/get?&md5=D970445CA4D558E8F7F7157C0DCD7E83',
  'MD5': 'D970445CA4D558E8F7F7157C0DCD7E83',
  'Title': 'Meditations',
  'Author': 'Marcus Aurelius',
  'Year': None,
  'Language': 'English',
  'Pages': '16',
  'ID': '359875',
  'Size': '118 Kb (120991)',
  'Extension': 'pdf'},
 {'Thumb': 'http://libgen.rs/covers/391000/5a7d04c411aea3cf8d360f0aead1187a-d.jpg',
  'Download_link': 'http://libgen.rs/get?&md5=5A7D04C411AEA3CF8D360F0AEAD1187A',
  'MD5': '5A7D04C411AEA3CF8D360F0AEAD1187A',

In [13]:
download_urls = [libgen.resolve_download_link(md5=search_results_filtered[i]['MD5']) for i in range(len(search_results_filtered))]
download_urls

['https://cloudflare-ipfs.com/ipfs/bafykbzacecda3pjf527oggxravhumsy7rrzteszg6j2lximxuipvdkvu5fcbs?filename=Marcus%20Aurelius%20-%20Meditations.epub',
 'https://cloudflare-ipfs.com/ipfs/bafk2bzacedefgkn47ojdljq7aj7ues2wdp5fxzxyma6ivxlauaahs537ls6uc?filename=Marcus%20Aurelius%20-%20Meditations.pdf',
 'https://cloudflare-ipfs.com/ipfs/bafykbzacecprhft75mlq2madmlp5kksuq2ko4dttdnqxh2zc2ipat3vumm6jq?filename=Marcus%20Aurelius%20-%20Meditations%20.pdf',
 'https://cloudflare-ipfs.com/ipfs/bafk2bzaceamg6mqbnh5cll7rqikmqyadphe6v3v5nojh73qpi7qzqsicu2rxe?filename=Marcus%20Aurelius%20-%20Selbstbetrachtungen.epub',
 'https://cloudflare-ipfs.com/ipfs/bafk2bzacedx6aw3izvscviddbll57izbkmravsj3wq3jxi4ttmryozol5n4xy?filename=Marcus%20Aurelius%20-%20Thoughts.epub',
 'https://cloudflare-ipfs.com/ipfs/bafk2bzacebilaedeod4iasj6x74o7noz227xygb4z2i5fvb2hw7yiy2eba37u?filename=%28Penguin%20Classics%29%20Marcus%20Aurelius%2C%20Martin%20Hammond%2C%20Diskin%20Clay%20-%20Meditations%20-Penguin%20Classics%20%282006%2