In [1]:
from bs4 import BeautifulSoup
import requests
from urllib.request import urlretrieve

In [6]:
def get_libgen_url(book_name:str):
    """
    Build the libgen.is search URL for a book title or search phrase.

    Args:
        book_name (str): Text to search for. It is URL-encoded before being
            placed in the ``req`` query parameter.

    Returns:
        str: The search URL, carrying the fixed query parameters res=100,
            phrase=1, view=simple, column=def, sort=year and sortmode=DESC.
    """
    # Local import keeps this cell runnable on its own.
    from urllib.parse import quote_plus

    # Encode the phrase so spaces, '&', '#', '=' etc. cannot break or
    # inject into the query string.
    query = quote_plus(book_name)
    url = f"https://libgen.is/search.php?&res=100&req={query}&phrase=1&view=simple&column=def&sort=year&sortmode=DESC"
    return url

In [12]:
def extract_pdf_links(html_content):
    """
    Extract download links for PDF versions of books from HTML table.

    Args:
        html_content (str or bytes): HTML content containing the book table

    Returns:
        list: One dictionary per PDF row with the keys 'title', 'extension'
            and 'links'. 'links' holds at most one URL: the first href
            containing "books.ms" found in the 10th/11th columns; it is an
            empty list when neither column has such a link.
    """
    # Parse the HTML
    soup = BeautifulSoup(html_content, 'html.parser')

    # Store PDF links
    pdf_links = []

    # Data rows are identified by the presence of a bgcolor attribute
    for row in soup.find_all('tr', bgcolor=True):
        # Collect the cells once instead of re-scanning the row per column
        cells = row.find_all('td')

        # A row too short to have a 9th column cannot be a book row;
        # indexing it would raise IndexError, so skip it
        if len(cells) < 9:
            continue

        # 9th column has the extension; only PDF files are of interest
        extension = cells[8].text.strip()
        if extension != 'pdf':
            continue

        # 3rd column has the book title
        title = cells[2].text.strip()

        # 10th and 11th columns have the download links; keep only the
        # first one that points at books.ms
        links = []
        for link_cell in cells[9:11]:
            a_tag = link_cell.find('a')
            if a_tag and 'href' in a_tag.attrs and "books.ms" in a_tag['href']:
                links.append(a_tag['href'])
                break

        # Add to our results
        pdf_links.append({
            'title': title,
            'extension': extension,
            'links': links
        })

    return pdf_links

In [None]:
def download_books(books_to_download:list):
    """
    Download each book as a PDF file into the current working directory.

    For every entry the intermediate download page is fetched, the anchor
    whose text is exactly "GET" is located, and the file it points to is
    saved as "<title>.pdf".

    Args:
        books_to_download (list): Entries as produced by extract_pdf_links
            (dicts with 'title' and 'links'). Objects exposing ``.title``
            and ``.link`` attributes are accepted as well.

    Returns:
        list: File names of the books that were saved. Entries without a
            page link or without a "GET" anchor are skipped.

    Raises:
        requests.HTTPError: If a download page responds with an error status.
    """
    # Local import keeps this cell runnable on its own.
    from urllib.parse import urljoin

    saved_files = []
    for book in books_to_download:
        # extract_pdf_links yields dicts; attribute-style objects are still
        # supported for callers that build their own records.
        if isinstance(book, dict):
            title = book['title']
            candidates = book.get('links') or []
            page_url = candidates[0] if candidates else None
        else:
            title = book.title
            page_url = book.link

        if not page_url:
            continue

        # A timeout prevents a stalled mirror from hanging the whole loop.
        page = requests.get(page_url, timeout=30)
        page.raise_for_status()
        soup = BeautifulSoup(page.content, 'html.parser')

        # find() returns None when the page has no "GET" anchor.
        get_anchor = soup.find("a", string="GET")
        if get_anchor is None or not get_anchor.get("href"):
            continue
        # urljoin leaves absolute hrefs untouched and resolves relative ones.
        download_link = urljoin(page_url, get_anchor["href"])

        # Titles may contain characters that are not valid in file names
        # (e.g. ':' or '/'), so replace them before building the path.
        safe_title = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in title)
        file_name = safe_title + ".pdf"
        urlretrieve(download_link, file_name)
        saved_files.append(file_name)

    return saved_files
        
        

In [7]:
# Build the libgen search URL for the example book title.
url = get_libgen_url(book_name = "physics_of_the_impossible")

'https://libgen.is/search.php?req=physics_of_the_impossible&lg_topic=libgen&open=0&view=simple&res=100&phrase=1&column=def'

In [13]:
# Fetch the search results page; its body is parsed by extract_pdf_links below.
search_page = requests.get(url)

In [14]:
# Turn the raw response body into a list of PDF entries
# (dicts with 'title', 'extension' and 'links').
links = extract_pdf_links(search_page.content)

In [15]:
# Inspect the extracted PDF entries.
links

[{'title': 'Physics of the impossible [1\xa0ed.] 9780385520690, 0385520697',
  'extension': 'pdf',
  'links': ['http://books.ms/main/18030181E6D4F40D8FED29D77979FA04',
   'http://libgen.li/ads.php?md5=18030181E6D4F40D8FED29D77979FA04']},
 {'title': 'Physics of the Impossible: A Scientific Exploration into the World of Phasers, Force Fields, Teleportation, and Time Travel [1\xa0ed.] 0385520697, 9780385520690',
  'extension': 'pdf',
  'links': ['http://books.ms/main/C50CCD9AE95C0F2ED5DA1429007C817B',
   'http://libgen.li/ads.php?md5=C50CCD9AE95C0F2ED5DA1429007C817B']},
 {'title': 'Physics of the Impossible - A Scientific Exploration Into the World of Phasers, Force Fields, Teleportation, and Time Travel [Reprint\xa0ed.] 9780307278821, 0307278824',
  'extension': 'pdf',
  'links': ['http://books.ms/main/B13679F6A337319C561EE0C16F74A053',
   'http://libgen.li/ads.php?md5=B13679F6A337319C561EE0C16F74A053']},
 {'title': 'Physics of the Impossible: A Scientific Exploration into the World of P

In [16]:
from urllib.request import urlretrieve

In [29]:
# NOTE(review): among the cells visible here, `get_link` is only assigned in a
# later cell (In [27]); on a fresh kernel run top-to-bottom this cell raises
# NameError. Run it after `get_link` has been defined.
urlretrieve(get_link, "physics_of_the_impossible" + ".pdf")

('physics_of_the_impossible.pdf', <http.client.HTTPMessage at 0x7b88ff708320>)

In [18]:
# First stored link of the first extracted entry.
links[0]['links'][0]

'http://books.ms/main/18030181E6D4F40D8FED29D77979FA04'

In [19]:
# Fetch the download page of the first extracted entry. The link is taken from
# `links` rather than pasted in as a literal so the cell follows whatever the
# search actually returned.
d_page = requests.get(links[0]['links'][0])

In [21]:
# Parse the download page so the direct file link can be located.
soup = BeautifulSoup(d_page.content, 'html.parser')

In [27]:
# Take the href of the anchor whose text is exactly "GET". If the page has no
# such anchor, find() returns None and the subscript raises TypeError.
get_link = soup.find("a", string="GET")["href"]

In [28]:
# Show the href extracted from the "GET" anchor.
get_link

'https://download.books.ms/main/75000/18030181e6d4f40d8fed29d77979fa04/Michio%20Kaku%20-%20Physics%20of%20the%20impossible-Doubleday%20%282008%29.pdf'