In [1]:
import requests
from selenium import webdriver
from bs4 import BeautifulSoup
from requests.exceptions import ConnectionError
from time import sleep
import pandas as pd

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By 
from selenium.webdriver.common.keys import Keys

In [3]:
options = Options()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

In [4]:
url = 'https://www.libgen.is'
driver.get(url)
html = driver.page_source

In [5]:
search_field = driver.find_element(By.NAME, 'req')
search_button = driver.find_element(By.XPATH, "//input[@value='Search!']")

In [6]:
search_query = 'Laurence Evans'
search_field.send_keys(search_query)
search_button.click()

In [7]:
html = driver.page_source

In [9]:
soup = BeautifulSoup(html, 'html.parser')

In [10]:
for tr in soup.find_all('tr')[1:]:  # Se salta el primer <tr> que normalmente son los encabezados
    tds = tr.find_all('td')
    if len(tds) > 9:  # Asumiendo que hay al menos 10 columnas: ID, autor, título, etc.
        # Ejemplo básico para extraer datos comunes
        author = tds[1].text.strip()
        title = tds[2].text.strip()
        publisher = tds[3].text.strip()
        year = tds[4].text.strip()
        pages = tds[5].text.strip()
        language = tds[6].text.strip()
        size = tds[7].text.strip()
        extension = tds[8].text.strip()
        # Enlaces espejo, asumiendo que están en la última columna
        mirrors = [a['href'] for a in tds[-1].find_all('a')]

        print(f"Author: {author}")
        print(f"Title: {title}")
        print(f"Publisher: {publisher}")
        print(f"Year: {year}")
        print(f"Pages: {pages}")
        print(f"Language: {language}")
        print(f"Size: {size}")
        print(f"Extension: {extension}")
        print(f"Mirrors: {mirrors}")
        print("--------")

Author: Author(s)
Title: Title
Publisher: Publisher
Year: Year
Pages: Pages
Language: Language
Size: Size
Extension: Extension
Mirrors: []
--------
Author: Laurence G Felker
Title: The Evans equations of Unified Field Theory 9781845492144, 1845492145
Publisher: Arima Publishing
Year: 2005
Pages: 373
Language: English
Size: 3 Mb
Extension: pdf
Mirrors: ['https://library.bz/main/edit/5862F22C2EE265252E504631A3827AF9']
--------
Author: Evans, Peter; McLean, William F.; Silver, Pete
Title: Structural engineering for architects : a handbook 1780670559, 978-178067-055-3, 9781780674261, 1780674260
Publisher: Laurence King Publishing
Year: 2013
Pages: 208[209]
Language: English
Size: 17 Mb
Extension: pdf
Mirrors: ['https://library.bz/main/edit/EA91986CFFDC22DB217C736BD7AAB2FF']
--------
Author: Laurence Harris , Jerry Coakley , Martin Croasdale , Trevor Evans
Title: New Perspectives on the Financial System 0709937415
Publisher: Croom Helm Ltd
Year: 1988
Pages: [207]
Language: English
Size: 178

In [11]:
books_data = []

for tr in soup.find_all('tr')[1:]:  # Se salta el primer <tr> que son los encabezados
    tds = tr.find_all('td')
    if len(tds) > 9:  # Verifica que haya suficientes columnas
        # Crea un diccionario con los datos de cada libro
        book_dict = {
            "Author": tds[1].text.strip(),
            "Title": tds[2].text.strip(),
            "Publisher": tds[3].text.strip(),
            "Year": tds[4].text.strip(),
            "Pages": tds[5].text.strip(),
            "Language": tds[6].text.strip(),
            "Size": tds[7].text.strip(),
            "Extension": tds[8].text.strip(),
            "Mirrors": [a['href'] for a in tds[-1].find_all('a')],
        }
        if 'pdf' in str(book_dict.get('Extension')):
            print(book_dict)
            # Agrega el diccionario a la lista
            books_data.append(book_dict)

{'Author': 'Laurence G Felker', 'Title': 'The Evans equations of Unified Field Theory 9781845492144, 1845492145', 'Publisher': 'Arima Publishing', 'Year': '2005', 'Pages': '373', 'Language': 'English', 'Size': '3 Mb', 'Extension': 'pdf', 'Mirrors': ['https://library.bz/main/edit/5862F22C2EE265252E504631A3827AF9']}
{'Author': 'Evans, Peter; McLean, William F.; Silver, Pete', 'Title': 'Structural engineering for architects : a handbook 1780670559, 978-178067-055-3, 9781780674261, 1780674260', 'Publisher': 'Laurence King Publishing', 'Year': '2013', 'Pages': '208[209]', 'Language': 'English', 'Size': '17 Mb', 'Extension': 'pdf', 'Mirrors': ['https://library.bz/main/edit/EA91986CFFDC22DB217C736BD7AAB2FF']}
{'Author': 'Laurence Harris , Jerry Coakley , Martin Croasdale , Trevor Evans', 'Title': 'New Perspectives on the Financial System 0709937415', 'Publisher': 'Croom Helm Ltd', 'Year': '1988', 'Pages': '[207]', 'Language': 'English', 'Size': '178 Mb', 'Extension': 'pdf', 'Mirrors': ['https

In [12]:
# Convert the list of dictionaries into a list of tuples
books_data = [tuple(book.items()) for book in books_data]

# Convert the list of tuples back into a list of dictionaries
books_data = [dict(book) for book in books_data]

In [13]:
books_data

[{'Author': 'Laurence G Felker',
  'Title': 'The Evans equations of Unified Field Theory 9781845492144, 1845492145',
  'Publisher': 'Arima Publishing',
  'Year': '2005',
  'Pages': '373',
  'Language': 'English',
  'Size': '3 Mb',
  'Extension': 'pdf',
  'Mirrors': ['https://library.bz/main/edit/5862F22C2EE265252E504631A3827AF9']},
 {'Author': 'Evans, Peter; McLean, William F.; Silver, Pete',
  'Title': 'Structural engineering for architects : a handbook 1780670559, 978-178067-055-3, 9781780674261, 1780674260',
  'Publisher': 'Laurence King Publishing',
  'Year': '2013',
  'Pages': '208[209]',
  'Language': 'English',
  'Size': '17 Mb',
  'Extension': 'pdf',
  'Mirrors': ['https://library.bz/main/edit/EA91986CFFDC22DB217C736BD7AAB2FF']},
 {'Author': 'Laurence Harris , Jerry Coakley , Martin Croasdale , Trevor Evans',
  'Title': 'New Perspectives on the Financial System 0709937415',
  'Publisher': 'Croom Helm Ltd',
  'Year': '1988',
  'Pages': '[207]',
  'Language': 'English',
  'Size':