In [75]:
import os
import re
import time
from datetime import datetime
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup


In [76]:
# Root URL of the site being scraped; relative links are resolved against it.
homepage_url = "http://books.toscrape.com/"
# A timeout keeps the kernel from hanging forever on a stalled connection.
homepage_response = requests.get(homepage_url, timeout=10)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
homepage_response.raise_for_status()
# Force UTF-8 decoding so the price text is preserved.
homepage_response.encoding = 'utf-8'
homepage_soup = BeautifulSoup(homepage_response.text, 'html.parser')

In [77]:
# getting a category list with the category name and the corresponding link

def get_categories(timeout=10):
  """Return a mapping of category name -> absolute category URL.

  Downloads ``homepage_url`` and reads the direct ``<li>`` children of the
  sidebar list selected by ``div.side_categories > ul > li > ul``.

  Args:
    timeout: Seconds to wait for the HTTP response. Optional; calling the
      function with no arguments keeps working.

  Returns:
    dict mapping the stripped text of each list item to its link, resolved
    against ``homepage_url``. Entries follow page order.

  Raises:
    requests.RequestException: On network failure, timeout, or an HTTP
      error status.
    ValueError: If the sidebar category list is not present in the page.
  """
  response = requests.get(homepage_url, timeout=timeout)
  response.raise_for_status()
  response.encoding = 'utf-8'
  soup = BeautifulSoup(response.text, 'html.parser')

  category_list = soup.select_one('div.side_categories > ul > li > ul')
  if category_list is None:
    # Previously this surfaced as "'NoneType' object has no attribute 'find_all'".
    raise ValueError(f"No category list found on {homepage_url}")

  categories = {}
  # recursive=False: only the direct children, not any nested list items.
  for tag in category_list.find_all('li', recursive=False):
    link_tag = tag.find('a')
    if link_tag is None or not link_tag.get('href'):
      # An item without a link cannot be scraped; skip it rather than crash.
      continue
    name = tag.text.strip()
    categories[name] = urljoin(homepage_url, link_tag['href'])
  return categories

In [78]:
#get all books from all pages of a specific category
def scrape_category(category_name, start_url, timeout=10, delay=0.0):
  """Collect every book listed in one category, following pagination.

  Args:
    category_name: Label stored in the ``'category'`` field of each record.
    start_url: URL of the first listing page of the category.
    timeout: Seconds to wait for each HTTP response (optional).
    delay: Seconds to sleep before requesting the next page (optional;
      the default of 0 keeps the original back-to-back behaviour).

  Returns:
    list of dicts with the keys ``'title'``, ``'Price (GBP)'`` (float) and
    ``'category'``, in the order the books appear across pages.

  Raises:
    requests.RequestException: On network failure, timeout, or an HTTP
      error status for any page.
    ValueError: If a price element contains no number.
  """
  books = []
  current_url = start_url

  while current_url:
    response = requests.get(current_url, timeout=timeout)
    # Stop on an error page instead of returning a silently truncated list.
    response.raise_for_status()
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')

    # Add all books of the current page.
    for article in soup.find_all('article', class_='product_pod'):
      title = article.h3.a['title']
      price = article.find('p', class_='price_color').text
      # Take the first decimal number, ignoring any currency symbol around it.
      match = re.search(r'[0-9]+(?:\.[0-9]+)?', price)
      if match is None:
        raise ValueError(f"No numeric price in {price!r} for {title!r}")
      clean_price = float(match.group(0))
      books.append({'title': title, 'Price (GBP)': clean_price, 'category': category_name})

    # Pagination: if the page has a "next" link, continue with that page.
    next_button = soup.find('li', class_='next')
    if next_button:
      relative_link = next_button.find('a')['href']
      # The link is relative to the current page, not to the site root.
      current_url = urljoin(current_url, relative_link)
      if delay > 0:
        time.sleep(delay)
    else:
      current_url = None

  return books

In [79]:
def get_exchange_rate(api_key, base_currency, target_currency, timeout=10):
  """Fetch the conversion rate from ``base_currency`` to ``target_currency``.

  Queries the ExchangeRate-API v6 ``latest`` endpoint for ``base_currency``.

  Args:
    api_key: API key, embedded in the request URL.
    base_currency: Currency code to convert from (e.g. ``"GBP"``).
    target_currency: Currency code to convert to (e.g. ``"EUR"``).
    timeout: Seconds to wait for the HTTP response (optional).

  Returns:
    The rate found under ``conversion_rates[target_currency]``, or ``None``
    when the request fails, the response is not valid JSON, the API does not
    report ``"success"``, or the target currency is missing. A message is
    printed in each failure case.
  """
  url = f"https://v6.exchangerate-api.com/v6/{api_key}/latest/{base_currency}"
  try:
    response = requests.get(url, timeout=timeout)
    # The body is parsed regardless of HTTP status, as before; failures are
    # detected through the JSON "result" field below.
    data = response.json()
  except (requests.RequestException, ValueError) as exc:
    # Print only the exception type: the full message can include the
    # request URL, which contains the API key.
    print(f"Error fetching API data! ({type(exc).__name__})")
    return None

  if not isinstance(data, dict) or data.get("result") != "success":
    print("Error fetching API data!")
    return None

  rate = data.get("conversion_rates", {}).get(target_currency)
  if rate is None:
    print(f"Error fetching API data! No rate for {target_currency}.")
  return rate

# Read the key from the environment instead of committing it to the notebook.
# NOTE(review): the key that was hardcoded here has been exposed in this
# notebook and should be revoked/rotated.
API_KEY = os.environ.get("EXCHANGERATE_API_KEY")
if not API_KEY:
  raise RuntimeError(
    "Set the EXCHANGERATE_API_KEY environment variable before running this cell."
  )

# GBP -> EUR rate used to convert the scraped prices; None if the lookup failed.
rate = get_exchange_rate(API_KEY, "GBP", "EUR")

print(rate)


1.1476


In [80]:
# NOTE(review): `name` is not referenced in the visible cells; this looks like
# an accidental auto-import. Confirm and remove.
from os import name
all_categories = get_categories()

# Reuse the rate fetched in the previous cell rather than issuing a second,
# identical API request.
eur_rate = rate

# Flat list of book records accumulated across all categories.
final_data = []

for category_name, category_url in all_categories.items():
  print(f"Scraping category: {category_name}")
  results = scrape_category(category_name, category_url)
  final_data.extend(results)


Scraping category: Travel
Scraping category: Mystery
Scraping category: Historical Fiction
Scraping category: Sequential Art
Scraping category: Classics
Scraping category: Philosophy
Scraping category: Romance
Scraping category: Womens Fiction
Scraping category: Fiction
Scraping category: Childrens
Scraping category: Religion
Scraping category: Nonfiction
Scraping category: Music
Scraping category: Default
Scraping category: Science Fiction
Scraping category: Sports and Games
Scraping category: Add a comment
Scraping category: Fantasy
Scraping category: New Adult
Scraping category: Young Adult
Scraping category: Science
Scraping category: Poetry
Scraping category: Paranormal
Scraping category: Art
Scraping category: Psychology
Scraping category: Autobiography
Scraping category: Parenting
Scraping category: Adult Fiction
Scraping category: Humor
Scraping category: Horror
Scraping category: History
Scraping category: Food and Drink
Scraping category: Christian Fiction
Scraping category: 

In [81]:
# get_exchange_rate returns None on failure; stop with a clear message rather
# than a TypeError from multiplying a column by None.
if eur_rate is None:
  raise ValueError("GBP->EUR exchange rate is unavailable; cannot compute 'Price (EUR)'.")

# Explicit columns keep the frame well-formed even when nothing was scraped.
df = pd.DataFrame(final_data, columns=['title', 'Price (GBP)', 'category'])
# Sequential 1-based ID as the first column.
df.insert(0, 'ID', range(1, 1 + len(df)))
# Add new column for EUR prices, rounded to 2 decimals.
df['Price (EUR)'] = (df['Price (GBP)'] * eur_rate).round(2)
# Date this cell was run, stored alongside the converted prices.
df['Exchange Date'] = datetime.now().strftime("%Y-%m-%d")
df.to_csv('books_data.csv', index=False, encoding='utf-8')
print(df)

       ID                                              title  Price (GBP)  \
0       1                            It's Only the Himalayas        45.17   
1       2  Full Moon over Noah’s Ark: An Odyssey to Mount...        49.43   
2       3  See America: A Celebration of Our National Par...        48.87   
3       4  Vagabonding: An Uncommon Guide to the Art of L...        36.94   
4       5                               Under the Tuscan Sun        37.33   
..    ...                                                ...          ...   
995   996  Why the Right Went Wrong: Conservatism--From G...        52.65   
996   997  Equal Is Unfair: America's Misguided Fight Aga...        56.86   
997   998                                     Amid the Chaos        36.58   
998   999                                         Dark Notes        19.19   
999  1000  The Long Shadow of Small Ghosts: Murder and Me...        10.97   

     category  Price (EUR) Exchange Date  
0      Travel        51.84    