# Books Scraping Project

- Scrap the books (name, price, rate, category_name) for each category and put them into a CSV & Excel file
- https://books.toscrape.com/

## Import some helper modules

In [1]:
import csv
import requests
import pandas as pd
from bs4 import BeautifulSoup



## Scrape Process

- scrape function take url as input
- return (name , rate , price) for all books in any category
- save books info in CSV file¶

In [2]:
response = requests.request('GET', 'http://books.toscrape.com/catalogue/category/books/mystery_3/index.html')
soup = BeautifulSoup(response.content, 'html.parser')

In [3]:
all_articles = soup.find_all('article', attrs={'class': 'product_pod'})
len(all_articles)

20

In [4]:
all_articles[0]

<article class="product_pod">
<div class="image_container">
<a href="../../../sharp-objects_997/index.html"><img alt="Sharp Objects" class="thumbnail" src="../../../../media/cache/32/51/3251cf3a3412f53f339e42cac2134093.jpg"/></a>
</div>
<p class="star-rating Four">
<i class="icon-star"></i>
<i class="icon-star"></i>
<i class="icon-star"></i>
<i class="icon-star"></i>
<i class="icon-star"></i>
</p>
<h3><a href="../../../sharp-objects_997/index.html" title="Sharp Objects">Sharp Objects</a></h3>
<div class="product_price">
<p class="price_color">£47.82</p>
<p class="instock availability">
<i class="icon-ok"></i>
    
        In stock
    
</p>
<form>
<button class="btn btn-primary btn-block" data-loading-text="Adding..." type="submit">Add to basket</button>
</form>
</div>
</article>

In [5]:
price = all_articles[0].find('p', attrs={'class': 'price_color'})
print(float(price.get_text().split('£')[1]))

book_name = all_articles[0].find('h3').find('a').get('title')
print(book_name)

rating = all_articles[12].find('p').get('class')[1]
print(rating)

47.82
Sharp Objects
Three


In [6]:
def get_books(page):
    with open('books.csv', 'w') as f:
        writer = csv.DictWriter(f, fieldnames=['title', 'price', 'rate'])
        writer.writeheader()
        response = requests.request('GET', page)
        soup = BeautifulSoup(response.content, 'html.parser')
        all_articles = soup.find_all('article', attrs={'class': 'product_pod'})
        rates = {'One': 1, 'Two': 2, 'Three': 3, 'Four': 4, 'Five': 5}
        for article in all_articles:
            price = float(article.find('p', attrs={'class': 'price_color'}).get_text().split('£')[1])
            book_name = article.find('h3').find('a').get('title')
            rating = article.find('p').get('class')[1]
            writer.writerow({'title': book_name, 'price': price, 'rate': rates[rating]})
            print(f'Book name: {book_name}\nPrice: {price}\nRate: {rates[rating]}\n------')

In [7]:
get_books('http://books.toscrape.com/catalogue/category/books/science_22/index.html')

Book name: The Most Perfect Thing: Inside (and Outside) a Bird's Egg
Price: 42.96
Rate: 4
------
Book name: Immunity: How Elie Metchnikoff Changed the Course of Modern Medicine
Price: 57.36
Rate: 5
------
Book name: Sorting the Beef from the Bull: The Science of Food Fraud Forensics
Price: 44.74
Rate: 4
------
Book name: Tipping Point for Planet Earth: How Close Are We to the Edge?
Price: 37.55
Rate: 1
------
Book name: The Fabric of the Cosmos: Space, Time, and the Texture of Reality
Price: 55.91
Rate: 1
------
Book name: Diary of a Citizen Scientist: Chasing Tiger Beetles and Other New Ways of Engaging the World
Price: 28.41
Rate: 1
------
Book name: The Origin of Species
Price: 10.01
Rate: 4
------
Book name: The Grand Design
Price: 13.76
Rate: 3
------
Book name: Peak: Secrets from the New Science of Expertise
Price: 16.28
Rate: 2
------
Book name: The Elegant Universe: Superstrings, Hidden Dimensions, and the Quest for the Ultimate Theory
Price: 13.03
Rate: 4
------
Book name: The

## Read CSV file

In [8]:
books = pd.read_csv('books.csv')
books

Unnamed: 0,title,price,rate
0,The Most Perfect Thing: Inside (and Outside) a...,42.96,4
1,Immunity: How Elie Metchnikoff Changed the Cou...,57.36,5
2,Sorting the Beef from the Bull: The Science of...,44.74,4
3,Tipping Point for Planet Earth: How Close Are ...,37.55,1
4,"The Fabric of the Cosmos: Space, Time, and the...",55.91,1
5,Diary of a Citizen Scientist: Chasing Tiger Be...,28.41,1
6,The Origin of Species,10.01,4
7,The Grand Design,13.76,3
8,Peak: Secrets from the New Science of Expertise,16.28,2
9,"The Elegant Universe: Superstrings, Hidden Dim...",13.03,4


# categories file 

In [9]:
page_url = 'https://books.toscrape.com/'
req = requests.get('https://books.toscrape.com/')
soup = BeautifulSoup(req.content)
body = soup.body
cat_list = body.find('ul' , {'class':"nav nav-list"}).find('ul').find_all('li')
cat_link = {li.find('a').text.strip():page_url+li.find('a').attrs['href'] for li in cat_list}

In [10]:
cat_link

{'Travel': 'https://books.toscrape.com/catalogue/category/books/travel_2/index.html',
 'Mystery': 'https://books.toscrape.com/catalogue/category/books/mystery_3/index.html',
 'Historical Fiction': 'https://books.toscrape.com/catalogue/category/books/historical-fiction_4/index.html',
 'Sequential Art': 'https://books.toscrape.com/catalogue/category/books/sequential-art_5/index.html',
 'Classics': 'https://books.toscrape.com/catalogue/category/books/classics_6/index.html',
 'Philosophy': 'https://books.toscrape.com/catalogue/category/books/philosophy_7/index.html',
 'Romance': 'https://books.toscrape.com/catalogue/category/books/romance_8/index.html',
 'Womens Fiction': 'https://books.toscrape.com/catalogue/category/books/womens-fiction_9/index.html',
 'Fiction': 'https://books.toscrape.com/catalogue/category/books/fiction_10/index.html',
 'Childrens': 'https://books.toscrape.com/catalogue/category/books/childrens_11/index.html',
 'Religion': 'https://books.toscrape.com/catalogue/categor

In [11]:
for k , val in cat_link.items():
    

dict_items([('Travel', 'https://books.toscrape.com/catalogue/category/books/travel_2/index.html'), ('Mystery', 'https://books.toscrape.com/catalogue/category/books/mystery_3/index.html'), ('Historical Fiction', 'https://books.toscrape.com/catalogue/category/books/historical-fiction_4/index.html'), ('Sequential Art', 'https://books.toscrape.com/catalogue/category/books/sequential-art_5/index.html'), ('Classics', 'https://books.toscrape.com/catalogue/category/books/classics_6/index.html'), ('Philosophy', 'https://books.toscrape.com/catalogue/category/books/philosophy_7/index.html'), ('Romance', 'https://books.toscrape.com/catalogue/category/books/romance_8/index.html'), ('Womens Fiction', 'https://books.toscrape.com/catalogue/category/books/womens-fiction_9/index.html'), ('Fiction', 'https://books.toscrape.com/catalogue/category/books/fiction_10/index.html'), ('Childrens', 'https://books.toscrape.com/catalogue/category/books/childrens_11/index.html'), ('Religion', 'https://books.toscrape

## Good Job!