In [3]:
##Importing required libraries.

import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd
import os

In [24]:
##Linking to the webpage.

URL = 'https://books.toscrape.com/'
response = requests.get(URL)

page_contents = response.text

In [6]:
#Creating a file and loading the page contents in it.

with open('Bookswebpage.html','w') as f:
    f.write(page_contents)

In [7]:
##Using Beautiful Soup to extract information.

from bs4 import BeautifulSoup
doc = BeautifulSoup(page_contents,'html.parser')

In [13]:
##Getting book titles.

def get_book_titles(doc):
    Book_title_tags = doc.find_all('h3')
    Book_titles = []
    for tags in Book_title_tags:
        Book_titles.append(tags.text)
    return Book_titles

get_book_titles(doc)

['A Light in the ...',
 'Tipping the Velvet',
 'Soumission',
 'Sharp Objects',
 'Sapiens: A Brief History ...',
 'The Requiem Red',
 'The Dirty Little Secrets ...',
 'The Coming Woman: A ...',
 'The Boys in the ...',
 'The Black Maria',
 'Starving Hearts (Triangular Trade ...',
 "Shakespeare's Sonnets",
 'Set Me Free',
 "Scott Pilgrim's Precious Little ...",
 'Rip it Up and ...',
 'Our Band Could Be ...',
 'Olio',
 'Mesaerion: The Best Science ...',
 'Libertarianism for Beginners',
 "It's Only the Himalayas"]

In [12]:
##Getting book prices.

def get_book_price(doc):
    Book_price_tags = doc.find_all('p', class_ = 'price_color')
    Book_price = []
    for tags in Book_price_tags:
        Book_price.append(tags.text.replace('Â',''))
    return Book_price

get_book_price(doc)

['£51.77',
 '£53.74',
 '£50.10',
 '£47.82',
 '£54.23',
 '£22.65',
 '£33.34',
 '£17.93',
 '£22.60',
 '£52.15',
 '£13.99',
 '£20.66',
 '£17.46',
 '£52.29',
 '£35.02',
 '£57.25',
 '£23.88',
 '£37.59',
 '£51.33',
 '£45.17']

In [14]:
##Getting sotck availability.

def get_stock_availability(doc):
    Book_stock_tags = doc.find_all('p', class_ = 'instock availability')
    Book_stock = []
    for tags in Book_stock_tags:
        Book_stock.append(tags.text.strip())
    return Book_stock

get_stock_availability(doc)

['In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock',
 'In stock']

In [25]:
##Collecting title, prices,stock availability and urls
# from multiple pages and storing 
# in a Pandas DataFrame


def get_doc(url):
    response = requests.get(url)
    doc = BeautifulSoup(response.text,'html.parser')
    if response.status_code != 200:
        raise Exception('Failed to load page {}'.format(response))
    return doc


def scrape_multiple_pages(n):
    URL = 'https://books.toscrape.com/catalogue/page-'
    titles,prices,stocks_availability,urls = [],[],[],[]
    
    for page in range(1,n+1):
        doc = get_doc(URL + str(page)+ '.html')
        titles.extend(get_book_titles(doc))
        prices.extend(get_book_price(doc))
        stocks_availability.extend(get_stock_availability(doc))
        urls.extend(get_book_url(doc.find_all('h3')))
        
    book_dict1 = {
                'TITLE':titles,
                'PRICE':prices,
                'STOCK AVAILABILTY':stocks_availability,
                'URL':urls}
    return pd.DataFrame(book_dict1)

scrape_multiple_pages(5)

Unnamed: 0,TITLE,PRICE,STOCK AVAILABILTY,URL
0,A Light in the ...,£51.77,In stock,https://books.toscrape.com/a-light-in-the-atti...
1,Tipping the Velvet,£53.74,In stock,https://books.toscrape.com/tipping-the-velvet_...
2,Soumission,£50.10,In stock,https://books.toscrape.com/soumission_998/inde...
3,Sharp Objects,£47.82,In stock,https://books.toscrape.com/sharp-objects_997/i...
4,Sapiens: A Brief History ...,£54.23,In stock,https://books.toscrape.com/sapiens-a-brief-his...
...,...,...,...,...
95,Lumberjanes Vol. 3: A ...,£19.92,In stock,https://books.toscrape.com/lumberjanes-vol-3-a...
96,"Layered: Baking, Building, and ...",£40.11,In stock,https://books.toscrape.com/layered-baking-buil...
97,Judo: Seven Steps to ...,£53.90,In stock,https://books.toscrape.com/judo-seven-steps-to...
98,Join,£35.67,In stock,https://books.toscrape.com/join_902/index.html


In [22]:
##Creating a CSV file.

scrape_multiple_pages(5).to_csv('bookscrape.csv',index = None)