In [112]:
#Importing Necessary Libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [113]:
webURL = f"https://books.toscrape.com/"

#Retrieving the name, price, availability of the book
nameList = []
pricesList = []
stockList = []

req = requests.get(webURL)
if req.status_code == 200:

    soup = BeautifulSoup(req.text, 'html.parser')

    prices = soup.find_all('p', class_='price_color')
    names = soup.find_all('h3')
    stocks = soup.find_all('p', class_='instock availability')

    for price in prices:
        pricesList.append(price.get_text())

    for name in names:
        nameList.append(name.get_text())

    for stock in stocks:
        stockList.append(stock.get_text().strip())
else:
    print("Error while parsing HTML Code")

finalPriceList = [price[1:] for price in pricesList]


In [114]:
webURL = "https://books.toscrape.com/"
linksList = []

#Retrieving the link of each book

req = requests.get(webURL)
if req.status_code == 200:

    soup = BeautifulSoup(req.text, 'html.parser')

    headerTags = soup.find_all('h3')

    for tag in headerTags:

        aTag = tag.find('a')

        link = aTag['href'] 
        
        full_link = webURL + link
        linksList.append(full_link)
else:
    print("Error while parsing HTML Code")

print(linksList)


['https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html', 'https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html', 'https://books.toscrape.com/catalogue/soumission_998/index.html', 'https://books.toscrape.com/catalogue/sharp-objects_997/index.html', 'https://books.toscrape.com/catalogue/sapiens-a-brief-history-of-humankind_996/index.html', 'https://books.toscrape.com/catalogue/the-requiem-red_995/index.html', 'https://books.toscrape.com/catalogue/the-dirty-little-secrets-of-getting-your-dream-job_994/index.html', 'https://books.toscrape.com/catalogue/the-coming-woman-a-novel-based-on-the-life-of-the-infamous-feminist-victoria-woodhull_993/index.html', 'https://books.toscrape.com/catalogue/the-boys-in-the-boat-nine-americans-and-their-epic-quest-for-gold-at-the-1936-berlin-olympics_992/index.html', 'https://books.toscrape.com/catalogue/the-black-maria_991/index.html', 'https://books.toscrape.com/catalogue/starving-hearts-triangular-trade-trilogy-1_99

In [115]:
infoOfBook = []

#Acquiring more details about each book by parsing the HTML content of each book's page
for link in linksList:

    req = requests.get(link)

    if(req.status_code==200):

        soup = BeautifulSoup(req.text, 'html.parser')
        tableOfInfo = soup.find_all('table',class_='table table-striped')

        for info in tableOfInfo:
            tableRows = info.find_all('tr')

        for row in tableRows:
            tableValue = info.find('td')

        infoOfBook.append(tableValue.get_text())
    

    else:
        print("Error while parsing HTML Code")

print(infoOfBook)


        

['a897fe39b1053632', '90fa61229261140a', '6957f44c3847a760', 'e00eb4fd7b871a48', '4165285e1663650f', 'f77dbf2323deb740', '2597b5a345f45e1b', 'e72a5dfc7e9267b2', 'e10e1e165dc8be4a', '1dfe412b8ac00530', '0312262ecafa5a40', '30a7f60cd76ca58c', 'ce6396b0f23f6ecc', '3b1c02bac2a429e6', 'a34ba96d4081e6a4', 'deda3e61b9514b83', 'feb7cc7701ecf901', 'e30f54cea9b38190', 'a18a4f574854aced', 'a22124811bfa8350']


In [116]:
#Creating a dataframe using Pandas to be able to create the CSV file and displaying first 10 rows
df = pd.DataFrame({
    'ID':infoOfBook,
    'Name':nameList,
    'price':finalPriceList,
    'Stock':stockList,
    'LinkOfBook':linksList
})

df.head(10)

Unnamed: 0,ID,Name,price,Stock,LinkOfBook
0,a897fe39b1053632,A Light in the ...,£51.77,In stock,https://books.toscrape.com/catalogue/a-light-i...
1,90fa61229261140a,Tipping the Velvet,£53.74,In stock,https://books.toscrape.com/catalogue/tipping-t...
2,6957f44c3847a760,Soumission,£50.10,In stock,https://books.toscrape.com/catalogue/soumissio...
3,e00eb4fd7b871a48,Sharp Objects,£47.82,In stock,https://books.toscrape.com/catalogue/sharp-obj...
4,4165285e1663650f,Sapiens: A Brief History ...,£54.23,In stock,https://books.toscrape.com/catalogue/sapiens-a...
5,f77dbf2323deb740,The Requiem Red,£22.65,In stock,https://books.toscrape.com/catalogue/the-requi...
6,2597b5a345f45e1b,The Dirty Little Secrets ...,£33.34,In stock,https://books.toscrape.com/catalogue/the-dirty...
7,e72a5dfc7e9267b2,The Coming Woman: A ...,£17.93,In stock,https://books.toscrape.com/catalogue/the-comin...
8,e10e1e165dc8be4a,The Boys in the ...,£22.60,In stock,https://books.toscrape.com/catalogue/the-boys-...
9,1dfe412b8ac00530,The Black Maria,£52.15,In stock,https://books.toscrape.com/catalogue/the-black...


In [117]:
#One Hot Encoding the 'Stock' Column
stockOneHot = pd.get_dummies(df['Stock'],prefix='Is',dtype=int)

finalDF = pd.concat([df.drop('Stock',axis=1),stockOneHot],axis=1)

finalDF.head(10)

Unnamed: 0,ID,Name,price,LinkOfBook,Is_In stock
0,a897fe39b1053632,A Light in the ...,£51.77,https://books.toscrape.com/catalogue/a-light-i...,1
1,90fa61229261140a,Tipping the Velvet,£53.74,https://books.toscrape.com/catalogue/tipping-t...,1
2,6957f44c3847a760,Soumission,£50.10,https://books.toscrape.com/catalogue/soumissio...,1
3,e00eb4fd7b871a48,Sharp Objects,£47.82,https://books.toscrape.com/catalogue/sharp-obj...,1
4,4165285e1663650f,Sapiens: A Brief History ...,£54.23,https://books.toscrape.com/catalogue/sapiens-a...,1
5,f77dbf2323deb740,The Requiem Red,£22.65,https://books.toscrape.com/catalogue/the-requi...,1
6,2597b5a345f45e1b,The Dirty Little Secrets ...,£33.34,https://books.toscrape.com/catalogue/the-dirty...,1
7,e72a5dfc7e9267b2,The Coming Woman: A ...,£17.93,https://books.toscrape.com/catalogue/the-comin...,1
8,e10e1e165dc8be4a,The Boys in the ...,£22.60,https://books.toscrape.com/catalogue/the-boys-...,1
9,1dfe412b8ac00530,The Black Maria,£52.15,https://books.toscrape.com/catalogue/the-black...,1


In [118]:
#Saving the dataframe to a CSV file
finalDF.to_csv('result.csv',index=False)