### Imports

In [3]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np


### HTTP Request
*Store the website URL in a variable*

In [4]:
# URL of the Text Book Centre "Books" category page that the request below fetches.
website = "https://textbookcentre.com/catalogue/category/books/"

*Get Request*

In [5]:
# Fetch the category page. requests applies no timeout by default, so without
# one this cell would block forever if the server never answered.
response = requests.get(website, timeout=30)

*Status Code*

In [6]:
# HTTP status of the response; 200 means the page was fetched successfully.
response.status_code

200

*Soup Object*

In [7]:
# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(markup=response.content, features='html.parser')

In [8]:
# Show the parsed document to inspect the page's markup.
soup


<!DOCTYPE html>

<html lang="en" prefix="og: http://ogp.me/ns# product: http://ogp.me/ns/product#">
<head>
<title>
    Books | 
     Text Book Centre

</title>
<meta charset="utf-8"/>
<meta content="width=device-width, minimum-scale=0.25, maximum-scale=1.6, initial-scale=1.0" name="viewport"/>
<meta content="yes" name="apple-mobile-web-app-capable"/>
<meta content="
    
" name="description"/>
<link href="/static/img/favicon.166766220486.png" rel="icon" type="image/png"/>
<link href="https://fonts.googleapis.com/css?family=Lato:400,700&amp;subset=latin-ext" rel="stylesheet"/>
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
    new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
    j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
    'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
    })(window,document,'script','dataLayer','GTM-NRV96SJ');</script>
<meta content="Books" property="og:title"

### Results 

In [9]:
# Collect every product card: the <div> elements whose class attribute is
# exactly this string.
results = soup.find_all(
    'div',
    class_='product-item d-flex col-6 col-sm-4 col-lg-3',
)

In [10]:
# Number of product cards found on this page.
len(results)

40

In [11]:
# Inspect the first product card to see which tags hold the fields of interest.
results[0]

<div class="product-item d-flex col-6 col-sm-4 col-lg-3">
<div class="card product-card">
<a href="/catalogue/atomic-habits-easy-and-proven-way-to-build-good-habits-and-break-bad-ones_27294/">
<div class="product-card-img-container">
<img alt="Atomic Habits: Easy and proven way to build good habits and break bad ones" class="img-fluid card-img-top product-card-img" loading="lazy" src="/media/cache/cf/88/cf88d9d5f84378e2784d61e2002853e7.jpg" srcset="/media/cache/cf/88/cf88d9d5f84378e2784d61e2002853e7.jpg 1x, /media/cache/cf/88/cf88d9d5f84378e2784d61e2002853e7@2x.jpg 2x"/>
</div>
<div class="card-body product-card-body mb-2">
<div>
<h5 class="card-title product-card-name">Atomic Habits: Easy and proven way to build …</h5>
<span class="text-muted small">James Clear</span>
</div>
<div class="w-100 d-flex justify-content-between align-items-center">
<div class="stockrecord-prices">
<span class="stockrecord-price-current">
                
                    KES 1,850
                
     

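### Target Data 
* *Book Name*
* *Author*
* *Price*
* *Review Rating*
* *Review Count* (not extracted in the cells below)
* *Product Link* (not extracted in the cells below)
* *Book Details* (not extracted in the cells below)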

### Book Name 

In [12]:
# The title is in the card's <h5>. The displayed text can be a shortened form
# ending in "…"; the <img> alt attribute carries the full title.
title_tag = results[0].find('h5', class_='card-title product-card-name')
title_tag.get_text()


'Atomic Habits: Easy and proven way to build …'

### Author 

In [13]:
# The author is in the muted <span> under the title. This displays the tag
# itself; calling .get_text() on it gives just the name.
results[0].find('span', class_='text-muted small')

<span class="text-muted small">James Clear</span>

### Price 

In [14]:
# Current price. The tag's text is padded with whitespace, so strip it.
price_tag = results[0].find('span', class_='stockrecord-price-current')
price_tag.get_text().strip()

'KES 1,850'

### Review Rating 

In [15]:
# Review rating shown on the card, with surrounding whitespace removed.
rating_tag = results[0].find('div', class_='product-card--rating small text-muted')
rating_tag.get_text().strip()

'5'

### All Columns 

In [16]:
# Column lists: after the loop, position k of every list describes the k-th
# product card in `results`.
book_name = []
book_author = []
book_price = []
book_rating = []

for result in results:
    # Look up each field's tag once. find() returns None when the card has no
    # such tag (for example, a book without a rating).
    name_tag = result.find('h5', {'class': 'card-title product-card-name'})
    author_tag = result.find('span', {'class': 'text-muted small'})
    price_tag = result.find('span', {'class': 'stockrecord-price-current'})
    rating_tag = result.find('div', {'class': 'product-card--rating small text-muted'})

    # An explicit None check replaces the former bare `except:`: only a missing
    # field becomes NaN, and any other error surfaces instead of being hidden.
    # A missing name is now np.nan (previously the string 'nan'), so every
    # column marks missing values the same way.
    book_name.append(name_tag.get_text() if name_tag is not None else np.nan)
    book_author.append(author_tag.get_text() if author_tag is not None else np.nan)

    # Price and rating text is padded with whitespace in the page markup.
    book_price.append(price_tag.get_text().strip() if price_tag is not None else np.nan)
    book_rating.append(rating_tag.get_text().strip() if rating_tag is not None else np.nan)
        


### DataFrame 

In [23]:
# Assemble the scraped lists into one table: one row per book, one column per field.
book_overview = pd.DataFrame(
    data={
        'Name': book_name,
        'author': book_author,
        'price': book_price,
        'rating': book_rating,
    }
)

In [24]:
# Display the table of books scraped from the first page.
book_overview

Unnamed: 0,Name,author,price,rating
0,Atomic Habits: Easy and proven way to build …,James Clear,"KES 1,850",5.0
1,The 48 Laws of Power,Robert Greene,"KES 2,450",5.0
2,"8 Rules of Love: How to Find it, …",JAY SHETTY,"KES 2,350",
3,Good News Bible,Bible society,KES 986,4.0
4,Across The Bridge by Mwangi Gicheru,Mwangi Gicheru,KES 754,5.0
5,The Light We Carry: Overcoming In Uncertain Ti...,Michelle Obama,"KES 2,750",
6,Outliers: The Story of Success (Small),Malcolm Gladwell,KES 890,5.0
7,Kamusi ya Karne ya 21,Longhorn,"KES 1,250",5.0
8,It Ends With Us: The emotional #1 Sunday …,Colleen Hoover,"KES 1,350",5.0
9,Vintage Classics: Little Women,Louisa May Alcott,KES 350,


In [28]:
### Export to Excel
# Write the table to a workbook, leaving out the integer row index.
book_overview.to_excel(excel_writer='book_overview.xlsx', index=False)

In [36]:
## Pagination - Scraping multiple pages

BASE_URL = "https://textbookcentre.com/catalogue/category/books/"
LAST_PAGE = 116        # pages 1..116 are requested (same span as range(1, 117))
REQUEST_TIMEOUT = 30   # seconds to wait for the server before giving up on a page


def _text_or_nan(card, tag_name, css_class, strip=False):
    """Return the text of the first matching tag inside a product card.

    Parameters
    ----------
    card : product-card element to search in.
    tag_name : name of the tag to look for (e.g. 'h5').
    css_class : exact value of the tag's class attribute.
    strip : when True, remove leading/trailing whitespace from the text.

    Returns
    -------
    The tag's text, or NaN when the card has no such tag. Only a missing tag
    maps to NaN; any other error propagates instead of being swallowed.
    """
    tag = card.find(tag_name, {'class': css_class})
    if tag is None:
        return np.nan
    text = tag.get_text()
    return text.strip() if strip else text


# Column lists: position k of every list describes the k-th book scraped.
book_name = []
book_author = []
book_price = []
book_rating = []

for i in range(1, LAST_PAGE + 1):

    # website
    website = BASE_URL + "?page=" + str(i)

    # request: the timeout stops one unresponsive page from hanging the whole
    # run, and an HTTP error page is reported instead of being parsed silently.
    try:
        response = requests.get(website, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print("Skipping page {}: {}".format(i, exc))
        continue

    # soup
    soup = BeautifulSoup(response.content, 'html.parser')

    # results: one element per product card on this page
    results = soup.find_all('div', {'class': 'product-item d-flex col-6 col-sm-4 col-lg-3'})

    for result in results:
        # A missing name is now np.nan (previously the string 'nan'), so every
        # column marks missing values the same way.
        book_name.append(_text_or_nan(result, 'h5', 'card-title product-card-name'))
        book_author.append(_text_or_nan(result, 'span', 'text-muted small'))
        # Price and rating text is padded with whitespace in the page markup.
        book_price.append(_text_or_nan(result, 'span', 'stockrecord-price-current', strip=True))
        book_rating.append(_text_or_nan(result, 'div', 'product-card--rating small text-muted', strip=True))

# One row per scraped book across all pages.
book_overview = pd.DataFrame({'Name': book_name,'author': book_author,'price': book_price,'rating': book_rating})

In [38]:
# Display the table of books collected across all scraped pages.
book_overview

Unnamed: 0,Name,author,price,rating
0,Atomic Habits: Easy and proven way to build …,James Clear,"KES 1,850",5
1,The 48 Laws of Power,Robert Greene,"KES 2,450",5
2,"8 Rules of Love: How to Find it, …",JAY SHETTY,"KES 2,350",
3,Good News Bible,Bible society,KES 986,4
4,Across The Bridge by Mwangi Gicheru,Mwangi Gicheru,KES 754,5
...,...,...,...,...
4635,Look Inside What Happens When You Eat,Stefano Gatti,"KES 1,290",
4636,Disney Tangled The Series Mosaic Sticker by Nu...,,KES 350,
4637,Bug Club Guided Non Fiction Year 1 Green …,Llewellyn,KES 450,
4638,Let's Celebrate - Read It Yourself with Ladybi...,Randall,KES 200,


In [None]:
# Save the multi-page table to its own workbook, leaving out the integer row index.
book_overview.to_excel(excel_writer='book_overview2.xlsx', index=False)