## Scraping Book's title and price from Amazon

In [2]:
# Importing all the required libraries

import requests as re
from bs4 import BeautifulSoup
import datetime
import pandas as pd

In [3]:
# Getting all the required data as html 

url = 'https://www.amazon.in/s?i=stripbooks&bbn=976389031&rh=n%3A976389031%2Cp_85%3A10440599031%2Cp_n_availability%3A1318484031&pf_rd_i=976389031&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_p=d9156ff5-f4fe-4d30-acc4-be8a85626096&pf_rd_r=RPJ07AB4GA6NNQH82SE9&pf_rd_s=merchandised-search-4&ref=AF_WIN_bub_w_cml_t_1'

headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.203"}

webpage = re.get(url, headers=headers)

soup1 =BeautifulSoup(webpage.content, 'lxml')

soup2 = BeautifulSoup(soup1.prettify(), 'lxml')

link_title = "a-size-medium a-color-base a-text-normal"

link_price = "a-price-whole"

# Finding specific tags and attributes

title = soup2.find_all('span', attrs={'class':link_title})

price = soup2.div.find_all('span', attrs={'class':link_price})


print(price)
print(title)



[<span class="a-price-whole">
                           259
                          </span>, <span class="a-price-whole">
                           399
                          </span>, <span class="a-price-whole">
                           386
                          </span>, <span class="a-price-whole">
                             353
                             <span class="a-price-decimal">
                              .
                             </span>
</span>, <span class="a-price-whole">
                             0
                            </span>, <span class="a-price-whole">
                           265
                          </span>, <span class="a-price-whole">
                             189
                             <span class="a-price-decimal">
                              .
                             </span>
</span>, <span class="a-price-whole">
                             0
                            </span>, <span class="a-price-whol

In [15]:
'''Appending prices of books to an empty list'''

price_lst = []

for cost in price:
    price_lst.append(cost.text.strip())


print(price_lst)

'''Appending titles of books to an empty list'''

title_lst = []

for i in title:
    title_lst.append(i.text.strip())

print(title_lst)


['259', '399', '386', '353\n                             \n                              .', '0', '265', '189\n                             \n                              .', '0', '259', '164\n                             \n                              .', '0', '292', '148', '78', '699', '184', '173\n                             \n                              .', '0', '115', '15', '0', '299', '127', '19', '98', '294', '360', '165', '95\n                             \n                              .', '0', '335', '384\n                             \n                              .', '0', '89']
['Hindus in Hindu Rashtra (Eighth-Class Citizens and Victims of State-Sanctioned Apartheid)', 'My First Mini Library : Boxset of 10 Board Books', 'Atomic Habits: the life-changing million-copy #1 bestseller', 'Ikigai', 'The Psychology Of Money', 'Shrimad Bhagwat Geeta Yatharoop', 'Indian Polity for UPSC (English| 7th Edition) |Civil Services Exam| State Administrative Exams', "Don't Believe Eve

In [16]:
# Creating a dictionary with 2 lists scraped before

dict = {title_lst[key] : price_lst[key] for key in range(len(title_lst))}

dict

{'Hindus in Hindu Rashtra (Eighth-Class Citizens and Victims of State-Sanctioned Apartheid)': '259',
 'My First Mini Library : Boxset of 10 Board Books': '399',
 'Atomic Habits: the life-changing million-copy #1 bestseller': '386',
 'Ikigai': '353\n                             \n                              .',
 'The Psychology Of Money': '0',
 'Shrimad Bhagwat Geeta Yatharoop': '265',
 'Indian Polity for UPSC (English| 7th Edition) |Civil Services Exam| State Administrative Exams': '189\n                             \n                              .',
 "Don't Believe Everything You Think (English)": '0',
 'The Power of Your Subconscious Mind': '259',
 'Colouring Books for Kids (Pack of 12 Books)': '164\n                             \n                              .',
 'Do It Today: Overcome procrastination, improve productivity and achieve more meaningful things [Paperback] Foroux, Darius': '0',
 'Brain Activity Book for Kids - 200+ Activities for Age 3+': '292',
 'BlackBook of Engli

In [17]:
'''Getting the date using datetime'''
today = datetime.date.today()

In [20]:
# Writing the data into csv file

import csv

heading = ['Date','Product', 'Price']
data = [today, title_lst, price_lst]

'''Writing the headings into a csv file'''

with open('AmazonScrape.csv', 'w', newline='', encoding='UTF8') as c:
    csvwriter = csv.writer(c)
    csvwriter.writerow(heading)

In [21]:
'''Appending all the values into the csv file'''
with open('AmazonScrape.csv', 'a+', newline='', encoding='UTF8') as c:
    for key, value in dict.items():
    
        data = [today, key, value]
        csvwriter = csv.writer(c)
        csvwriter.writerow(data)


In [31]:
df = pd.read_csv("AmazonScrape.csv")
df.head()

Unnamed: 0,Date,Product,Price
0,2023-08-22,Hindus in Hindu Rashtra (Eighth-Class Citizens...,259
1,2023-08-22,My First Mini Library : Boxset of 10 Board Books,399
2,2023-08-22,Atomic Habits: the life-changing million-copy ...,386
3,2023-08-22,Ikigai,353\n \n ...
4,2023-08-22,The Psychology Of Money,0


In [32]:
'''Using Regex to remove the newlines'''

import re
df['Price'] = df['Price'].apply(lambda x : re.sub(r'\n', '', x))

In [34]:
df['Price'] = df['Price'].apply(lambda x : re.sub(r' ', '', x))

In [35]:
df.Price

0      259
1      399
2      386
3     353.
4        0
5      265
6     189.
7        0
8      259
9     164.
10       0
11     292
12     148
13      78
14     699
15     184
Name: Price, dtype: object

In [38]:
df.to_string

<bound method DataFrame.to_string of           Date                                            Product Price
0   2023-08-22  Hindus in Hindu Rashtra (Eighth-Class Citizens...   259
1   2023-08-22   My First Mini Library : Boxset of 10 Board Books   399
2   2023-08-22  Atomic Habits: the life-changing million-copy ...   386
3   2023-08-22                                             Ikigai  353.
4   2023-08-22                            The Psychology Of Money     0
5   2023-08-22                    Shrimad Bhagwat Geeta Yatharoop   265
6   2023-08-22  Indian Polity for UPSC (English| 7th Edition) ...  189.
7   2023-08-22       Don't Believe Everything You Think (English)     0
8   2023-08-22                The Power of Your Subconscious Mind   259
9   2023-08-22        Colouring Books for Kids (Pack of 12 Books)  164.
10  2023-08-22  Do It Today: Overcome procrastination, improve...     0
11  2023-08-22  Brain Activity Book for Kids - 200+ Activities...   292
12  2023-08-22  BlackBook o

In [37]:
df.to_csv('AmazonScrapeFinal.csv')