# Amazon Price-Tracker

This project is about web scraping with Python. The goal is to scrape data from a website (in our case amazon.de) for a specific product. The data can be anything that is found on the static page, such as the price, the ratings or the product name. After scraping, the data is stored in a CSV file. The scraping process can be automated with a function: first we build the project up step by step, then we put everything into a function so it can be run easily. The next step is to build a function that sends a notification by e-mail when a specific price is reached.

In [174]:
# import libraries
from bs4 import BeautifulSoup
import requests
import time
import datetime
import smtplib
import csv
import datetime
import pandas as pd

## Building the project step by step

In [161]:
# Product page to scrape
URL = 'https://www.amazon.de/Apple-Watch-Aluminiumgehäuse-Space-Sportarmband-Schwarz/dp/B08J6TQP1B/ref=sr_1_3?__mk_de_DE=ÅMÅŽÕÑ&crid=10A3K0UHADJBU&dchild=1&keywords=apple+watch+se&qid=1631281226&sprefix=apple+w%2Caps%2C507&sr=8-3'

# Browser-like request headers - replace the User-Agent with the one of your own computer
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
    "Accept-Encoding": "gzip, deflate",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "DNT": "1",
    "Connection": "close",
    "Upgrade-Insecure-Requests": "1",
}

# Download the page, parse it, then parse the prettified markup a second time
page = requests.get(URL, headers=headers)
soup1 = BeautifulSoup(page.content, 'html.parser')
soup2 = BeautifulSoup(soup1.prettify(), 'html.parser')

# Raw text of the elements of interest, looked up by their HTML id
title = soup2.find(id='productTitle').get_text()                # product title
price = soup2.find(id='priceblock_ourprice').get_text()         # product price
ratings = soup2.find(id='averageCustomerReviews').get_text()    # average rating
numb_ratings = soup2.find(id='acrCustomerReviewText').get_text()  # number of ratings


In [162]:
# clean the data
import re

# The price text is in German format, e.g. "275,00 €" or "1.299,00 €":
# "." groups thousands and "," marks the decimals. Extract the number itself
# instead of slicing a fixed width, so that prices below 10 € and prices from
# 1.000 € upwards are parsed correctly as well.
price_match = re.search(r'\d+(?:\.\d{3})*(?:,\d+)?', price)
if price_match is None:
    raise ValueError(f'no price found in {price!r}')
price = float(price_match.group().replace('.', '').replace(',', '.'))

# drop surrounding whitespace and the non-breaking spaces inside the title
title = title.strip().replace('\xa0', '')

# the rating text starts with the score, e.g. "4,8"
rating = ratings.strip()[:3]

# keep the leading count token whatever its length
# ("6.973 Sternebewertungen" -> "6.973", "12.345 Sternebewertungen" -> "12.345")
numb_rating = numb_ratings.split()[0]

In [163]:
# show every cleaned value on its own line
print(title, price, rating, numb_rating, sep='\n')

AppleWatch SE (GPS, 40mm) Aluminiumgehäuse Space Grau, Sportarmband Schwarz
275.0
4,8
6.973


In [164]:
# set a datetime value
# datetime.date.today() returns the current local date without a time part;
# it becomes the last entry of the data row built in the next cell
today = datetime.date.today()
print(today)

2021-09-11


In [165]:
# write the collected data into a csv-file

# column names and the matching row of values collected in the cells above
header = ['Title', 'Price', 'Rating', 'Num_Ratings', 'Date']
data = [title, price, rating, numb_rating, today]

# One-time creation of the dataset file: mode 'w' truncates the file and then
# writes the header row followed by the first data row.
# NOTE(review): this is commented out, presumably so that re-running the cell
# does not overwrite rows that were already collected - confirm before re-enabling.
#with open('Amazon-Web-Scraper-Dataset.csv', 'w', newline='', encoding='UTF8') as f:
    #writer = csv.writer(f)
    #writer.writerow(header)
    #writer.writerow(data)

In [166]:
# read in the generated dataset
# the relative path is resolved against the current working directory; the file
# must already exist, because the cell that creates it is commented out
df = pd.read_csv('Amazon-Web-Scraper-Dataset.csv')

In [167]:
# a bare expression on the last line of a notebook cell displays its value
df

Unnamed: 0,Title,Price,Rating,Num_Ratings,Date
0,"AppleWatch SE (GPS, 40mm) Aluminiumgehäuse Spa...",275.0,48,6.973,2021-09-11


# Build an Automated Process for the Price Tracker

In [179]:
# build a function which sends a notification to your email

def send_mail():
    """Send the price-alert e-mail through Gmail's SMTP-over-SSL endpoint.

    Logs in with the hard-coded credentials and sends a fixed notification
    message. The placeholder addresses and the password must be replaced
    with real values before use.

    Raises:
        smtplib.SMTPException: if the login or the delivery is rejected.
        OSError: if the connection to the server cannot be established.
    """
    subject = 'Cheap Apple-Watch SE offer'
    body = "Tobias, this is the moment to buy the Apple Watch - dont wait, just grab it\n Here is the link to the offer: "
    msg = f"Subject: {subject}\n\n{body}"

    # The context manager issues QUIT and closes the socket on exit, even when
    # the login or the delivery raises, so the connection is never left open.
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        # identify yourself to the ESMTP server
        server.ehlo()
        # login with your credentials
        server.login('xxxxxxxxxxx@gmail.com', 'xxxxxxxxxx') #put in email and password
        # send the mail to your account
        server.sendmail(to_addrs='xxxxxxxxxxxx@gmail.com', from_addr ='xxxxxxxxxxx@gmail.com', msg=msg)

In [180]:
# build a function which does the steps before all along and checks the price 

def _element_text(soup, element_id):
    """Return the text of the element with the given HTML id, or raise if the page lacks it."""
    element = soup.find(id=element_id)
    if element is None:
        raise ValueError(f'element with id {element_id!r} not found on the page')
    return element.get_text()


def _parse_price(raw):
    """Convert a German-formatted price text such as '1.299,00 €' into a float."""
    import re

    match = re.search(r'\d+(?:\.\d{3})*(?:,\d+)?', raw)
    if match is None:
        raise ValueError(f'no price found in {raw!r}')
    # in the German format '.' groups thousands and ',' marks the decimals
    return float(match.group().replace('.', '').replace(',', '.'))


def _leading_token(raw):
    """Return the first whitespace-separated token of a text, e.g. the count in '6.973 Sternebewertungen'."""
    tokens = raw.split()
    if not tokens:
        raise ValueError('expected a non-empty text')
    return tokens[0]


def check_price(price_limit=280):
    """Scrape the product page once, log the result and alert on a low price.

    Downloads the product page, extracts title, price, rating and number of
    ratings, appends them together with today's date as one row to
    'Amazon-Web-Scraper-Dataset.csv' (a header row is written first when the
    file is new or empty) and calls send_mail() when the price is below
    ``price_limit``.

    Args:
        price_limit: Price below which the notification mail is sent.

    Raises:
        requests.RequestException: if the download fails, times out or
            returns an HTTP error status.
        ValueError: if an expected element is missing from the page or its
            text cannot be parsed.
    """
    import csv
    import datetime

    # Connect to Website
    URL = 'https://www.amazon.de/Apple-Watch-Aluminiumgehäuse-Space-Sportarmband-Schwarz/dp/B08J6TQP1B/ref=sr_1_3?__mk_de_DE=ÅMÅŽÕÑ&crid=10A3K0UHADJBU&dchild=1&keywords=apple+watch+se&qid=1631281226&sprefix=apple+w%2Caps%2C507&sr=8-3'

    # User-Agent - need to do for own computer
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
        "Accept-Encoding": "gzip, deflate",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "DNT": "1",
        "Connection": "close",
        "Upgrade-Insecure-Requests": "1",
    }

    # without a timeout a stalled connection would block the daily loop forever
    page = requests.get(URL, headers=headers, timeout=30)
    # fail with a clear HTTP error instead of parsing an error page
    page.raise_for_status()

    # the prettified markup is parsed a second time, exactly as in the
    # step-by-step cells, so the extracted text keeps the same shape
    soup1 = BeautifulSoup(page.content, 'html.parser')
    soup2 = BeautifulSoup(soup1.prettify(), 'html.parser')

    # extract and clean the data
    title = _element_text(soup2, 'productTitle').strip().replace('\xa0', '')
    price = _parse_price(_element_text(soup2, 'priceblock_ourprice'))
    rating = _element_text(soup2, 'averageCustomerReviews').strip()[:3]
    numb_rating = _leading_token(_element_text(soup2, 'acrCustomerReviewText'))

    today = datetime.date.today()

    header = ['Title', 'Price', 'Rating', 'Num_Ratings', 'Date']
    data = [title, price, rating, numb_rating, today]

    with open('Amazon-Web-Scraper-Dataset.csv', 'a', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        # in append mode the position starts at the end of the file, so 0
        # means the file is new or empty and still needs its header row
        if f.tell() == 0:
            writer.writerow(header)
        writer.writerow(data)

    if price < price_limit:
        send_mail()
    
    

In [181]:
# set a timer to check the price for a certain time limit

# poll forever: one price check, then wait a full day before the next one
while True:
    check_price()
    time.sleep(86400)  # 24 hours, expressed in seconds

KeyboardInterrupt: 