In [None]:
# importing libraries

# --- standard library ---
import csv

#Provides classes for working with date and time
import datetime
import time

import json
import os

#This module is part of the standard library and provides classes for sending emails using the Simple Mail Transfer Protocol (SMTP).
import smtplib

# --- third-party ---

# Beautifulsoup is a class from bs4 library used to parse HTML and XML documents and extract informations from them.
from bs4 import BeautifulSoup

## data manipulation library:
import pandas as pd

#Simplifies the process of sending HTTP requests and handling the associated responses.
import requests

In [None]:
#Connect to the website

url = 'https://www.amazon.es/lenguaje-programaci%C3%B3n-Python-principio-fin/dp/B0B5Q283BL/ref=sr_1_5?__mk_es_ES=%C3%85M%C3%85%C5%BD%C3%95%C3%91&crid=198J5VCRXBMSA&keywords=python&qid=1702808523&sprefix=pytho%2Caps%2C123&sr=8-5'
# A browser-like User-Agent is sent with the request instead of the default one used by `requests`.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}

# Without a timeout, requests waits indefinitely if the server never answers,
# which would hang the whole notebook run.
page = requests.get(url, headers=headers, timeout=30)

# Stop here on a 4xx/5xx answer rather than parsing an error page as if it
# were the product page.
page.raise_for_status()

#pulling content from the webpage
soup1 = BeautifulSoup(page.content,"html.parser")
# Second pass: parse the prettified markup of the first soup. The cells below read from `soup2`.
soup2 = BeautifulSoup(soup1.prettify(),"html.parser")

# Show only the beginning of the document; printing the full page floods the
# cell output and bloats the saved notebook.
print(str(soup2)[:2000])

In [None]:
# Explore the individual fields of the product page held in `soup2`.
#
# find() returns None when an element is not in the document. Every lookup
# below is therefore guarded, so a missing field prints None instead of
# aborting the cell with AttributeError.

# Find the element with the 'productTitle' id, get its text content, and remove leading/trailing whitespaces
title_tag = soup2.find(id='productTitle')
title = title_tag.get_text(strip=True) if title_tag is not None else None
print(title)

# Find the element with the 'price' id, get its text content, and remove leading/trailing whitespaces
price_tag = soup2.find(id='price')
price = price_tag.get_text(strip=True) if price_tag is not None else None
print(price)

# Find the element with the 'averageCustomerReviews' id, get its text content, and take the first three characters
rate_tag = soup2.find(id='averageCustomerReviews')
rate = rate_tag.get_text(strip=True)[:3] if rate_tag is not None else None
print(rate)

# Find all images in the document
images = soup2.find_all('img')

# Replace with the alt text you want to match
desired_alt_text = 'El lenguaje de programación Python de principio a fin'

# Iterate through the images and print details for the one with the desired alt text
for image in images:
    name = image.get('alt', '')
    link = image.get('src', '')

    # Case-insensitive comparison of the alt text
    if name.lower() == desired_alt_text.lower():
        print(name, link)
        break  # Stop the loop once the desired image is found

# Find the element with the 'bookDescription_feature_div' id, get its text content, and remove leading/trailing whitespaces
description_tag = soup2.find(id='bookDescription_feature_div')
description = description_tag.get_text(strip=True) if description_tag is not None else None
print(description)

# Find the parent div with the 'detailBullets_feature_div' id
product_details = soup2.find('div', id='detailBullets_feature_div')

# Check if product_details exists
if product_details is not None:
    # Find all span elements with the specified class within the parent div
    details = product_details.find_all('span', class_='a-list-item')

    # Iterate through the list under product details and print each element
    for data_element in details:
        text_content = data_element.get_text(strip=True)
        print(text_content)

# Find all li elements with the specified class within the document
relatedproduct = soup2.find_all('li', class_='a-carousel-card')

# Iterate through the list of related products
for carousel in relatedproduct:
    # Find the <a> tag within the current <li> element
    anchor_tag = carousel.find('a')

    # Cards without an <a> tag are skipped
    if anchor_tag is not None:
        # Extract and print the href attribute
        href_link = anchor_tag.get('href')
        print(f"Product: {carousel.get_text(strip=True)}, Href: {href_link}")


In [None]:

# Create a Timestamp for your output to track when data was collected.
# `datetime` comes from the imports cell at the top of the notebook, so it is
# not imported again here.

today = datetime.date.today()

print(today)

In [None]:
#putting everything together
# Collect every field from `soup2` into one dictionary and save it as JSON.
# `json` and `datetime` come from the imports cell at the top of the notebook.


def _text_by_id(soup, element_id):
    """Return the stripped text of the element whose id is ``element_id``.

    Args:
        soup: Parsed document to search (anything exposing ``find(id=...)``).
        element_id: Value of the HTML ``id`` attribute to look up.

    Returns:
        The element's text with surrounding whitespace stripped, or ``None``
        when the document has no such element.
    """
    element = soup.find(id=element_id)
    if element is None:
        return None
    return element.get_text(strip=True)


# Extracted data (a missing element yields None instead of raising AttributeError)
title = _text_by_id(soup2, 'productTitle')
price = _text_by_id(soup2, 'price')

# Only the first three characters of the review block's text are kept.
rating_text = _text_by_id(soup2, 'averageCustomerReviews')
rate = rating_text[:3] if rating_text is not None else None

# First image whose alt text matches the wanted text (case-insensitive); None if there is no match.
images = soup2.find_all('img')
desired_alt_text = 'El lenguaje de programación Python de principio a fin'
image_link = next(
    (
        image.get('src', '')
        for image in images
        if image.get('alt', '').lower() == desired_alt_text.lower()
    ),
    None,
)

description = _text_by_id(soup2, 'bookDescription_feature_div')

# The details container may be absent; fall back to an empty list, matching
# the existence check done in the exploration cell.
product_details = soup2.find('div', id='detailBullets_feature_div')
if product_details is not None:
    details = [
        data_element.get_text(strip=True)
        for data_element in product_details.find_all('span', class_='a-list-item')
    ]
else:
    details = []

# One entry per carousel card that contains a link.
related_products = []
relatedproduct = soup2.find_all('li', class_='a-carousel-card')
for carousel in relatedproduct:
    anchor_tag = carousel.find('a')
    if anchor_tag is not None:
        related_products.append(
            {"Product": carousel.get_text(strip=True), "Href": anchor_tag.get('href')}
        )

# Create a Timestamp for your output to track when data was collected
today = datetime.date.today()

# Create a dictionary to store the extracted data
data = {
    "title": title,
    "price": price,
    "rate": rate,
    "description": description,
    "image_link": image_link,
    "details": details,
    "related_products": related_products,
    "timestamp": str(today),
}

# Convert the dictionary to a JSON string.
# ensure_ascii=False keeps accented characters readable in the file instead of
# escaping them as \uXXXX; the parsed content is the same either way.
json_data = json.dumps(data, indent=2, ensure_ascii=False)

# Save the JSON data to a file. The encoding is explicit so the result does not
# depend on the platform's default encoding.
output_file_path = "AmazonScrapping.json"
with open(output_file_path, "w", encoding="utf-8") as json_file:
    json_file.write(json_data)

print(f"Data saved to {output_file_path}")
    

In [None]:
# Call check_price() repeatedly, pausing one day between calls.
# (The original note says this feeds data into a CSV; that happens inside
# check_price, which is not shown here.)
# NOTE(review): `check_price` is not defined in any visible cell of this
# notebook. Confirm it exists before running, otherwise this presumably
# fails with NameError on the first iteration.
# NOTE(review): the loop has no exit condition, so cells placed after this one
# are not reached by "Run All" until the kernel is interrupted.

while True:
    check_price()
    time.sleep(24 * 60 * 60)  # 86400 seconds, i.e. one day

In [None]:
# If you want to try sending yourself an email (just for fun) when the price drops below a certain level,
# you can try it out with this script

def send_mail(sender='seifhannachi57@gmail.com', password=None, recipient=None):
    """Send the price-alert email through Gmail's SMTP-over-SSL endpoint.

    Args:
        sender: Address used to log in and as the envelope sender.
        password: Password for ``sender``. When omitted, it is read from the
            ``GMAIL_APP_PASSWORD`` environment variable so that no secret has
            to be stored in the notebook.
        recipient: Address that receives the alert. Defaults to ``sender``
            (the alert is mailed to yourself).

    Raises:
        ValueError: If no password is given and ``GMAIL_APP_PASSWORD`` is unset.
        smtplib.SMTPException: If login or delivery is rejected by the server.
        OSError: If the connection to the server cannot be established.
    """
    if password is None:
        password = os.environ.get('GMAIL_APP_PASSWORD')
    if not password:
        raise ValueError(
            "No SMTP password: pass password=... or set the GMAIL_APP_PASSWORD environment variable."
        )
    if recipient is None:
        recipient = sender

    subject = "The book you want is below $15! Now is your chance to buy!"
    body = "Seif, This is the moment we have been waiting for. Now is your chance to pick up the book of your dreams. Don't mess it up! Link here: https://www.amazon.es/lenguaje-programaci%C3%B3n-Python-principio-fin/dp/B0B5Q283BL/ref=sr_1_5?__mk_es_ES=%C3%85M%C3%85%C5%BD%C3%95%C3%91&crid=198J5VCRXBMSA&keywords=python&qid=1702808523&sprefix=pytho%2Caps%2C123&sr=8-5"

    # A blank line separates the header section from the message body.
    msg = f"Subject: {subject}\n\n{body}"

    # The context manager closes the connection even if login or sending fails.
    # login() performs the EHLO handshake itself when needed, so no explicit
    # ehlo() calls are required.
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.login(sender, password)
        # sendmail needs (from_addr, to_addrs, msg); the original call left
        # out the recipient and raised TypeError.
        server.sendmail(sender, recipient, msg)