#  Welcome to my Web Scraper Project

## Project objectives:

1. Scrape an e-commerce site to collect the desired data about a product
2. Clean that data into a more usable format
3. Export that data to a CSV file
4. Write a function that runs on a timer and automatically appends new rows to the CSV file
5. Write a function that sends an email when the price drops below a desired price point

In [None]:
#Import necessary libraries
from bs4 import BeautifulSoup
import requests
import smtplib
import time
import datetime
import csv
import pandas as pd

In [None]:
#Connect to website

URL = 'https://spikeball.com/collections/best-sellers/products/spikeball-pro-set'

#Send a browser-like User-Agent header along with the request
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36"}

#A timeout keeps the request from hanging forever if the site never responds
page = requests.get(URL, headers=headers, timeout=30)

#Stop here on an HTTP error (4xx/5xx) instead of parsing an error page
page.raise_for_status()

soup = BeautifulSoup(page.content, "html.parser")

#Collect the data that we want: the text of the first element matching each tag/class pair

title = soup.find('h1', class_='product-info-title').get_text()

price = soup.find('p', class_='price').get_text()

rating = soup.find('span', class_="stamped-summary-text-1").get_text()


#Show the raw (uncleaned) values that were scraped
print(title)
print(price)
print(rating + ' out of 5 stars')

In [None]:
#Normalize the scraped strings: trim whitespace and turn the price into a number

title0 = title.strip()

#Drop the first character and the last four characters of the trimmed price text,
#then parse the remainder as a float
#NOTE(review): presumably a currency symbol and a ' USD'-style suffix - confirm against the live page
price0 = float(price.strip()[1:][:-4])

print(title0, price0, sep='\n')

In [None]:
#Record the local date of this scrape so each row shows when it was collected

today = datetime.datetime.now().date()
print(today.isoformat())

In [None]:
#Start a fresh csv file holding the column names followed by the first scraped row

header = ['Date', 'Title', 'Price', 'Rating']
data = [today, title0, price0, rating]

#Mode 'w' truncates any existing file of the same name before writing
with open('SpikeballWebScraperDataset.csv', mode='w', newline='', encoding='UTF8') as f:
    writer = csv.writer(f)
    writer.writerows([header, data])

In [None]:
#Load the csv into a pandas dataframe and print it, so the file does not have to be opened by hand

df = pd.read_csv(filepath_or_buffer='SpikeballWebScraperDataset.csv')
print(df)

### The code above only walks through the process and creates the initial CSV file. Once the CSV exists, only the code below needs to be run. Re-running the code above will recreate the CSV file and overwrite any data already stored in it.

In [None]:
#Combine all of the above code into one function that appends the csv by adding a new row

def _element_text(soup, tag, css_class):
    """Return the text of the first ``tag`` element carrying ``css_class``.

    Raises:
        ValueError: if the page contains no such element, so a changed page
            layout is reported clearly instead of as an AttributeError on None.
    """
    element = soup.find(tag, class_=css_class)
    if element is None:
        raise ValueError(
            "No <{} class='{}'> element found on the product page".format(tag, css_class)
        )
    return element.get_text()


def _parse_price(price_text):
    """Return the first number found in ``price_text`` as a float.

    Thousands separators are ignored, so '$1,099.00 USD' parses as 1099.0.

    Raises:
        ValueError: if ``price_text`` contains no number.
    """
    import re  # imported locally: only this helper needs it

    match = re.search(r"\d[\d,]*(?:\.\d+)?", price_text)
    if match is None:
        raise ValueError("No numeric price found in {!r}".format(price_text))
    return float(match.group().replace(",", ""))


def check_price():
    """Scrape the product page once, append the result to the csv, and alert on a low price.

    Fetches the Spikeball Pro Set page, reads its title, price and rating,
    appends one ``[date, title, price, rating]`` row to
    'SpikeballWebScraperDataset.csv', and calls ``send_email()`` when the
    price is below 100. ``send_email`` must already be defined when that
    happens.

    Returns:
        None

    Raises:
        requests.exceptions.RequestException: if the request times out, fails,
            or the server answers with an HTTP error status.
        ValueError: if an expected element is missing from the page or the
            price text contains no number.
    """
    url = 'https://spikeball.com/collections/best-sellers/products/spikeball-pro-set'

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36"}

    # Without a timeout a stalled connection would block the caller forever.
    page = requests.get(url, headers=headers, timeout=30)
    # Fail on 4xx/5xx responses rather than parsing an error page.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, "html.parser")

    # Collect and clean the data that we want
    title0 = _element_text(soup, 'h1', 'product-info-title').strip()
    price0 = _parse_price(_element_text(soup, 'p', 'price'))
    rating = _element_text(soup, 'span', 'stamped-summary-text-1')

    # Take a timestamp to know when we last collected this data
    today = datetime.date.today()

    header = ['Date', 'Title', 'Price', 'Rating']
    data = [today, title0, price0, rating]

    # Append-only mode: existing rows are kept and the new row goes at the end
    with open('SpikeballWebScraperDataset.csv', 'a', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        # In append mode the position starts at the end of the file, so 0 means
        # the file is new or empty and still needs its column names.
        if f.tell() == 0:
            writer.writerow(header)
        writer.writerow(data)

    # Sends me an email if the price drops below $100
    if price0 < 100:
        send_email()

In [None]:
#Scheduler: scrape the price and add a row to the CSV, then wait a full day before repeating
#This loop never exits on its own, so define send_email before starting it;
#check_price calls send_email when the price is below 100

while True:
    check_price()
    #24 hours * 60 minutes * 60 seconds = 86,400 seconds, i.e. one day
    time.sleep(24 * 60 * 60)

In [None]:
#Reload the csv into a pandas dataframe and print it, so the data can be viewed without opening the CSV

df = pd.read_csv(filepath_or_buffer='SpikeballWebScraperDataset.csv')
print(df)

In [None]:
#Utilizes the smtplib package to send an email to myself (just for fun) when a price hits below a certain level

def send_email():
    """Send the price-drop alert email through Gmail's SMTP-over-SSL endpoint.

    Connects to smtp.gmail.com on port 465, logs in, prints the message that
    is about to be sent, and sends it. The connection is closed when the
    function finishes, even if logging in or sending raises.

    Returns:
        None

    Raises:
        smtplib.SMTPException: if logging in or sending fails.
        OSError: if the connection to the mail server cannot be established.
    """
    # NOTE: the addresses and password below are placeholders; replace them
    # before use and avoid committing real credentials to the notebook.
    subject = "The Spikeball Pro Kit is below $100! Now is your chance to buy!"
    body = "(Insert person name), this is the moment you have been waiting for. Click this link here to buy now: https://spikeball.com/collections/best-sellers/products/spikeball-pro-set"

    # A blank line separates the header section from the message body.
    msg = "Subject: {}\n\n{}".format(subject, body)

    # The context manager issues QUIT and closes the socket on exit, so the
    # connection is no longer left open after the email is sent.
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        # login() sends EHLO itself when needed, so no explicit ehlo() calls
        # are required; the connection is already encrypted, so no STARTTLS.
        server.login('email@gmail.com', 'email_password')

        print(msg)

        server.sendmail('email@gmail.com', 'emailyouwanttosendto@gmail.com', msg)