Automated Web Scraper

In [6]:
from bs4 import BeautifulSoup
import requests

In [12]:
url = 'https://coinmarketcap.com/currencies/bitcoin/'

page = requests.get(url)

soup = BeautifulSoup(page.text, 'html')

print(soup.prettify(formatter=None)[:512])

<!DOCTYPE html>
<html dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="ie=edge" http-equiv="x-ua-compatible"/>
  <meta content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no, shrink-to-fit=no" name="viewport"/>
  <link href="/manifest.json" rel="manifest"/>
  <script type="application/ld+json">
   {"@context":"https://schema.org/","@type":"Product","name":"Bitcoin","aggregateRating":{"@type":"AggregateRating","ratingValue":4.9,"bestRating":5,"ratingCount":2,"i


In [18]:
soup.find('span', class_='sc-65e7f566-0 lsTl')

<span class="sc-65e7f566-0 lsTl" data-role="coin-name" title="Bitcoin">Bitcoin<span class="sc-65e7f566-0 eQBACe coin-name-mobile"> price</span></span>

In [30]:
html = '<span class="sc-65e7f566-0 lsTl">Bitcoin</span>'
#is simply creating a string. The string looks like some HTML code, but for now, it's just text stored in the variable html

In [36]:
crypto_name = soup.find('span', class_='sc-65e7f566-0 lsTl').get_text(strip=True)
print(crypto_name)

Bitcoinprice


In [34]:
crypto_name = soup.find('span', class_='sc-65e7f566-0 lsTl').text
print(crypto_name)

Bitcoin price


In [38]:
original = "Bitcoinprice"
bitcoin_only = original.replace("price", "")
print(bitcoin_only)  # Output: Bitcoin

Bitcoin


In [59]:
price_span = soup.find('span', class_='sc-65e7f566-0 clvjgF base-text')
crypto_price = price_span.text if price_span else None
print(crypto_price)  # Output: $63,316.34

$63,239.97


In [57]:
price_span = soup.find('span', class_='sc-65e7f566-0 clvjgF base-text')
print(price_span)

<span class="sc-65e7f566-0 clvjgF base-text" data-test="text-cdp-price-display">$63,239.97</span>


In [61]:
if crypto_price:
    crypto_price = float(crypto_price.replace(',', '').replace('$', ''))
print(crypto_price)  # Output: 63316.34
#check if crypto_price is not empty.
#remove commas and $, then convert it to a float.
#print the number: Shows the price as 63316.34.
#float() converts a string or an integer

63239.97


In [69]:
price_span = soup.find('span', class_='sc-65e7f566-0 clvjgF base-text')
crypto_price = price_span.text if price_span else None

if crypto_price:
    final_price = float(crypto_price.replace(',', '').replace('$', ''))
else:
    final_price = None

print(final_price)  # Output: 63316.34


63239.97


In [71]:
import pandas as pd

In [87]:
from datetime import datetime
date_time = datetime.now()
print(date_time)

2024-09-23 18:57:21.290382


In [91]:
dict = {'Crypto Name': crypto_name,
        'Price':final_price,
        'TimeStamp':date_time }

df = pd.DataFrame([dict])
df

Unnamed: 0,Crypto Name,Price,TimeStamp
0,Bitcoinprice,63239.97,2024-09-23 18:57:21.290382


In [101]:
df.to_csv(r'C:\Users\Marta\Python Tutorial\Crypto Web Puller\Crypto_Automated_Pull.csv', index=False)

In [111]:
import os
if os.path.exists(r'C:\Users\Marta\Python Tutorial\Crypto Web Puller\Crypto_Automated_Pull.csv'):
    df.to_csv(r'C:\Users\Marta\Python Tutorial\Crypto Web Puller\Crypto_Automated_Pull.csv', mode='a', header = False)
#The code checks if a CSV file exists. If it does, it adds new data from df to the end of the file, keeping all the old data
else:
     df.to_csv(r'C:\Users\Marta\Python Tutorial\Crypto Web Puller\Crypto_Automated_Pull.csv')

Putting all together

In [None]:
import time
automated_crypto_pull()
time.sleep(10)

In [12]:
import requests  # To send HTTP requests to get webpage data
import pandas as pd  # For handling data in tables (DataFrames)
from bs4 import BeautifulSoup  # To parse HTML and extract data
from datetime import datetime  # To work with dates and times
import os  # To interact with the operating system (like file paths)
import time  # To add delays in the code (like sleep)

def automated_crypto_pull():
    url = 'https://coinmarketcap.com/currencies/bitcoin/'

    try:
        # Get the webpage
        page = requests.get(url)
        print(f"Status Code: {page.status_code}")  # Show if we got the page

        if page.status_code != 200:
            print("Failed to retrieve data.")
            return  # Stop if there's an error

        # Parse the HTML
        soup = BeautifulSoup(page.text, 'html.parser')

        # Find the crypto name
        crypto_name = soup.find('span', class_='sc-65e7f566-0 lsTl')
        if crypto_name:
            crypto_name = crypto_name.text.strip()  # Clean up the text
            print(f"Crypto Name: {crypto_name}")
        else:
            print("Crypto name not found.")
            return

        # Find the price
        price_span = soup.find('span', class_='sc-65e7f566-0 clvjgF base-text')
        if price_span:
            crypto_price = price_span.text.strip()  # Get the price
            print(f"Raw Price: {crypto_price}")
            final_price = float(crypto_price.replace(',', '').replace('$', ''))  # Clean and convert to float
        else:
            print("Price not found.")
            return

        # Get the current time
        date_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        # Create a dictionary for the data
        data_dict = {
            'Crypto Name': crypto_name,
            'Price': final_price,
            'TimeStamp': date_time
        }
        print(f"Data to be saved: {data_dict}")

        # Create a DataFrame
        df = pd.DataFrame([data_dict])

        # Save to CSV
        file_path = r'C:\Users\Marta\Python Tutorial\Crypto Web Puller\Crypto_Automated_Pull.csv'
        if os.path.exists(file_path):
            df.to_csv(file_path, mode='a', header=False, index=False)  # Append if exists
        else:
            df.to_csv(file_path, index=False)  # Create new file

    except Exception as e:
        print(f"An error occurred: {e}")

# Run the function and wait a moment
automated_crypto_pull()
time.sleep(2)  # Wait for 2 seconds

Status Code: 200
Crypto Name: Bitcoin price
Raw Price: $63,533.99
Data to be saved: {'Crypto Name': 'Bitcoin price', 'Price': 63533.99, 'TimeStamp': '2024-09-23 19:51:42'}
