In [1]:
import random as rand
import time

import matplotlib.pyplot as plt
import pandas as pd
from bs4 import BeautifulSoup as soup
from splinter import Browser

In [21]:
def _parse_history_rows(page_html):
    """
    Parse the HTML of a Yahoo Finance historical-data page into row dicts.

    Args:
        page_html: HTML source of the history page

    Returns:
        rows: list of dicts, one per table row, keyed by the first word of
              each column header plus a "Currency" key holding the coin name

    Raises:
        AttributeError, IndexError: when the table, its header/body or the
        second <h1> are not present in the HTML (e.g. page not fully loaded)
    """
    parsed = soup(page_html, "html.parser")
    table = parsed.find("table")
    # keep only the first word of each header (e.g. "Adj Close ..." -> "Adj")
    headers = [th.text.split(" ")[0] for th in table.find("thead").find_all("th")]
    # the coin name is read from the second <h1> on the page, up to " USD "
    coin = parsed.find_all("h1")[1].text.split(" USD ")[0]

    rows = []
    for tr in table.find("tbody").find_all("tr"):
        cells = [cell.text for cell in tr.find_all("td")]
        if len(cells) < len(headers):
            # a row with fewer cells than headers cannot be mapped onto the columns
            continue
        record = {headers[i]: cells[i] for i in range(len(headers))}
        record["Currency"] = coin
        rows.append(record)
    return rows


def generate_random(crypto_amount, max_retries=20, retry_delay=0.5):
    """
    Scrape historical data for randomly selected cryptocurrencies from Yahoo Finance.

    Args:
        crypto_amount: number of cryptocurrencies to analyze
        max_retries: how many times to re-read a history page while waiting
                     for its table to load before giving up
        retry_delay: seconds to wait between those attempts

    Returns:
        data: DataFrame of randomly selected cryptocurrency data

    Raises:
        ValueError: if crypto_amount exceeds the number of listed cryptocurrencies
        RuntimeError: if a listing row or a history table cannot be found
    """

    base_page = "https://finance.yahoo.com/markets/crypto/all/"
    browser = Browser('chrome')
    # list to hold all crypto data
    records = []

    try:
        # visit yahoo finance crypto page and read the total number of listed cryptos
        browser.visit(base_page)
        listing = soup(browser.html, "html.parser")
        total_text = listing.find("div", class_="total yf-1tdhqb1").text.split(" of ")[1]
        total = int(total_text.replace(",", ""))

        if crypto_amount > total:
            raise ValueError(
                f"crypto_amount ({crypto_amount}) exceeds the {total} listed cryptocurrencies"
            )

        # distinct start offsets in 0 .. total-1; offsets >= total point past the
        # end of the listing and return a page without any row
        selections = rand.sample(range(total), crypto_amount)

        # loop through all random selections and go to historic data page for each selection
        for selection in selections:
            browser.visit(base_page + f"?start={selection}&count=1")
            listing = soup(browser.html, "html.parser")
            first_cell = listing.find("td")
            ticker_link = first_cell.find("a") if first_cell is not None else None
            if ticker_link is None:
                raise RuntimeError(f"No ticker link found for listing index {selection}")
            browser.links.find_by_href(ticker_link["href"]).click()
            browser.links.find_by_partial_href("history").click()

            # the tags may be read before the page has fully loaded, so retry a
            # bounded number of times; rows are collected per page and added only
            # after a complete parse so a failed attempt cannot leave duplicates
            for _ in range(max_retries):
                try:
                    coin_rows = _parse_history_rows(browser.html)
                    break
                except (AttributeError, IndexError):
                    time.sleep(retry_delay)
            else:
                raise RuntimeError(
                    f"History table for listing index {selection} did not load "
                    f"after {max_retries} attempts"
                )
            records.extend(coin_rows)
    finally:
        # always close the browser window, even when scraping fails
        browser.quit()

    data = pd.DataFrame(records)
    return data
    
    

In [22]:
# Scrape historical data for five randomly chosen cryptocurrencies,
# then print a summary of the resulting DataFrame
five_cryptos = generate_random(crypto_amount=5)
five_cryptos.info()

Unnamed: 0,Date,Open,High,Low,Close,Adj,Volume,Currency
0,"Aug 27, 2024",0.000292,0.000293,0.000284,0.000284,0.000284,1910,Solamander
1,"Aug 26, 2024",0.000297,0.000299,0.000287,0.000292,0.000292,5958,Solamander
2,"Aug 25, 2024",0.000234,0.000299,0.000231,0.000297,0.000297,16945,Solamander
3,"Aug 24, 2024",0.000224,0.000238,0.000224,0.000234,0.000234,1704,Solamander
4,"Aug 23, 2024",0.000220,0.000225,0.000215,0.000224,0.000224,1664,Solamander
...,...,...,...,...,...,...,...,...
1034,"Aug 31, 2023",0.000020,0.000020,0.000020,0.000020,0.000020,1,BonFi
1035,"Aug 30, 2023",0.000020,0.000020,0.000020,0.000020,0.000020,1,BonFi
1036,"Aug 29, 2023",0.000020,0.000020,0.000020,0.000020,0.000020,1,BonFi
1037,"Aug 28, 2023",0.000020,0.000020,0.000020,0.000020,0.000020,1,BonFi


In [25]:
# Clean Data and Narrow Down Rows to Common Dates
five_cryptos["Date"] = pd.to_datetime(five_cryptos["Date"])

# Cast to str first so that re-running this cell (when Volume is already int)
# does not fail on the .str accessor; then drop thousands separators and turn
# the "-" placeholder into 0
volume_text = five_cryptos["Volume"].astype(str)
five_cryptos["Volume"] = (
    volume_text
    .str.replace(",", "", regex=False)
    .str.replace("-", "0", regex=False)
)
five_cryptos = five_cryptos.astype({"Open": "float", "High": "float", "Low": "float", "Close": "float", "Adj": "float", "Volume": "int"})

cryptos = five_cryptos["Currency"].unique()

# Keep only the dates that have exactly one row per scraped currency.
# sort=False keeps the dates in order of first appearance, and each group
# keeps its rows in their original order.
keep_indices = []
for date, date_to_compare in five_cryptos.groupby("Date", sort=False):
    if len(date_to_compare) == len(cryptos):
        keep_indices.extend(date_to_compare.index)

# keep_indices holds index labels, so select by label (.loc) rather than by position (.iloc)
comparison_data = five_cryptos.loc[keep_indices]
stats = comparison_data.describe()
stats

Unnamed: 0,Date,Open,High,Low,Close,Adj,Volume,Currency
1038,2023-08-27,0.000020,0.000020,0.000020,0.000020,0.000020,1,BonFi
1037,2023-08-28,0.000020,0.000020,0.000020,0.000020,0.000020,1,BonFi
1036,2023-08-29,0.000020,0.000020,0.000020,0.000020,0.000020,1,BonFi
1035,2023-08-30,0.000020,0.000020,0.000020,0.000020,0.000020,1,BonFi
1034,2023-08-31,0.000020,0.000020,0.000020,0.000020,0.000020,1,BonFi
...,...,...,...,...,...,...,...,...
672,2024-08-27,0.000055,0.000055,0.000054,0.000054,0.000054,10,BonFi
390,2024-08-27,0.001851,0.001851,0.001812,0.001812,0.001812,11,FLOKI X
163,2024-08-27,0.00000000,0.00000000,0.00000000,0.00000000,0.00000000,148,Gorilla In A Coupe
0,2024-08-27,0.000292,0.000293,0.000284,0.000284,0.000284,1910,Solamander


In [52]:
# Display descriptive statistics for the columns of comparison_data
comparison_data.describe()

count    3.200000e+02
mean     1.583183e+04
min      0.000000e+00
25%      2.200000e+01
50%      1.580000e+02
75%      2.165000e+03
max      2.172445e+06
std      1.360853e+05
Name: Volume, dtype: float64

5

5