In [1]:

import time
from datetime import datetime
from io import StringIO

import pandas as pd
from bs4 import BeautifulSoup

!pip install selenium
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [None]:
# Load the saved CoinMarketCap listings and preview the first rows.
listings_csv = '../data/raw/coinmarketcap_cryptocurrency_listings_latest.csv'
df = pd.read_csv(listings_csv)
df.head()

# Get historical data (2017-2024) for each currency from its CoinMarketCap historical-data URL

In [None]:
# Scraping window, encoded as YYYYMMDD integers (the format check_if_limit_reached
# parses with '%Y%m%d' and that is interpolated into the history URL).
start_date = 20170101
# Was 202403026 (nine digits) -- not a valid YYYYMMDD value; 26 March 2024 is intended.
end_date = 20240326

In [None]:
def create_crypt_history_url(coin_name, start=None, end=None):
    """Build the CoinMarketCap historical-data URL for one coin.

    Parameters
    ----------
    coin_name : str
        Coin identifier placed in the URL path (callers in this notebook pass
        values from the listings ``slug`` column).
    start, end : int or str, optional
        Range bounds interpolated into the ``start``/``end`` query parameters.
        When omitted, the notebook-level ``start_date`` / ``end_date`` are used,
        which matches the previous behaviour.

    Returns
    -------
    str
        The full historical-data URL.
    """
    # Resolve the fallbacks at call time so later edits to the notebook-level
    # dates are picked up without re-defining this function.
    if start is None:
        start = start_date
    if end is None:
        end = end_date

    base_url = 'https://coinmarketcap.com'
    return f'{base_url}/currencies/{coin_name}/historical-data/?start={start}&end={end}'

In [None]:
def check_if_limit_reached(driver, start_date):
    """Tell whether the history table currently loaded reaches back to ``start_date``.

    The driver's page source is parsed, the first cell of the table's final
    row is read as a date in ``'%b %d, %Y'`` form, and that date is compared
    with ``start_date`` (parsed as ``'%Y%m%d'``).

    Returns ``True`` when the final row's date is on or before ``start_date``.
    Any error along the way is printed and reported as ``False``.
    """
    limit_reached = False
    try:
        page = BeautifulSoup(driver.page_source, 'html.parser')
        table = page.find('div', {'class': 'history'}).find('table')

        # Date shown in the first cell of the table's final row
        bottom_row = table.findAll('tr')[-1]
        bottom_date = datetime.strptime(bottom_row.find('td').text, '%b %d, %Y')
        requested_start = datetime.strptime(str(start_date), '%Y%m%d')

        limit_reached = bottom_date <= requested_start
    except Exception as err:
        # Best effort: report the problem and let the caller keep going
        print('Exception check_if_limit_reached: ', err)
    return limit_reached

In [None]:
def get_crypt_history(coin):
    """Scrape one coin's price-history table from CoinMarketCap and save it as CSV.

    Opens the coin's historical-data page in Chrome, keeps clicking
    "Load More" until the table reaches the notebook-level ``start_date``
    (or the button stops appearing), then writes the table to
    ``../data/raw/{coin}.csv`` and prints a one-line summary.

    Parameters
    ----------
    coin : str
        Coin identifier used both in the URL and in the output file name.

    Raises
    ------
    selenium.common.exceptions.TimeoutException
        If the history table does not appear within the 10-second wait.
    """
    url = create_crypt_history_url(coin)
    driver = webdriver.Chrome()
    try:
        driver.get(url)

        # wait for the history table to be generated
        wait = WebDriverWait(driver, 10)
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.history table')))

        while True:
            try:
                # Wait for the button to be present in the DOM; it is clicked via
                # JavaScript, so it does not need to be interactable.
                load_more_button = wait.until(
                    EC.presence_of_element_located((By.XPATH, "//button[text()='Load More']"))
                )
                driver.execute_script("arguments[0].scrollIntoView();", load_more_button)
                driver.execute_script("arguments[0].click();", load_more_button)
                # Give the page time to append the newly requested rows
                time.sleep(3)

                if check_if_limit_reached(driver, start_date):
                    break

            except TimeoutException:
                # The button never showed up: nothing more to load
                break
            except Exception as e:
                # Still best effort, but no longer silent: keep what was loaded so far
                print(f'{coin}: stopped loading more rows early: {e}')
                break

        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always close the browser, even when a wait times out or parsing fails
        driver.quit()

    history_div = soup.find('div', {'class': 'history'})
    history_table = history_div.find('table')

    # read_html expects a file-like object; passing literal HTML is deprecated in pandas >= 2.1
    history_df = pd.read_html(StringIO(str(history_table)))[0]

    # save df to csv
    file_name = f'../data/raw/{coin}.csv'
    history_df.to_csv(file_name, index=False)

    print(f'{coin} history saved to {file_name}. Records: {history_df.shape[0]}')
    

In [None]:
# Scrape and save the history for the first five slugs in the listings
for coin in df['slug'].head(5):
    get_crypt_history(coin)