In [1]:
import time
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from soup2dict import convert
import re
import os
from urllib.parse import urljoin
from timeit import default_timer as timer
import datetime
import requests
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import shutil
from totaltimeout import Timeout

from utils import get_chromedriver, scrape_coinmarketcap, scrape_icomarketcap_link_only

In [2]:
# Path to the chromedriver executable. Set the CHROMEDRIVER_PATH environment variable to
# override the machine-specific default below without editing the notebook.
CHROMEDRIVER_PATH = os.environ.get(
    'CHROMEDRIVER_PATH',
    r"C:\Users\Alessandro Bitetto\Downloads\UniPV\ICOs\WebDriver\chromedriver")

In [3]:
# set folders
# Paths are assembled with os.path.join so the notebook is not tied to the Windows
# separator; on Windows the resulting strings are identical to the previous literals.
CHECKPOINT_FOLDER = os.path.join('.', 'Checkpoints')
RESULTS_FOLDER = os.path.join('.', 'Results')
COINMARKETCAP_FOLDER = os.path.join(CHECKPOINT_FOLDER, 'Coinmarketcap')
PRICE_FOLDER = os.path.join(COINMARKETCAP_FOLDER, 'Price')
PICKLE_FOLDER = os.path.join(COINMARKETCAP_FOLDER, 'Pickle')
SCREENSHOT_FOLDER = os.path.join(COINMARKETCAP_FOLDER, 'Screenshot')

# create every output folder up front; exist_ok makes the cell safe to re-run
for _folder in (CHECKPOINT_FOLDER, RESULTS_FOLDER, COINMARKETCAP_FOLDER,
                PRICE_FOLDER, PICKLE_FOLDER, SCREENSHOT_FOLDER):
    os.makedirs(_folder, exist_ok=True)

## Get currency URLs

In [4]:
# Base URL of the CoinMarketCap listing; page URLs are built from it with urljoin and it is
# stripped from currency URLs when they are split into path segments.
MAIN_PAGE = 'https://coinmarketcap.com/'

In [7]:
# get total pages and total currencies
# (a timeout is set so a stalled connection raises instead of hanging the cell forever)
main_response = requests.get(MAIN_PAGE, timeout=60)
soup = BeautifulSoup(main_response.content, 'html.parser')

# NOTE: the class string below is site-generated markup; this lookup breaks if the page layout changes
tt=convert(soup.find_all('div', class_="sc-aef7b723-0 sc-18df06a5-0 hBoqvQ", recursive=True))
tot_curr = int(soup.find_all(string = re.compile('Showing '))[0].split('out of ')[1])
tot_pages = int(tt['div'][0]['div'][0]['#text'].split('... ')[-1])
print(f'{tot_curr} currencies found in {tot_pages} pages\n')

# get list
start = timer()
records = []          # one dict per currency; the DataFrame is built once after the loop
skipped_rows = 0      # table rows whose layout matched neither known variant
download_date=datetime.datetime.now().strftime("%d/%m/%Y")
for page_number in range(1, tot_pages + 1):

    print(f'- Downloading: {page_number} / {tot_pages}', end='\r')

    # page url (the first page is the main page itself)
    if page_number == 1:
        page_url=MAIN_PAGE
    else:
        page_url=urljoin(MAIN_PAGE, f'?page={page_number}')

    # get info
    response = requests.get(page_url, timeout=60)
    soup = BeautifulSoup(response.content, 'html.parser')
    tt=convert(soup)

    table_list=tt['html'][0]['body'][0]['div'][0]['div'][0]['div'][0]['div'][1]['div'][0]['div'][0]['div'][3]['table'][0]['tbody'][0]['tr']
    for row in table_list:

        # reset per row so that values from the previous row can never leak into this one
        url = name = ticker = None

        if len(row) == 2:  # top rows where graphs are loaded
            url=row['td'][2]['div'][0]['a'][0]['@href']
            name=row['td'][2]['div'][0]['a'][0]['div'][0]['div'][0]['p'][0]['#text']
            ticker=row['td'][2]['div'][0]['a'][0]['div'][0]['div'][0]['div'][0]['p'][0]['#text']

        elif len(row) == 3:  # bottom rows where graphs are not loaded
            # first cell that contains a link
            ind=np.where(['a' in d.keys() for d in row['td']])[0][0]
            url=row['td'][ind]['a'][0]['@href']
            for span in row['td'][ind]['a'][0]['span']:
                if '@class' in span.keys():
                    if span['@class']==['crypto-symbol']:
                        ticker=span['#text']
                else:
                    name=span['#text']

        if url is None:
            # unknown row layout: skip it instead of re-adding the previous row's data
            skipped_rows += 1
            continue

        records.append({'url': urljoin(MAIN_PAGE, url), 'name': name, 'ticker': ticker})

df_list=pd.DataFrame(records, columns=['url', 'name', 'ticker'])
df_list['ListDownloadedOn']=download_date
print('\n\nTotal elapsed time:', str(datetime.timedelta(seconds=round(timer()-start))))
if skipped_rows > 0:
    print(f'\n######### Warning: {skipped_rows} table rows with unrecognised layout skipped')

# sanity checks on the downloaded list
if tot_curr != len(df_list):
    print(f'\n######### Warning: {len(df_list)} currencies downloaded (expected {tot_curr})')
if df_list.isna().to_numpy().any():
    print('\n######### Warning: missing in df_list')

# drop exact duplicate rows and report how many were removed
orig_row=len(df_list)
df_list=df_list.drop_duplicates()
if len(df_list) != orig_row:
    print(f'\n- {orig_row - len(df_list)} duplicates found. Removed.')

print(f'- {len(df_list)} currencies remaining.')


def split_url(x):
    '''Split the path of x['url'] (relative to MAIN_PAGE) into a Series url1, url2, ...'''
    segments=x['url'].replace(MAIN_PAGE, '').strip('/').split('/')
    return pd.Series({f'url{pos}': segment for pos, segment in enumerate(segments, start=1)})


# number of path segments of each url, followed by one column per segment
df_list['parent_url_count']=df_list['url'].apply(
    lambda x: len(x.replace(MAIN_PAGE, '').strip('/').split('/')))
df_list=pd.concat([df_list, df_list.apply(split_url, axis=1)], axis=1)

display(df_list)

# summary of url structure and of the uniqueness of each identifier
parent_url_count=df_list['parent_url_count'].unique()
main_parent_url=df_list['url1'].unique()
print(f'- Max number of parent url: {parent_url_count}')
print(f'- Main parent url is: {main_parent_url}')
unique_url, unique_name, unique_ticker, unique_url2 = (
    df_list[col].nunique() for col in ('url', 'name', 'ticker', 'url2'))
print(f'\n- Unique "url": {unique_url} / {len(df_list)}')
print(f'- Unique "name": {unique_name} / {len(df_list)}')
print(f'- Unique "ticker": {unique_ticker} / {len(df_list)}')
print(f'- Unique "url2" (used to save single files): {unique_url2} / {len(df_list)}      <<--------')


# save results
save_path=os.path.join(RESULTS_FOLDER,'10a_CoinMarketCap_currency_list.csv')
df_list.to_csv(save_path, sep=';', index=False)
print('\nData saved in ', save_path)

9705 currencies found in 98 pages

- Downloading: 98 / 98

Total elapsed time: 0:01:17

- 36 duplicates found. Removed.
- 9669 currencies remaining.


Unnamed: 0,url,name,ticker,ListDownloadedOn,parent_url_count,url1,url2
0,https://coinmarketcap.com/currencies/bitcoin/,Bitcoin,BTC,19/05/2023,2,currencies,bitcoin
0,https://coinmarketcap.com/currencies/ethereum/,Ethereum,ETH,19/05/2023,2,currencies,ethereum
0,https://coinmarketcap.com/currencies/tether/,Tether,USDT,19/05/2023,2,currencies,tether
0,https://coinmarketcap.com/currencies/bnb/,BNB,BNB,19/05/2023,2,currencies,bnb
0,https://coinmarketcap.com/currencies/usd-coin/,USD Coin,USDC,19/05/2023,2,currencies,usd-coin
...,...,...,...,...,...,...,...
0,https://coinmarketcap.com/currencies/familytoken/,FamilyToken,FT-2,19/05/2023,2,currencies,familytoken
0,https://coinmarketcap.com/currencies/penny/,Penny,PENNY,19/05/2023,2,currencies,penny
0,https://coinmarketcap.com/currencies/octo-fina...,Octo,OCTO,19/05/2023,2,currencies,octo-finance
0,https://coinmarketcap.com/currencies/basedpepe/,BASEDPEPE,BPEPE,19/05/2023,2,currencies,basedpepe


- Max number of parent url: [2]
- Main parent url is: ['currencies']

- Unique "url": 9669 / 9669
- Unique "name": 9596 / 9669
- Unique "ticker": 8361 / 9669
- Unique "url2" (used to save single files): 9669 / 9669      <<--------

Data saved in  .\Results\10a_CoinMarketCap_currency_list.csv


## Download price time series and website/whitepaper info

In [7]:
RELOAD_PKL=True          # if True reuse the per-currency pickle when it already exists
RECOVER_MISSING=True     # if True, re-scrape cached entries whose price series is not final. Best used with RELOAD_PKL.

df_list=pd.read_csv(os.path.join(RESULTS_FOLDER,'10a_CoinMarketCap_currency_list.csv'), sep=';')

# Per-currency frames are collected in a list and concatenated once after the loop;
# the progress counters are kept as running totals instead of re-scanning the whole frame.
scraped_rows=[]
check_price_status=0
check_website_status=0
check_whitepaper_status=0
for index, row in df_list.iterrows():

    file_name=row['url2']
    pickle_path=os.path.join(PICKLE_FOLDER, file_name+'.pkl')

    message=f'- Scraping: {str(index + 1)} / {len(df_list)} "{file_name}"  last interaction: {datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S")}'
    print(message, end='\r')

    if not RELOAD_PKL or not os.path.exists(pickle_path):

        add_row=scrape_coinmarketcap(row=row, skip_download=False, check_cookies=False, chromedriver_path=CHROMEDRIVER_PATH,
                                     price_folder=PRICE_FOLDER, screenshot_folder=SCREENSHOT_FOLDER)
        add_row.to_pickle(pickle_path, protocol=-1)

    else:
        # checkpoint written by a previous run of this cell
        add_row=pd.read_pickle(pickle_path)

        # positional lookup: the cached frame holds a single row whatever its index label is
        if RECOVER_MISSING and add_row['PriceSeriesStatus'].iloc[0] not in ['DOWNLOADED', 'UNTRACKED', 'PAGE_NOT_AVAILABLE']:
            add_row=scrape_coinmarketcap(row=row, skip_download=False, check_cookies=False, add_row=add_row, chromedriver_path=CHROMEDRIVER_PATH,
                                         price_folder=PRICE_FOLDER, screenshot_folder=SCREENSHOT_FOLDER)
            add_row.to_pickle(pickle_path, protocol=-1)

    scraped_rows.append(add_row)

    check_price_status += int((add_row['PriceSeriesStatus'] == 'DOWNLOADED').sum())
    check_website_status += int((add_row['WebsiteStatus'] == 'FOUND').sum())
    check_whitepaper_status += int((add_row['WhitepaperStatus'] == 'FOUND').sum())
    print(message + f'  Price: {check_price_status}  Website: {check_website_status}  Whitepaper: {check_whitepaper_status}                                 ', end='\r')

scrape_df=pd.concat(scraped_rows).reset_index(drop=True) if scraped_rows else pd.DataFrame()
display(scrape_df['PriceSeriesStatus'].value_counts().to_frame())
print(f'Price found in folder: {len(os.listdir(PRICE_FOLDER))}')
display(scrape_df['PageType'].value_counts().to_frame())
display(scrape_df['WebsiteStatus'].value_counts().to_frame())
display(scrape_df['WhitepaperStatus'].value_counts().to_frame())

print('\n\nTotal elapsed time:', str(datetime.timedelta(seconds=round(scrape_df['TotTimeSec'].sum()))))

# save results
save_path=os.path.join(RESULTS_FOLDER,'10b_CoinMarketCap_scraping_list.csv')
scrape_df.to_csv(save_path, index=False, sep=';')
print('\nData saved in ', save_path)

- Scraping: 9669 / 9669 "metabit"  last interaction: 03/06/2023 09:59:39  Price: 9393  Website: 9054  Whitepaper: 5929                                                                               

Unnamed: 0,PriceSeriesStatus
DOWNLOADED,9393
UNTRACKED,275
PAGE_NOT_AVAILABLE,1


Price found in folder: 9393


Unnamed: 0,PageType
NORMAL,5474
ALTERNATIVE,3581
ERROR,614


Unnamed: 0,WebsiteStatus
FOUND,9054
ERROR,615


Unnamed: 0,WhitepaperStatus
FOUND,5929
ERROR,3740




Total elapsed time: 8 days, 18:27:22

Data saved in  .\Results\10b_CoinMarketCap_scraping_list.csv


## Recover only links

In [5]:
pickle_path=os.path.join(COINMARKETCAP_FOLDER,'CoinMarketCap_scraping_list_link_only.pkl')

df_list=pd.read_csv(os.path.join(RESULTS_FOLDER,'10a_CoinMarketCap_currency_list.csv'), sep=';')
# resume from the checkpoint written by a previous run, if any
if not os.path.exists(pickle_path):
    scrape_link_df=pd.DataFrame(columns=['url'])
else:
    scrape_link_df=pd.read_pickle(pickle_path)

# set of already scraped urls: constant-time membership test instead of scanning the column
scraped_urls=set(scrape_link_df['url'].values)
pending_rows=[]     # rows scraped since the last checkpoint
for index, row in df_list.iterrows():

    file_name=row['url2']
    url=row['url']

    print(f'- Scraping: {str(index + 1)} / {len(df_list)} {file_name}                            ', end='\r')

    if url not in scraped_urls:
        add_row=scrape_icomarketcap_link_only(row)
        pending_rows.append(add_row)
        scraped_urls.add(url)

    # periodic checkpoint, written only when something new has been scraped
    if index % 200 == 0 and pending_rows:
        scrape_link_df=pd.concat([scrape_link_df] + pending_rows)
        pending_rows=[]
        scrape_link_df.to_pickle(pickle_path, protocol=-1)

# flush rows scraped after the last checkpoint
if pending_rows:
    scrape_link_df=pd.concat([scrape_link_df] + pending_rows)
    pending_rows=[]

print('\n\nTotal elapsed time:', str(datetime.timedelta(seconds=round(scrape_link_df['TotTimeSec'].sum()))))

display(pd.DataFrame({'Error': np.where(scrape_link_df['error']!='', 'ERROR', 'OK')})['Error'].value_counts().to_frame())

# save results
scrape_link_df.to_pickle(pickle_path, protocol=-1)
print('\nData saved in ', pickle_path)

- Scraping: 1 / 9669 bitcoin                            - Scraping: 2 / 9669 ethereum                            - Scraping: 3 / 9669 tether                            - Scraping: 4 / 9669 bnb                            - Scraping: 5 / 9669 usd-coin                            - Scraping: 6 / 9669 xrp                            - Scraping: 7 / 9669 cardano                            - Scraping: 8 / 9669 dogecoin                            - Scraping: 9 / 9669 polygon                            - Scraping: 10 / 9669 solana                            - Scraping: 11 / 9669 litecoin                            - Scraping: 12 / 9669 tron                            - Scraping: 13 / 9669 polkadot-new                            - Scraping: 14 / 9669 binance-usd                            - Scraping: 15 / 9669 shiba-inu                            - Scraping: 16 / 9669 avalanche                            - Scraping: 17 / 9669 multi-collateral-dai                            - Scra

- Scraping: 1502 / 9669 brn-metaverse                            - Scraping: 1503 / 9669 acknoledger                            - Scraping: 1504 / 9669 appcoins                            - Scraping: 1505 / 9669 phoenixcoin                            - Scraping: 1506 / 9669 education-ecosystem                            - Scraping: 1507 / 9669 deepspace                            - Scraping: 1508 / 9669 sirin-labs-token                            - Scraping: 1509 / 9669 levolution                            - Scraping: 1510 / 9669 databroker                            - Scraping: 1511 / 9669 ethpad                            - Scraping: 1512 / 9669 moeda-loyalty-points                            - Scraping: 1513 / 9669 movez                            - Scraping: 1514 / 9669 whiteheart                            - Scraping: 1515 / 9669 unistake                            - Scraping: 1516 / 9669 omni                            - Scraping: 1517 / 9669 tenup                

- Scraping: 9669 / 9669 metabit                                                                          

Total elapsed time: 3:16:59


Unnamed: 0,Error
OK,9661
ERROR,8



Data saved in  .\Checkpoints\Coinmarketcap\CoinMarketCap_scraping_list_link_only.pkl


In [43]:
# Check the type of the stored elapsed time (the recorded output is numpy.float64).
type(add_row['TotTimeSec'][0])

numpy.float64

In [34]:
# Inspect the accumulated scraping results.
# NOTE(review): this displays the whole frame; scrape_df.head() may be enough for a quick look.
scrape_df

Unnamed: 0,url,PriceSeriesStatus,PriceSeriesPath,PriceSeriesError,PageType,WebsiteStatus,Website,WebsiteError,WhitepaperStatus,Whitepaper,WhitepaperError,ScreenPath,TotTimeSec
0,https://coinmarketcap.com/currencies/bitcoin/,DOWNLOADED,.\Checkpoints\Coinmarketcap\bitcoin.csv,,NORMAL,FOUND,bitcoin.org,,FOUND,https://bitcoin.org/bitcoin.pdf,,,27.0
1,https://coinmarketcap.com/currencies/ethereum/,ERROR,,Message: \nStacktrace:\nBacktrace:\n\tGetHandl...,ERROR,ERROR,,Message: no such element: Unable to locate ele...,FOUND,https://github.com/ethereum/wiki/wiki/White-Paper,,.\Checkpoints\Coinmarketcap\Screenshot\ethereu...,49.0
2,https://coinmarketcap.com/currencies/tether/,ERROR,,Message: \nStacktrace:\nBacktrace:\n\tGetHandl...,ALTERNATIVE,FOUND,https://tether.to/,Message: no such element: Unable to locate ele...,FOUND,https://tether.to/wp-content/uploads/2016/06/T...,,.\Checkpoints\Coinmarketcap\Screenshot\tether.png,34.0
3,https://coinmarketcap.com/currencies/bnb/,DOWNLOADED,.\Checkpoints\Coinmarketcap\bnb.csv,,NORMAL,FOUND,bnbchain.org,,ERROR,,Message: no such element: Unable to locate ele...,,26.0
4,https://coinmarketcap.com/currencies/usd-coin/,ERROR,,Message: \nStacktrace:\nBacktrace:\n\tGetHandl...,ALTERNATIVE,FOUND,https://www.centre.io/usdc,Message: no such element: Unable to locate ele...,FOUND,https://f.hubspotusercontent30.net/hubfs/93046...,,.\Checkpoints\Coinmarketcap\Screenshot\usd-coi...,34.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6095,https://coinmarketcap.com/currencies/fame-rewa...,ERROR,,Message: element click intercepted: Element <l...,ERROR,ERROR,,Message: no such element: Unable to locate ele...,FOUND,https://famerewardplus.ai/whitepaper/,,.\Checkpoints\Coinmarketcap\Screenshot\fame-re...,28.0
6096,https://coinmarketcap.com/currencies/massive-p...,ERROR,,Message: element click intercepted: Element <l...,ERROR,ERROR,,Message: no such element: Unable to locate ele...,ERROR,,Message: no such element: Unable to locate ele...,.\Checkpoints\Coinmarketcap\Screenshot\massive...,30.0
6097,https://coinmarketcap.com/currencies/sollama-u...,ERROR,,Message: element click intercepted: Element <l...,ERROR,ERROR,,Message: no such element: Unable to locate ele...,ERROR,,Message: no such element: Unable to locate ele...,.\Checkpoints\Coinmarketcap\Screenshot\sollama...,31.0
6098,https://coinmarketcap.com/currencies/darussafa...,ERROR,,Message: element click intercepted: Element <l...,ERROR,ERROR,,Message: no such element: Unable to locate ele...,FOUND,https://bitexenclub.com/upload/whitepaper/daru...,,.\Checkpoints\Coinmarketcap\Screenshot\darussa...,32.0


In [5]:
# Inspect the currency-list entry currently held in `row` (left over from a previously executed cell).
row

url                 https://coinmarketcap.com/currencies/xbullion/
name                                                      Xbullion
ticker                                                        GOLD
ListDownloadedOn                                        19/05/2023
parent_url_count                                                 2
url1                                                    currencies
url2                                                      xbullion
Name: 6200, dtype: object

In [6]:
# Reload the saved currency list and select one entry by position for a single-page debug run.
df_list=pd.read_csv(os.path.join(RESULTS_FOLDER,'10a_CoinMarketCap_currency_list.csv'), sep=';')
index=6200  # position of the entry to inspect (xbullion in the recorded output)
row=df_list.iloc[index]
row

url                 https://coinmarketcap.com/currencies/xbullion/
name                                                      Xbullion
ticker                                                        GOLD
ListDownloadedOn                                        19/05/2023
parent_url_count                                                 2
url1                                                    currencies
url2                                                      xbullion
Name: 6200, dtype: object

In [7]:
# Scrape the single page selected in `row`; the returned frame is shown as the cell output.
# NOTE(review): this call passes coinmarketcap_folder=, whereas the main scraping loop passes
# price_folder= and check_cookies= — confirm against the current utils.scrape_coinmarketcap signature.
scrape_coinmarketcap(row=row, skip_download=False, chromedriver_path=CHROMEDRIVER_PATH,
                                     coinmarketcap_folder=COINMARKETCAP_FOLDER, screenshot_folder=SCREENSHOT_FOLDER)

Unnamed: 0,url,PriceSeriesStatus,PriceSeriesPath,PriceSeriesError,PageType,WebsiteStatus,Website,WebsiteError,WhitepaperStatus,Whitepaper,WhitepaperError,ScreenPath,TotTimeSec
0,https://coinmarketcap.com/currencies/xbullion/,ERROR,,Message: element click intercepted: Element <l...,ALTERNATIVE,FOUND,https://xbullion.io/,Message: no such element: Unable to locate ele...,FOUND,https://xbullion.io/wp-content/uploads/2020/10...,,.\Checkpoints\Coinmarketcap\Screenshot\xbullio...,22.0


In [32]:
# Manual walk-through of the price-history download on a single page, printing a marker after
# each step so the failing interaction can be identified. The driver is left open on purpose so
# that the following cells can keep interacting with the page.
url='https://coinmarketcap.com/currencies/xbullion/'
driver=get_chromedriver(chromedriver_path=CHROMEDRIVER_PATH, download_folder='')
driver.get(url)
print('get')
driver.maximize_window()
print('maximize')
wait=WebDriverWait(driver, 15)

# close cookies: try the known banner buttons in order and stop at the first one that can be clicked.
# `except Exception` (not a bare except) so that interrupting the kernel is not swallowed.
for cookie_xpath in ('/html/body/div[3]/div[2]/div/div[1]/div/div[2]/div/button[2]',
                     '//*[@id="onetrust-reject-all-handler"]'):
    try:
        cookie=wait.until(
            EC.presence_of_element_located((By.XPATH, cookie_xpath)))
        cookie.click()
        break
    except Exception:
        continue

try:
    # interact with chart and press download button
    driver.execute_script("window.scrollTo(0, 0);")
    print('pre-all')
    allButton=wait.until(
        EC.presence_of_element_located((By.XPATH, '(//ul[@class="react-tabs__tab-list"])[1]//li[text()="ALL"]')))
    allButton.click()
    print('all')
    exportButton=allButton.find_element(By.XPATH, "(//li//div[contains(@class,'custom-button-inner')])[2]")
    actions=ActionChains(driver)
    # hover the export button, wait 2 seconds, then click it
    actions.move_to_element(exportButton).pause(2).click().perform()
    print('menu')
    downloadAsCsvButton=wait.until(
        EC.visibility_of_element_located((By.XPATH, '//button[text()="Download price history (.csv)"]')))
    downloadAsCsvButton.click()
    print('download')
except Exception as e:
    print(str(e))

get
maximize
pre-all
all
menu
download


In [30]:
# Open the chart export menu: locate the export button, hover it, pause 2 seconds, then click.
# Relies on `allButton` and `driver` left over from a previously executed cell.
# NOTE(review): the XPath starts with "(//", so presumably it is evaluated against the whole
# document rather than relative to allButton — confirm this is intended.
exportButton=allButton.find_element(By.XPATH, "(//li//div[contains(@class,'custom-button-inner')])[2]")
actions=ActionChains(driver)
actions.move_to_element(exportButton).pause(2).click().perform()

In [28]:
# Wait until the "Download price history (.csv)" button is visible, then click it.
# Relies on `wait` created in a previously executed cell.
downloadAsCsvButton=wait.until(
    EC.visibility_of_element_located((By.XPATH, '//button[text()="Download price history (.csv)"]')))
downloadAsCsvButton.click()

In [22]:
# Wait for the "ALL" tab inside the first react-tabs tab list to be present, then click it.
# Relies on `wait` created in a previously executed cell.
allButton=wait.until(
        EC.presence_of_element_located((By.XPATH, '(//ul[@class="react-tabs__tab-list"])[1]//li[text()="ALL"]')))
allButton.click()

In [17]:
# Inspect the scraping results collected so far (three rows in the recorded output).
scrape_df

Unnamed: 0,url,PriceSeriesStatus,PriceSeriesPath,PriceSeriesError,PageType,WebsiteStatus,Website,WebsiteError,WhitepaperStatus,Whitepaper,WhitepaperError,ScreenPath,TotTimeSec
0,https://coinmarketcap.com/currencies/bitcoin/,DOWNLOADED,.\Checkpoints\Coinmarketcap\bitcoin.csv,,ALTERNATIVE,FOUND,https://bitcoin.org/,Message: no such element: Unable to locate ele...,FOUND,https://bitcoin.org/bitcoin.pdf,,.\Checkpoints\Coinmarketcap\Screenshot\bitcoin...,20.0
1,https://coinmarketcap.com/currencies/ethereum/,DOWNLOADED,.\Checkpoints\Coinmarketcap\ethereum.csv,,NORMAL,FOUND,www.ethereum.org,,FOUND,https://github.com/ethereum/wiki/wiki/White-Paper,,,24.0
2,https://coinmarketcap.com/currencies/tether/,DOWNLOADED,.\Checkpoints\Coinmarketcap\tether.csv,,ALTERNATIVE,FOUND,https://tether.to/,Message: no such element: Unable to locate ele...,FOUND,https://tether.to/wp-content/uploads/2016/06/T...,,.\Checkpoints\Coinmarketcap\Screenshot\tether.png,30.0


In [4]:
# Single-page check: scrape the bitcoin page using its entry from the saved currency list.
url = 'https://coinmarketcap.com/currencies/bitcoin/'

df_list=pd.read_csv(os.path.join(RESULTS_FOLDER,'10a_CoinMarketCap_currency_list.csv'), sep=';')
row=df_list[df_list['url']==url].iloc[0]  # first entry whose url matches

# NOTE(review): passes coinmarketcap_folder= while the main scraping loop passes price_folder= —
# confirm against the current utils.scrape_coinmarketcap signature. The recorded output of this
# cell reports PriceSeriesError "name 'Timeout' is not defined".
add_row=scrape_coinmarketcap(row=row, skip_download=False, chromedriver_path=CHROMEDRIVER_PATH,
                                     coinmarketcap_folder=COINMARKETCAP_FOLDER, screenshot_folder=SCREENSHOT_FOLDER)
add_row

Unnamed: 0,url,PriceSeriesStatus,PriceSeriesPath,PriceSeriesError,PageType,WebsiteStatus,Website,WebsiteError,WhitepaperStatus,Whitepaper,WhitepaperError,ScreenPath,TotTimeSec
0,https://coinmarketcap.com/currencies/bitcoin/,ERROR,,name 'Timeout' is not defined,NORMAL,FOUND,bitcoin.org,,FOUND,https://bitcoin.org/bitcoin.pdf,,,20.0


In [14]:
# Select the bitcoin entry from the saved currency list as input for the flat scraping script below.
url = 'https://coinmarketcap.com/currencies/bitcoin/'

df_list=pd.read_csv(os.path.join(RESULTS_FOLDER,'10a_CoinMarketCap_currency_list.csv'), sep=';')
row=df_list[df_list['url']==url].iloc[0]  # first entry whose url matches

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

from utils import get_chromedriver
import time
import shutil
from totaltimeout import Timeout


# Flat (non-function) version of the single-page scrape: every intermediate value stays in the
# notebook namespace so it can be inspected after the cell has run.
skip_download=False
chromedriver_path=CHROMEDRIVER_PATH
coinmarketcap_folder=COINMARKETCAP_FOLDER
screenshot_folder=SCREENSHOT_FOLDER

url=row['url']
file_name=row['url2']
ticker=row['ticker']

# create temp folder for csv download (recreated from scratch so it starts empty)
temp_download_folder=os.path.join(os.getcwd(), 'temp_folder_' + file_name)
if os.path.exists(temp_download_folder):
    shutil.rmtree(temp_download_folder)
os.makedirs(temp_download_folder)

expected_downloaded_file=os.path.join(temp_download_folder, ticker+'_ALL_graph_coinmarketcap.csv')
save_csv_path=os.path.join(coinmarketcap_folder, file_name + '.csv')

# open page
start=timer()
driver=get_chromedriver(chromedriver_path=chromedriver_path, download_folder=temp_download_folder)
driver.get(url)
driver.maximize_window()
wait=WebDriverWait(driver, 10)

# download price series
status_download='SKIPPED'
if not skip_download:
    try:
        # interact with chart and press download button
        allButton=wait.until(
            EC.presence_of_element_located((By.XPATH, '(//ul[@class="react-tabs__tab-list"])[1]//li[text()="ALL"]')))
        allButton.click()
        exportButton=allButton.find_element(By.XPATH, "(//li//div[contains(@class,'custom-button-inner')])[2]")
        actions=ActionChains(driver)
        actions.move_to_element(exportButton).pause(2).click().perform()
        downloadAsCsvButton=wait.until(
            EC.visibility_of_element_located((By.XPATH, '//button[text()="Download price history (.csv)"]')))
        downloadAsCsvButton.click()

        # check download and move to folder: poll the temp folder until the Timeout(20) loop ends
        for time_left in Timeout(20):
            time.sleep(0.3)
            folder_content=os.listdir(temp_download_folder)
            if len(folder_content) > 0:
                # something was downloaded; stays the final status unless the expected file shows up
                status_download='FILE_DOWNLOADED_BUT_ERROR_IN_STORING'
            if os.path.exists(expected_downloaded_file):
                shutil.copy(expected_downloaded_file, save_csv_path)
                status_download='DOWNLOADED'
                break
    except Exception:
        # `except Exception` (not a bare except) so that interrupting the kernel is not swallowed
        status_download='ERROR'

if status_download != 'DOWNLOADED':
    save_csv_path=None

# get website url
website=''
page_type='ERROR'
try:
    # hover the "Website" element and read the url from the tooltip that appears
    websiteButton=driver.find_element(By.XPATH, '//div[text()="Website"]')
    actions=ActionChains(driver)
    actions.move_to_element(websiteButton).perform()
    urlElement=wait.until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, 'div[class="tippy-content"] span')))
    website=urlElement.text
    status_website='FOUND'
    page_type='NORMAL'
except Exception:
    status_website='ERROR'
# it seems there are two versions of the same page; in the alternative one the Website button behaves
# differently: it shows the url as button text. Try to use xpath instead
if page_type == 'ERROR':
    try:
        websiteButton=driver.find_element(By.XPATH, '//*[@id="__next"]/div/div[1]/div[2]/div/div[1]/div[2]/div/div[1]/div[3]/div/div[1]/ul/li[1]/a')
        website=websiteButton.get_attribute('href')
        status_website='FOUND'
        page_type='ALTERNATIVE'
    except Exception:
        status_website='ERROR'

# get whitepaper url
whitepaper=''
try:
    whitepaperButton = driver.find_element(By.XPATH, '//a[text()="Whitepaper"]')
    whitepaper=whitepaperButton.get_attribute('href')
    status_whitepaper='FOUND'
except Exception:
    status_whitepaper='ERROR'

# take page screenshot for debug
screen_path=None
if page_type == 'ALTERNATIVE' or status_website == 'ERROR':
    screen_path=os.path.join(screenshot_folder, file_name + '.png')
    driver.save_screenshot(screen_path)

# close page and remove temp folder
# (disabled in this cell: the browser and the temp folder are left in place after the run)
# driver.close()
# try:
#     shutil.rmtree(temp_download_folder)
# except:
#     pass

add_row=pd.DataFrame({'url': url, 'PriceSeriesStatus': status_download, 'PriceSeriesPath': save_csv_path,
                     'PageType': page_type, 'WebsiteStatus': status_website, 'Website': website,
                     'WhitepaperStatus': status_whitepaper, 'Whitepaper': whitepaper, 'ScreenPath': screen_path,
                     'TotTimeSec': datetime.timedelta(seconds=round(timer()-start)).total_seconds()}, index=[0])

add_row

Unnamed: 0,url,PriceSeriesStatus,PriceSeriesPath,PageType,WebsiteStatus,Website,WhitepaperStatus,Whitepaper,ScreenPath,TotTimeSec
0,https://coinmarketcap.com/currencies/bitcoin/,DOWNLOADED,.\Checkpoints\Coinmarketcap\bitcoin.csv,NORMAL,FOUND,bitcoin.org,FOUND,https://bitcoin.org/bitcoin.pdf,,23.0


In [121]:
# Reload the currency list saved by the list-download step (';'-separated csv).
df_list=pd.read_csv(os.path.join(RESULTS_FOLDER,'10a_CoinMarketCap_currency_list.csv'), sep=';')

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

from utils import get_chromedriver
import time
import shutil
from totaltimeout import Timeout


# NOTE(review): this module-level flag looks like a leftover — scrape_coinmarketcap below takes its
# own skip_download parameter and the call in this cell passes it explicitly.
skip_download=False

def scrape_coinmarketcap(row=None, skip_download=False, screenshot_folder=''):

    '''
    Scrape a single CoinMarketCap currency page with Selenium: optionally download the price
    history csv, then read the project website and whitepaper links.

    - row: row from df_list.iterrows(); the 'url', 'url2' and 'ticker' entries are used
    - skip_download: if True skips price series download
    - screenshot_folder: folder where a debug screenshot is stored when the page is of
      'ALTERNATIVE' type or the website lookup failed

    Returns a one-row DataFrame (index 0) with columns url, PriceSeriesStatus, PriceSeriesPath,
    PageType, WebsiteStatus, Website, WhitepaperStatus, Whitepaper, ScreenPath, TotTimeSec.

    Uses the notebook-level CHROMEDRIVER_PATH and COINMARKETCAP_FOLDER constants.
    Lookup failures are reported through the *Status columns; errors raised while opening the
    page propagate to the caller, after the browser and the temp folder have been cleaned up.
    '''

    url=row['url']
    file_name=row['url2']
    ticker=row['ticker']

    # create temp folder for csv download (recreated from scratch so it starts empty)
    temp_download_folder=os.path.join(os.getcwd(), 'temp_folder_' + file_name)
    if os.path.exists(temp_download_folder):
        shutil.rmtree(temp_download_folder)
    os.makedirs(temp_download_folder)

    expected_downloaded_file=os.path.join(temp_download_folder, ticker+'_ALL_graph_coinmarketcap.csv')
    save_csv_path=os.path.join(COINMARKETCAP_FOLDER, file_name + '.csv')

    # open page
    start=timer()
    driver=get_chromedriver(chromedriver_path=CHROMEDRIVER_PATH, download_folder=temp_download_folder)
    try:
        driver.get(url)
        driver.maximize_window()
        wait=WebDriverWait(driver, 10)

        # download price series
        status_download='SKIPPED'
        if not skip_download:
            try:
                # interact with chart and press download button
                allButton=wait.until(
                    EC.presence_of_element_located((By.XPATH, '(//ul[@class="react-tabs__tab-list"])[1]//li[text()="ALL"]')))
                allButton.click()
                exportButton=allButton.find_element(By.XPATH, "(//li//div[contains(@class,'custom-button-inner')])[2]")
                actions=ActionChains(driver)
                actions.move_to_element(exportButton).pause(2).click().perform()
                downloadAsCsvButton=wait.until(
                    EC.visibility_of_element_located((By.XPATH, '//button[text()="Download price history (.csv)"]')))
                downloadAsCsvButton.click()

                # check download and move to folder: poll the temp folder until the Timeout(20) loop ends
                for time_left in Timeout(20):
                    time.sleep(0.3)
                    folder_content=os.listdir(temp_download_folder)
                    if len(folder_content) > 0:
                        # something was downloaded; stays the final status unless the expected file shows up
                        status_download='FILE_DOWNLOADED_BUT_ERROR_IN_STORING'
                    if os.path.exists(expected_downloaded_file):
                        shutil.copy(expected_downloaded_file, save_csv_path)
                        status_download='DOWNLOADED'
                        break
            except Exception:
                # `except Exception` (not a bare except) so that KeyboardInterrupt is not swallowed
                status_download='ERROR'

        if status_download != 'DOWNLOADED':
            save_csv_path=None

        # get website url
        website=''
        page_type='ERROR'
        try:
            # hover the "Website" element and read the url from the tooltip that appears
            websiteButton=driver.find_element(By.XPATH, '//div[text()="Website"]')
            actions=ActionChains(driver)
            actions.move_to_element(websiteButton).perform()
            urlElement=wait.until(
                EC.visibility_of_element_located((By.CSS_SELECTOR, 'div[class="tippy-content"] span')))
            website=urlElement.text
            status_website='FOUND'
            page_type='NORMAL'
        except Exception:
            status_website='ERROR'
        # it seems there are two versions of the same page; in the alternative one the Website button
        # behaves differently: it shows the url as button text. Try to use xpath instead
        if page_type == 'ERROR':
            try:
                websiteButton=driver.find_element(By.XPATH, '//*[@id="__next"]/div/div[1]/div[2]/div/div[1]/div[2]/div/div[1]/div[3]/div/div[1]/ul/li[1]/a')
                website=websiteButton.get_attribute('href')
                status_website='FOUND'
                page_type='ALTERNATIVE'
            except Exception:
                status_website='ERROR'

        # get whitepaper url
        whitepaper=''
        try:
            whitepaperButton = driver.find_element(By.XPATH, '//a[text()="Whitepaper"]')
            whitepaper=whitepaperButton.get_attribute('href')
            status_whitepaper='FOUND'
        except Exception:
            status_whitepaper='ERROR'

        # take page screenshot for debug
        screen_path=None
        if page_type == 'ALTERNATIVE' or status_website == 'ERROR':
            screen_path=os.path.join(screenshot_folder, file_name + '.png')
            driver.save_screenshot(screen_path)

    finally:
        # close page and remove temp folder, also when something above raised.
        # quit() ends the whole WebDriver session, not only the current window.
        try:
            driver.quit()
        finally:
            shutil.rmtree(temp_download_folder, ignore_errors=True)

    add_row=pd.DataFrame({'url': url, 'PriceSeriesStatus': status_download, 'PriceSeriesPath': save_csv_path,
                         'PageType': page_type, 'WebsiteStatus': status_website, 'Website': website,
                         'WhitepaperStatus': status_whitepaper, 'Whitepaper': whitepaper, 'ScreenPath': screen_path,
                         'TotTimeSec': datetime.timedelta(seconds=round(timer()-start)).total_seconds()}, index=[0])

    return add_row

# Try the function defined above on a single page (alternative test url kept commented out).
# url='https://coinmarketcap.com/currencies/bob/'
url = 'https://coinmarketcap.com/currencies/vitex-coin/'
row=df_list[df_list['url']==url].iloc[0]  # first entry whose url matches

add_row=scrape_coinmarketcap(row=row, skip_download=False, screenshot_folder=SCREENSHOT_FOLDER)
add_row




Unnamed: 0,url,PriceSeriesStatus,PriceSeriesPath,PageType,WebsiteStatus,Website,WhitepaperStatus,Whitepaper,ScreenPath,TotTimeSec
0,https://coinmarketcap.com/currencies/vitex-coin/,DOWNLOADED,.\Checkpoints\Coinmarketcap\vitex-coin.csv,NORMAL,FOUND,vitex.net,ERROR,,,21.0


In [10]:
# Click the button at this absolute XPath on the page currently open in `driver`
# (the same XPath is the first cookie-banner locator tried in the xbullion debug cell).
driver.find_element(By.XPATH, '/html/body/div[3]/div[2]/div/div[1]/div/div[2]/div/button[2]').click()

In [8]:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ExpectedConditions
# from webdriver_manager.chrome import ChromeDriverManager
# from selenium.webdriver.chrome.service import Service
# from webdriver_manager.microsoft import EdgeChromiumDriverManager

from utils import get_chromedriver

# Open a single currency page in a fresh Chrome session for interactive probing.
# `driver` and `wait` are reused by the exploratory cells that follow.
url = 'https://coinmarketcap.com/currencies/tor-wallet/'
driver = get_chromedriver(chromedriver_path=CHROMEDRIVER_PATH)
# Alternative driver set-ups that were tried (kept for reference, not executed):
# driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
# driver = webdriver.Edge(executable_path=EdgeChromiumDriverManager().install())

# Explicit wait helper with a 10 second timeout
wait = WebDriverWait(driver, 10)
driver.get(url)
# Hover-over-"Website" probe, disabled here; the same steps are run in later cells:
# websiteButton = driver.find_element(By.XPATH, '//div[text()="Website"]')
# actions = ActionChains(driver)
# actions.move_to_element(websiteButton).perform()
# urlElement = wait.until(
#     EC.visibility_of_element_located((By.CSS_SELECTOR, 'div[class="tippy-content"] span')))
# print(urlElement.text)
driver.maximize_window()

In [13]:
# Scroll the open page to vertical offset `scroll` (in pixels) by running JavaScript in the browser
scroll=1000
driver.execute_script(f"window.scrollTo(0, {scroll});")

In [141]:
# Hover over the "Website" label, then wait for the 'tippy-content' span
# (presumably the tooltip holding the site URL) to become visible and print its text.
websiteButton = driver.find_element(By.XPATH, '//div[text()="Website"]')
actions = ActionChains(driver)
actions.move_to_element(websiteButton).perform()

tooltip_locator = (By.CSS_SELECTOR, 'div[class="tippy-content"] span')
urlElement = wait.until(EC.visibility_of_element_located(tooltip_locator))
print(urlElement.text)

TimeoutException: Message: 
Stacktrace:
Backtrace:
	GetHandleVerifier [0x004E8893+48451]
	(No symbol) [0x0047B8A1]
	(No symbol) [0x00385058]
	(No symbol) [0x003B0467]
	(No symbol) [0x003B069B]
	(No symbol) [0x003DDD92]
	(No symbol) [0x003CA304]
	(No symbol) [0x003DC482]
	(No symbol) [0x003CA0B6]
	(No symbol) [0x003A7E08]
	(No symbol) [0x003A8F2D]
	GetHandleVerifier [0x00748E3A+2540266]
	GetHandleVerifier [0x00788959+2801161]
	GetHandleVerifier [0x0078295C+2776588]
	GetHandleVerifier [0x00572280+612144]
	(No symbol) [0x00484F6C]
	(No symbol) [0x004811D8]
	(No symbol) [0x004812BB]
	(No symbol) [0x00474857]
	BaseThreadInitThunk [0x760B00C9+25]
	RtlGetAppContainerNamedObjectPath [0x77687B4E+286]
	RtlGetAppContainerNamedObjectPath [0x77687B1E+238]


In [None]:
                                 //*[@id="__next"]/div/div[1]/div[2]/div/div[1]/div[2]/div/div[1]/div[3]/div/div[1]/ul/li[1]/a

In [143]:
# Probe the <button> variant of the first link-list entry and show its 'href' attribute
website_button_xpath = '//*[@id="__next"]/div/div[1]/div[2]/div/div[1]/div[2]/div/div[1]/div[3]/div/div[1]/ul/li[1]/button'
websiteButton = driver.find_element(By.XPATH, website_button_xpath)
websiteButton.get_attribute('href')


In [147]:
# Same hover probe as for the "Website" label, but targeting the <button> variant
# of the first link-list entry through its absolute XPath.
website_button_xpath = '//*[@id="__next"]/div/div[1]/div[2]/div/div[1]/div[2]/div/div[1]/div[3]/div/div[1]/ul/li[1]/button'
websiteButton = driver.find_element(By.XPATH, website_button_xpath)
actions = ActionChains(driver)
actions.move_to_element(websiteButton).perform()

# Wait (up to the timeout configured on `wait`) for the 'tippy-content' span to show up
tooltip_locator = (By.CSS_SELECTOR, 'div[class="tippy-content"] span')
urlElement = wait.until(EC.visibility_of_element_located(tooltip_locator))
print(urlElement.text)

TimeoutException: Message: 
Stacktrace:
Backtrace:
	GetHandleVerifier [0x004E8893+48451]
	(No symbol) [0x0047B8A1]
	(No symbol) [0x00385058]
	(No symbol) [0x003B0467]
	(No symbol) [0x003B069B]
	(No symbol) [0x003DDD92]
	(No symbol) [0x003CA304]
	(No symbol) [0x003DC482]
	(No symbol) [0x003CA0B6]
	(No symbol) [0x003A7E08]
	(No symbol) [0x003A8F2D]
	GetHandleVerifier [0x00748E3A+2540266]
	GetHandleVerifier [0x00788959+2801161]
	GetHandleVerifier [0x0078295C+2776588]
	GetHandleVerifier [0x00572280+612144]
	(No symbol) [0x00484F6C]
	(No symbol) [0x004811D8]
	(No symbol) [0x004812BB]
	(No symbol) [0x00474857]
	BaseThreadInitThunk [0x760B00C9+25]
	RtlGetAppContainerNamedObjectPath [0x77687B4E+286]
	RtlGetAppContainerNamedObjectPath [0x77687B1E+238]


In [142]:
# Probe the <a> variant of the first link-list entry and show its 'href' attribute.
# On the page loaded for this run the element was missing (NoSuchElementException in the output).
website_anchor_xpath = '//*[@id="__next"]/div/div[1]/div[2]/div/div[1]/div[2]/div/div[1]/div[3]/div/div[1]/ul/li[1]/a'
websiteButton = driver.find_element(By.XPATH, website_anchor_xpath)
websiteButton.get_attribute('href')

NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//*[@id="__next"]/div/div[1]/div[2]/div/div[1]/div[2]/div/div[1]/div[3]/div/div[1]/ul/li[1]/a"}
  (Session info: chrome=113.0.5672.126)
Stacktrace:
Backtrace:
	GetHandleVerifier [0x004E8893+48451]
	(No symbol) [0x0047B8A1]
	(No symbol) [0x00385058]
	(No symbol) [0x003B0467]
	(No symbol) [0x003B069B]
	(No symbol) [0x003DDD92]
	(No symbol) [0x003CA304]
	(No symbol) [0x003DC482]
	(No symbol) [0x003CA0B6]
	(No symbol) [0x003A7E08]
	(No symbol) [0x003A8F2D]
	GetHandleVerifier [0x00748E3A+2540266]
	GetHandleVerifier [0x00788959+2801161]
	GetHandleVerifier [0x0078295C+2776588]
	GetHandleVerifier [0x00572280+612144]
	(No symbol) [0x00484F6C]
	(No symbol) [0x004811D8]
	(No symbol) [0x004812BB]
	(No symbol) [0x00474857]
	BaseThreadInitThunk [0x760B00C9+25]
	RtlGetAppContainerNamedObjectPath [0x77687B4E+286]
	RtlGetAppContainerNamedObjectPath [0x77687B1E+238]


In [106]:
# Grab the <a> inside the first link-list entry through its absolute XPath; inspected in the cells below
aa=driver.find_element(By.XPATH, '//*[@id="__next"]/div/div[1]/div[2]/div/div[1]/div[2]/div/div[1]/div[3]/div/div[1]/ul/li[1]/a')

In [102]:
# Same lookup as the previous cell (repeated while experimenting on another page load)
aa=driver.find_element(By.XPATH, '//*[@id="__next"]/div/div[1]/div[2]/div/div[1]/div[2]/div/div[1]/div[3]/div/div[1]/ul/li[1]/a')

In [104]:
# Visible text of the anchor (a bare host name such as 'www.aeternity.com' in the recorded output)
aa.text

'www.aeternity.com'

In [107]:
# 'href' attribute of the anchor (a full URL, 'https://www.aeternity.com/', in the recorded output)
aa.get_attribute('href')

'https://www.aeternity.com/'

In [98]:
# Locate the anchor whose text is exactly "Whitepaper" and show the link it points to
whitepaper_xpath = '//a[text()="Whitepaper"]'
whitepaperButton = driver.find_element(By.XPATH, whitepaper_xpath)
whitepaperButton.get_attribute('href')

'https://github.com/aeternity/protocol'

In [97]:
# Move the mouse over the <div> labelled "Website"; once the 'tippy-content' span
# becomes visible, print the text it contains.
websiteButton = driver.find_element(By.XPATH, '//div[text()="Website"]')
actions = ActionChains(driver)
actions.move_to_element(websiteButton).perform()

tooltip_locator = (By.CSS_SELECTOR, 'div[class="tippy-content"] span')
tooltip_visible = EC.visibility_of_element_located(tooltip_locator)
urlElement = wait.until(tooltip_visible)
print(urlElement.text)

www.zkbob.com


In [46]:
# Hover over the element already stored in `websiteButton` (set by an earlier cell)
# and print the text of the 'tippy-content' span once it is visible.
actions = ActionChains(driver)
actions.move_to_element(websiteButton).perform()

tooltip_locator = (By.CSS_SELECTOR, 'div[class="tippy-content"] span')
urlElement = wait.until(EC.visibility_of_element_located(tooltip_locator))
print(urlElement.text)

www.aeknow.org


In [25]:
# NOTE: despite its name, `websiteButton` now holds the <div> labelled "Whitepaper";
# the next cell hovers over it with the same tooltip probe used for "Website".
websiteButton = driver.find_element(By.XPATH, '//div[text()="Whitepaper"]')

In [26]:
# Hover over whatever `websiteButton` currently points at and wait for the
# 'tippy-content' span; the recorded run timed out (TimeoutException in the output).
actions = ActionChains(driver)
actions.move_to_element(websiteButton).perform()

tooltip_locator = (By.CSS_SELECTOR, 'div[class="tippy-content"] span')
tooltip_visible = EC.visibility_of_element_located(tooltip_locator)
urlElement = wait.until(tooltip_visible)
print(urlElement.text)

TimeoutException: Message: 
Stacktrace:
Backtrace:
	GetHandleVerifier [0x00007FF765894AD2+67490]
	Microsoft::Applications::Events::EventProperty::~EventProperty [0x00007FF76582B242+782402]
	(No symbol) [0x00007FF7655CC646]
	(No symbol) [0x00007FF76560F972]
	(No symbol) [0x00007FF76560FB8A]
	(No symbol) [0x00007FF765649817]
	(No symbol) [0x00007FF76562DF8F]
	(No symbol) [0x00007FF765603751]
	(No symbol) [0x00007FF765646BC5]
	(No symbol) [0x00007FF76562DD23]
	(No symbol) [0x00007FF765602794]
	(No symbol) [0x00007FF7656019B0]
	(No symbol) [0x00007FF765602F04]
	Microsoft::Applications::Events::ILogManager::DispatchEventBroadcast [0x00007FF765A590D3+1301571]
	(No symbol) [0x00007FF76568B951]
	Microsoft::Applications::Events::EventProperty::~EventProperty [0x00007FF7657746C1+33985]
	Microsoft::Applications::Events::EventProperty::~EventProperty [0x00007FF76576CAF5+2293]
	Microsoft::Applications::Events::ILogManager::DispatchEventBroadcast [0x00007FF765A57E33+1296803]
	Microsoft::Applications::Events::EventProperty::~EventProperty [0x00007FF765832589+811913]
	Microsoft::Applications::Events::EventProperty::~EventProperty [0x00007FF76582EF04+797956]
	Microsoft::Applications::Events::EventProperty::~EventProperty [0x00007FF76582EFFC+798204]
	Microsoft::Applications::Events::EventProperty::~EventProperty [0x00007FF7658250B1+757425]
	BaseThreadInitThunk [0x00007FF9613A7614+20]
	RtlUserThreadStart [0x00007FF9619026A1+33]


In [None]:
<div class="sc-aef7b723-0 sc-18df06a5-0 hBoqvQ"><p color="text" font-size="1" data-sensors-click="true" class="sc-4984dd93-0 jZrMxO">Showing 9601 - 9698 out of 9698</p><div class="sc-18df06a5-3 hiNKNZ"><div class="sc-fd786ab3-0 bXSmUJ"><ul class="pagination"><li class="previous"><a class="chevron" tabindex="0" role="button" aria-disabled="false" aria-label="Previous page" href="/?page=96"><svg xmlns="http://www.w3.org/2000/svg" fill="none" height="14" width="14" viewBox="0 0 24 24" class="sc-aef7b723-0 dgXMPo"><path d="M15 6L9 12L15 18" stroke="currentColor" stroke-width="2" stroke-miterlimit="10" stroke-linecap="round" stroke-linejoin="round"></path></svg></a></li><li class="page"><a role="button" tabindex="0" aria-label="Page 1" href="/">1</a></li><li class="break"><a role="button" tabindex="0">...</a></li><li class="page"><a role="button" href="/?page=93" tabindex="0" aria-label="Page 93">93</a></li><li class="page"><a role="button" href="/?page=94" tabindex="0" aria-label="Page 94">94</a></li><li class="page"><a role="button" href="/?page=95" tabindex="0" aria-label="Page 95">95</a></li><li class="page"><a role="button" href="/?page=96" tabindex="0" aria-label="Page 96">96</a></li><li class="page active"><a role="button" tabindex="0" aria-label="Page 97 is your current page" aria-current="page">97</a></li><li class="next disabled"><a class="chevron" tabindex="0" role="button" aria-disabled="true" aria-label="Next page"><svg xmlns="http://www.w3.org/2000/svg" fill="none" height="14" width="14" viewBox="0 0 24 24" class="sc-aef7b723-0 dgXMPo"><path d="M9 6L15 12L9 18" stroke="currentColor" stroke-width="2" stroke-miterlimit="10" stroke-linecap="round" stroke-linejoin="round"></path></svg></a></li></ul></div></div><div class="sc-aef7b723-0 sc-18df06a5-1 cNWAac"><p color="text2" font-size="1" data-sensors-click="true" class="sc-4984dd93-0 bQfHIQ">Show rows</p><div data-sensors-click="true" class="sc-aef7b723-0 sc-381655b8-0 hkCsVx">100<svg 
xmlns="http://www.w3.org/2000/svg" fill="none" height="14" width="14" viewBox="0 0 24 24" class="sc-aef7b723-0 dgXMPo"><path d="M6 9L12 15L18 9" stroke="currentColor" stroke-width="2" stroke-miterlimit="10" stroke-linecap="round" stroke-linejoin="round"></path></svg></div></div></div>

In [233]:
# Open the vitex-coin page in a new driver created with an explicit download folder ('.\\aaa'),
# used by the following cells to probe the price-chart controls.
url='https://coinmarketcap.com/currencies/vitex-coin/'

driver=get_chromedriver(chromedriver_path=CHROMEDRIVER_PATH, download_folder='.\\aaa')
driver.get(url)

In [240]:
# Locate the second child <div> under the element with id "react-tabs-52" and display the handle.
# By.XPATH is the same 'xpath' locator string, spelled the way the rest of the notebook does.
react_tab_xpath = '//*[@id="react-tabs-52"]/div/div[2]'
aa = driver.find_element(By.XPATH, react_tab_xpath)
aa

<selenium.webdriver.remote.webelement.WebElement (session="cae27bf49865b47225fd9394f65d6520", element="8CB57456B53EFDD0BEA1DCB6F4904B82_element_276")>

In [241]:
from selenium.webdriver.support.ui import Select
# NOTE: selenium's Select wrapper only accepts <select> elements. `aa` is a <div> here,
# so this line raises UnexpectedTagNameException (see the cell output).
drop=Select(aa)

UnexpectedTagNameException: Message: Select only works on <select> elements, not on div


In [7]:
# Click the fifth <li> of the list reached through this absolute XPath
# (presumably the "ALL" range tab of the price chart -- TODO confirm, the path is layout-dependent).
range_tab_xpath = '/html/body/div[1]/div/div[1]/div[2]/div/div[3]/div/div[1]/div[2]/div[1]/div/div/div/div[2]/div/div/div[2]/div/div[2]/div/ul/li[5]'
driver.find_element(By.XPATH, range_tab_xpath).click()

In [None]:
<li font-weight="var(--c-font-weight-500)" class="react-tabs__tab react-tabs__tab--selected" role="tab" id="react-tabs-8" aria-selected="true" aria-disabled="false" aria-controls="react-tabs-9" tabindex="0">ALL</li>

In [None]:
/html/body/div[1]/div/div[1]/div[2]/div/div/div[1]/div[2]/div/div/div[2]/div/div/div[2]/div/div[2]/div/ul/li[5]

In [None]:
<li font-weight="var(--c-font-weight-500)" class="react-tabs__tab react-tabs__tab--selected" role="tab" id="react-tabs-46" aria-selected="true" aria-disabled="false" aria-controls="react-tabs-47" tabindex="0">ALL</li>

In [None]:
# Click the SVG <text> node labelled `valueToClick` inside the Highcharts range-selector group
valueToClick = "All"
range_selector_xpath = (
    '//div[@class="companyGraph"]//div[@class="highcharts-container "]//*[name()="g" and '
    f'@class="highcharts-range-selector-group"]//*[name()="text" and text()="{valueToClick}"]'
)
button = driver.find_element(By.XPATH, range_selector_xpath)
button.click()

In [85]:
# Download page 5 of the CoinMarketCap listing and convert the parsed HTML into nested dicts.
# The timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() reports an HTTP error here rather than as a confusing KeyError
# when the dict structure is navigated in later cells.
page = requests.get('https://coinmarketcap.com/?page=5', timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

tt=convert(soup)

# extract list of categories
# tag = soup.find_all('div', class_="icoTop__selects", recursive=True)
# conv_dict = convert(tag)

In [159]:
def _parse_currency_row(row):
    """Extract url, name and ticker from one converted <tr> of the listing table.

    Two row shapes are recognised by the number of keys in the converted dict:
    2 keys for the top rows (graphs loaded) and 3 keys for the bottom rows
    (graphs not loaded).

    Returns:
        dict with keys 'url', 'name', 'ticker' (a field is None when the row does
        not carry it), or None when the row has an unrecognised shape or no link.
    """
    # Start from None on every row so values never leak over from the previous row
    url = name = ticker = None

    if len(row) == 2:  # top rows where graphs are loaded
        link = row['td'][2]['div'][0]['a'][0]
        url = link['@href']
        labels = link['div'][0]['div'][0]
        name = labels['p'][0]['#text']
        ticker = labels['div'][0]['p'][0]['#text']
    elif len(row) == 3:  # bottom rows where graphs are not loaded
        # first cell that contains a link
        link_cell = next((cell for cell in row['td'] if 'a' in cell), None)
        if link_cell is None:
            return None
        link = link_cell['a'][0]
        url = link['@href']
        for span in link['span']:
            if '@class' in span:
                if span['@class'] == ['crypto-symbol']:
                    ticker = span['#text']
            else:
                # the span without a class attribute holds the currency name
                name = span['#text']
    else:
        return None

    return {'url': url, 'name': name, 'ticker': ticker}


# Rows of the currency table, reached through the converted page structure
table_list=tt['html'][0]['body'][0]['div'][0]['div'][0]['div'][0]['div'][1]['div'][0]['div'][0]['div'][3]['table'][0]['tbody'][0]['tr']

# Collect the records first and build the frame once (no pd.concat inside the loop).
# The index keeps the position of each row in the table, as before.
records, positions = [], []
for i, row in enumerate(table_list):
    parsed = _parse_currency_row(row)
    if parsed is None:
        continue  # skip rows that are not currency entries
    records.append(parsed)
    positions.append(i)

df_list = pd.DataFrame(records, index=positions, columns=['url', 'name', 'ticker'])
df_list

Unnamed: 0,url,name,ticker
0,/currencies/forta/,Forta,FORT
1,/currencies/smartofgiving/,smARTOFGIVING,AOG
2,/currencies/onyxcoin/,Onyxcoin,XCN
3,/currencies/stormx/,StormX,STMX
4,/currencies/metadium/,Metadium,META
...,...,...,...
95,/currencies/phoenix-global-new/,Phoenix,PHB
96,/currencies/hourglass/,Hourglass,WAIT
97,/currencies/groestlcoin/,Groestlcoin,GRS
98,/currencies/taboo-token/,TABOO TOKEN,TABOO


In [168]:
# Total number of currencies: the integer after 'out of ' in the first text node containing 'Showing '
showing_text = soup.find_all(string=re.compile('Showing '))[0]
int(showing_text.split('out of ')[1])

9697

In [182]:
# Last page number: whatever follows the final '... ' in the pagination text.
# Expects `tt` to be the converted pagination <div> (the cell that builds it has a lower
# position but an earlier execution count).
pagination_text = tt['div'][0]['div'][0]['#text']
pagination_text.split('... ')[-1]

'97'

In [177]:
# Convert only the <div> carrying these (generated) CSS classes and inspect its <p> children.
# NOTE: this rebinds `tt`, which other cells use for the whole converted page.
pagination_divs = soup.find_all('div', class_="sc-aef7b723-0 sc-18df06a5-0 hBoqvQ", recursive=True)
tt = convert(pagination_divs)
tt['div'][0]['p']

{'@class': ['sc-aef7b723-0', 'sc-18df06a5-0', 'hBoqvQ'],
 '#text': 'Showing 401 - 500 out of 9697 1 ... 3 4 5 6 7 ... 97 Show rows 100',
 'p': [{'@color': 'text',
   '@font-size': '1',
   '@data-sensors-click': 'true',
   '@class': ['sc-4984dd93-0', 'jZrMxO'],
   '#text': 'Showing 401 - 500 out of 9697',
   'navigablestring': ['Showing 401 - 500 out of 9697']}],
 'div': [{'@class': ['sc-18df06a5-3', 'hiNKNZ'],
   '#text': '1 ... 3 4 5 6 7 ... 97',
   'div': [{'@class': ['sc-fd786ab3-0', 'bXSmUJ'],
     '#text': '1 ... 3 4 5 6 7 ... 97',
     'ul': [{'@class': ['pagination'],
       '#text': '1 ... 3 4 5 6 7 ... 97',
       'li': [{'@class': ['previous'],
         '#text': '',
         'a': [{'@class': ['chevron'],
           '@href': '/?page=4',
           '@tabindex': '0',
           '@role': 'button',
           '@aria-disabled': 'false',
           '@aria-label': 'Previous page',
           '#text': '',
           'svg': [{'@xmlns': 'http://www.w3.org/2000/svg',
             '@fill'