### This program scrapes CoinMarketCap.com's page to fetch the top 300 cryptocurrencies and their data


#### Import all the required libraries:
1. Import the Selenium webdriver to load dynamic webpages whose content is rendered by JavaScript
2. Import BeautifulSoup to parse the rendered page source (HTML or XML content)
3. Import pandas to create a dataframe from the scraped lists and then write it to an Excel (.xlsx) file

In [36]:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd

In [40]:
# Define empty lists to store all the scraped columns data.
# The dataframe cell further down reads these module-level lists by name.

Name=[]
Symbol=[]
Rank=[]
Price=[]
Mcap=[]
Website=[]

#Selenium will access the Chrome browser driver in incognito mode (without opening a browser window (headless option))

options=webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--incognito')
options.add_argument('--headless')

#Initiate the webdriver once and reuse it for every page, instead of
#launching a separate browser for each page

driver = webdriver.Chrome(options=options)

skipped_rows = 0 #number of table rows that lacked one of the expected fields

try:
    driver.maximize_window()

    #Loop through multiple pages - 3 pages in this case

    for page in range(1,4):

        url="https://coinmarketcap.com/?page=" + str(page) #url of the page to be scraped
        driver.get(url)

        #Scroll to the bottom of the page in steps to load all the hidden javascript elements.
        #The 800px step is the original author's choice: the maximum step that reads
        #all the elements without skipping any of them.

        scroll=int(driver.execute_script("return document.body.scrollHeight"))
        for offset in range(1,scroll,800):
            driver.execute_script("window.scrollTo(0,{});".format(offset))

        #Render the JS code and store all the information in static HTML code

        html = driver.page_source

        #Create a BeautifulSoup element for the above html variable

        beu_soup = BeautifulSoup(html, "html.parser")

        #Locate the coin table by its class.
        #NOTE(review): these class names look auto-generated and may change
        #when the site is redeployed - confirm against the live page.

        my_table=beu_soup.find('table', {'class':'sc-beb003d5-3 ieTeVa cmc-table '})
        if my_table is None:
            #Fail with a clear message instead of an AttributeError on None
            raise RuntimeError("Could not find the coin table on " + url + "; the page markup may have changed")
        rows = my_table.find_all('tr')[1:] #skip the first row of the table

        #Loop through each row and scrape the required columns either using a class or a cell index (td)

        for row in rows:
            cells=row.find_all('td')
            name=row.find('p',class_='sc-4984dd93-0 kKpPOn')
            symbol=row.find('p',class_='sc-4984dd93-0 iqdbQL coin-item-symbol')
            rank=row.find('div',class_='sc-8497df48-3 erCSsg')
            website=row.find('a',class_='cmc-link')
            href=website.get('href') if website is not None else None

            #Skip rows that do not contain every expected field (for example
            #rows that were not fully rendered) rather than crashing mid-scrape.
            #Index 7 (market cap) is the highest cell index read below.

            if len(cells) < 8 or any(field is None for field in (name, symbol, rank, href)):
                skipped_rows += 1
                continue

            #Append all the variable data to their respective lists.
            #All six lists are appended together so they stay the same length.

            Name.append(name.get_text())
            Symbol.append(symbol.get_text())
            Rank.append(rank.get_text())
            Price.append(cells[3].get_text())
            Mcap.append(cells[7].get_text())
            Website.append('https://coinmarketcap.com' + href)
finally:
    #quit() ends the whole webdriver session; it runs even if scraping fails,
    #so no headless browser is left running in the background
    driver.quit()

if skipped_rows:
    print("Skipped {} incomplete table row(s)".format(skipped_rows))

In [41]:
#Assemble the scraped lists into a single dataframe, one row per coin

column_names=['Rank based on Market Cap', 'Name', 'Symbol', 'Price', 'Market_Capital', 'Website']
coin_records=[*zip(Rank, Name, Symbol, Price, Mcap, Website)]
df=pd.DataFrame(coin_records, columns=column_names)

#Shift the default 0-based row labels so that they start at 1
df.index += 1

In [43]:
#Write the dataframe to an excel file

sheet_name='Top300Cryptocurrencies'

#The engine is named explicitly because set_column() below is an XlsxWriter
#worksheet method. The with-block closes (and thereby saves) the file even if
#an error occurs while writing.

with pd.ExcelWriter('Top300Cryptocurrencies_13JUN2023.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, index=False, sheet_name=sheet_name)
    worksheet = writer.sheets[sheet_name]

    #Dynamically adjust all the column widths: each column is made as wide as
    #its longest cell text or its header, whichever is longer

    for col_index, column in enumerate(df.columns):
        cell_lengths = df[column].astype(str).map(len)
        #An empty column has no maximum (it would be NaN), so fall back to 0
        longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
        column_length = max(longest_cell, len(str(column)))
        worksheet.set_column(col_index, col_index, column_length)