In [24]:
import csv
import io
import json
import random
from time import sleep

import pandas as pd
import requests
from bs4 import BeautifulSoup



In [25]:
def pick_random_user_agent():
    """Return request headers carrying one randomly chosen browser User-Agent.

    Returns:
        dict: a single-entry mapping ``{"user-agent": <string>}`` whose value
        is drawn uniformly from the candidate strings listed below.
    """
    candidates = (
        # Chrome
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_2_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
        # Firefox
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:97.0) Gecko/20100101 Firefox/97.0",
        "Mozilla/5.0 (X11; Linux i686; rv:97.0) Gecko/20100101 Firefox/97.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 12.2; rv:97.0) Gecko/20100101 Firefox/97.0",
        # Safari
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_2_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.2 Safari/605.1.15",
        # Opera
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 OPR/83.0.4254.27",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 OPR/83.0.4254.27",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 OPR/83.0.4254.27",
    )

    return {"user-agent": random.choice(candidates)}


In [26]:
def get_bscscan(page_size=100):
    """Download the BscScan "verified contracts" listing page.

    The request itself (random User-Agent, retry until HTTP 200) is delegated
    to ``get_token_page`` so the retry logic lives in one place instead of
    being duplicated here.

    Args:
        page_size: Value sent as the ``ps`` query parameter (presumably the
            number of rows per page -- confirm against the site). The default
            of 100 reproduces the URL this function always requested.

    Returns:
        The raw body of the listing page, exactly as ``get_token_page``
        returns it.
    """
    return get_token_page(f"https://bscscan.com/contractsVerified?ps={page_size}")


In [27]:
def parse_body(body):
    """Turn the verified-contracts listing HTML into a list of contract dicts.

    Args:
        body: HTML of the listing page. ``bytes`` (what ``get_bscscan``
            returns) are wrapped in a file-like object; any other value is
            handed to ``pandas.read_html`` unchanged.

    Returns:
        list[dict]: One dict per row of the first table found in ``body``,
        holding the row's index (``position``), the table columns
        ``Contract Name``, ``Compiler``, ``Version``, ``License``,
        ``Balance``, ``Txns`` and ``Address``, plus three BscScan URLs derived
        from the address.
    """
    # pandas >= 2.1 deprecates passing literal markup to read_html and asks
    # for a file-like object instead.
    source = io.BytesIO(body) if isinstance(body, bytes) else body

    # The module is imported as `pd`; the bare name `pandas` is not defined
    # on a fresh kernel.
    parsed_body = pd.read_html(source)[0]

    results_array = []
    for i, row in parsed_body.iterrows():
        address = row["Address"]
        results_array.append(
            {
                "position": i,
                "name": row["Contract Name"],
                "compiler": row["Compiler"],
                "compiler_version": row["Version"],
                "license": row["License"],
                "balance": row["Balance"],
                "transactions": row["Txns"],
                "address": address,
                "contract_url": "https://bscscan.com/address/" + address,
                "token_url": "https://bscscan.com/token/" + address + "#balances",
                "holders_url": "https://bscscan.com/token/tokenholderchart/"
                + address
                + "?range=500",
            }
        )

    return results_array

In [28]:
def get_token_page(link, max_attempts=10, timeout=5):
    """Download ``link`` and return the raw response body.

    Every attempt is sent with a freshly picked User-Agent header. Anything
    other than an HTTP 200 response -- including timeouts and connection
    errors raised by ``requests`` -- counts as a failed attempt and is retried
    after a short, growing pause, so a blocked or unreachable page can no
    longer spin forever or abort the run on the first network hiccup.

    Args:
        link: URL to fetch.
        max_attempts: Maximum number of requests to send before giving up.
        timeout: Per-request timeout in seconds, passed to ``requests.get``.

    Returns:
        bytes: ``response.content`` of the first response with status 200.

    Raises:
        RuntimeError: If none of the ``max_attempts`` requests returned 200.
    """
    last_failure = None

    for attempt in range(max_attempts):
        header = pick_random_user_agent()

        try:
            response = requests.get(link, headers=header, timeout=timeout)
        except requests.RequestException as error:
            # Timeouts / connection resets are transient: remember and retry.
            last_failure = error
        else:
            if response.status_code == 200:
                return response.content
            last_failure = f"HTTP {response.status_code}"

        # Back off (0.5s, 1s, 2s, ... capped at 8s) before the next try, but
        # do not wait after the final attempt.
        if attempt + 1 < max_attempts:
            sleep(min(0.5 * 2 ** attempt, 8))

    raise RuntimeError(
        f"Could not fetch {link} after {max_attempts} attempts "
        f"(last failure: {last_failure})"
    )


In [29]:
def _parse_money(text):
    """Convert a dollar amount such as ``$1,234.50`` to a ``float``."""
    return float(text.replace("$", "").replace(",", ""))


def _parse_overview_card(overview_element):
    """Collect the overview fields that are present in the token overview card."""
    overview_dictionary = {}

    token_standart = overview_element.select_one(".ml-1 b")
    if token_standart is not None:
        overview_dictionary["token_standart"] = token_standart.text

    token_price = overview_element.select_one(".d-block span:nth-child(1)")
    if token_price is not None:
        overview_dictionary["token_price"] = _parse_money(token_price.text)

    token_marketcap = overview_element.select_one("#pricebutton")
    if token_marketcap is not None:
        # The slice drops two leading and one trailing character around the amount.
        overview_dictionary["token_marketcap"] = _parse_money(
            token_marketcap.text[2:-1]
        )

    token_supply = overview_element.select_one(".hash-tag")
    if token_supply is not None:
        overview_dictionary["token_supply"] = float(token_supply.text.replace(",", ""))

    token_holders = overview_element.select_one(
        "#ContentPlaceHolder1_tr_tokenHolders .mr-3"
    )
    if token_holders is not None:
        # The slice keeps only the number: one leading and eleven trailing
        # characters of the element text are discarded.
        overview_dictionary["token_holders"] = int(
            token_holders.text[1:-11].replace(",", "")
        )

    token_transfers = overview_element.select_one("#totaltxns")
    if token_transfers is not None:
        # A lone "-" is treated as zero transfers.
        overview_dictionary["token_transfers"] = (
            int(token_transfers.text.replace(",", ""))
            if token_transfers.text != "-"
            else 0
        )

    token_socials = overview_element.select_one(
        "#ContentPlaceHolder1_trDecimals+ div .col-md-8"
    )
    if token_socials is not None:
        overview_dictionary["token_socials"] = token_socials.text

    return overview_dictionary


def _parse_holders_table(holders_page):
    """Build ``{rank: {"address", "quantity", "percentage"}}`` from the holders page."""
    holder_addresses = holders_page.select("#ContentPlaceHolder1_resultrows a")
    holder_quantities = holders_page.select("td:nth-child(3)")
    holder_percentages = holders_page.select("td:nth-child(4)")

    holders_dictionary = {}
    # zip() stops at the shortest column, so a page where the three selectors
    # match different numbers of elements no longer raises IndexError.
    for rank, (address, quantity, percentage) in enumerate(
        zip(holder_addresses, holder_quantities, holder_percentages)
    ):
        holders_dictionary[rank] = {
            "address": address.text,
            "quantity": float(quantity.text.replace(",", "")),
            # [:-1] drops the last character of the cell before conversion.
            "percentage": float(percentage.text[:-1].replace(",", "")),
        }

    return holders_dictionary


def parse_token_page(body):
    """Scrape the token page (and, if needed, the holders page) of one contract.

    Args:
        body: Contract dict as produced by ``parse_body``; only its
            ``"token_url"`` and ``"holders_url"`` entries are read.

    Returns:
        The string ``"Not Existing"`` when the page has no token name element.
        Otherwise a dict with ``"name"`` and, when the overview card is
        present, ``"overview_dictionary"``; ``"holders_dictionary"`` is added
        when the overview reports a non-zero holder count.
    """
    token_page = get_token_page(body["token_url"])

    # Short pause after each token page download.
    sleep(0.5)

    parsed_body = BeautifulSoup(token_page, "html.parser")

    name_element = parsed_body.select_one(".media-body .small")
    if name_element is None:
        # Sentinel string (not a dict) kept unchanged for existing consumers.
        return "Not Existing"

    # [:-1] drops the last character of the element text.
    page_dictionary = {"name": name_element.text[:-1]}

    overview_element = parsed_body.select_one(
        ".card:has(#ContentPlaceHolder1_tr_valuepertoken)"
    )
    if overview_element is None:
        return page_dictionary

    overview_dictionary = _parse_overview_card(overview_element)

    # .get(): the holders counter can be absent from the card; indexing the
    # key directly raised KeyError in that case.
    if overview_dictionary.get("token_holders", 0) != 0:
        holders_page = BeautifulSoup(
            get_token_page(body["holders_url"]), "html.parser"
        )
        page_dictionary["holders_dictionary"] = _parse_holders_table(holders_page)

    page_dictionary["overview_dictionary"] = overview_dictionary

    return page_dictionary


# Fetch the listing, attach each contract's scraped token page to its record,
# echo the record, and finally persist all records as JSON.
body = get_bscscan()
results_array = parse_body(body)

for token_dictionary in results_array:
    page_dictionary = parse_token_page(token_dictionary)
    token_dictionary.update({"page_dictionary": page_dictionary})
    print(token_dictionary)

with open("results.json", "w+") as f:
    json.dump(obj=results_array, fp=f, indent=2)
    
    


{'position': 0, 'name': 'DigiCAT', 'compiler': 'Solidity', 'compiler_version': '0.8.7', 'license': 'MIT', 'balance': '0 BNB', 'transactions': 1, 'address': '0x6fc19c6d5e8166145db1c0b2cabc59bd16094bff', 'contract_url': 'https://bscscan.com/address/0x6fc19c6d5e8166145db1c0b2cabc59bd16094bff', 'token_url': 'https://bscscan.com/token/0x6fc19c6d5e8166145db1c0b2cabc59bd16094bff#balances', 'holders_url': 'https://bscscan.com/token/tokenholderchart/0x6fc19c6d5e8166145db1c0b2cabc59bd16094bff?range=500', 'page_dictionary': {'name': 'DigiCAT', 'holders_dictionary': {0: {'address': '0x2a57041458aefbeeb723c4678a84bec0582d1e59', 'quantity': 1000000.0, 'percentage': 100.0}}, 'overview_dictionary': {'token_standart': 'BEP-20', 'token_price': 0.0, 'token_marketcap': 0.0, 'token_supply': 1000000.0, 'token_holders': 1, 'token_transfers': 0}}}
{'position': 1, 'name': 'tokenRelaunch', 'compiler': 'Solidity', 'compiler_version': '0.8.0', 'license': 'MIT', 'balance': '0 BNB', 'transactions': 1, 'address': '0

In [30]:
# Convert the scraped JSON records into a CSV file: one header row taken from
# the first record's keys, then one row of values per record.
with open('results.json') as json_file:
    data1 = json.load(json_file)

# `with` closes (and flushes) the CSV handle even if writing a row fails.
with open('/Users/anitateladevalapalli/Documents/results.csv', 'w', newline='') as data_file:
    csv_writer = csv.writer(data_file)

    # Header comes from the first record; an empty result list writes nothing.
    if data1:
        csv_writer.writerow(data1[0].keys())

    for data in data1:
        csv_writer.writerow(data.values())

In [31]:
# Read the exported CSV back into a DataFrame for inspection.
df = pd.read_csv("/Users/anitateladevalapalli/Documents/results.csv")

In [32]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,position,name,compiler,compiler_version,license,balance,transactions,address,contract_url,token_url,holders_url,page_dictionary
0,0,DigiCAT,Solidity,0.8.7,MIT,0 BNB,1,0x6fc19c6d5e8166145db1c0b2cabc59bd16094bff,https://bscscan.com/address/0x6fc19c6d5e816614...,https://bscscan.com/token/0x6fc19c6d5e8166145d...,https://bscscan.com/token/tokenholderchart/0x6...,"{'name': 'DigiCAT', 'holders_dictionary': {'0'..."
1,1,tokenRelaunch,Solidity,0.8.0,MIT,0 BNB,1,0x2ac2278a3deb5a6ffd38a1fb3c56f5961290d2dd,https://bscscan.com/address/0x2ac2278a3deb5a6f...,https://bscscan.com/token/0x2ac2278a3deb5a6ffd...,https://bscscan.com/token/tokenholderchart/0x2...,Not Existing
2,2,CATSwapToken,Solidity,0.8.10,MIT,0 BNB,2,0x98af0a7bf894cff5300b5a85f5e0aad934df05d4,https://bscscan.com/address/0x98af0a7bf894cff5...,https://bscscan.com/token/0x98af0a7bf894cff530...,https://bscscan.com/token/tokenholderchart/0x9...,"{'name': 'CAT Swap', 'holders_dictionary': {'0..."
3,3,ERC20,Solidity,0.6.6,,0 BNB,2,0xbc4F92F9c4187910436DD80990444EE5F520AcF1,https://bscscan.com/address/0xbc4F92F9c4187910...,https://bscscan.com/token/0xbc4F92F9c418791043...,https://bscscan.com/token/tokenholderchart/0xb...,"{'name': 'OverMoon Doge', 'holders_dictionary'..."
4,4,BabiNgepetInu,Solidity,0.8.6,MIT,0 BNB,2,0x8bea4e26f0d8525fbfbeb9906799f04a8d654ce0,https://bscscan.com/address/0x8bea4e26f0d8525f...,https://bscscan.com/token/0x8bea4e26f0d8525fbf...,https://bscscan.com/token/tokenholderchart/0x8...,"{'name': 'Babi Ngepet Inu', 'holders_dictionar..."
