In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from pygeocoder import Geocoder

In [2]:
# Dunkers are listed on one page per level; the level name is passed as the
# `kind` query parameter, so build one listing URL for each level.
prefix = "https://wda.do/dunkers?kind="
dunker_levels = [
    "pro",
    "amateur",
    "nba",
    "nba_retired",
    "retired",
]
urls_by_level = [prefix + kind for kind in dunker_levels]

In [3]:
# Collect the profile URL behind every dunker card on each listing page.
profile_prefix = "https://wda.do"
dunker_links = []
for url in urls_by_level:
    # A timeout keeps a stalled connection from hanging the kernel, and
    # raise_for_status() stops an HTTP error page from being parsed as if it
    # were a (card-less) listing, which would silently drop a whole level.
    listing = requests.get(url, timeout=30)
    listing.raise_for_status()
    soup = BeautifulSoup(listing.text, "html.parser")
    a_tags = soup.find_all("a", class_="card p-3 dss-block")
    for link in a_tags:
        # Assumes each card href is a site-relative path (e.g. "/dunkers/...")
        # that only needs the host prepended; verify against the live markup.
        dunker_links.append(f"{profile_prefix}{link['href']}")

In [4]:
# Scrape the fields shown on each listing page (name, avatar, location, league)
# into parallel lists; entries are matched up by position further down.
names = []
imgs = []
locations = []
leagues = []
dunker_socials = []  # filled later, from the individual profile pages
for url in urls_by_level:
    # Bound the request time and fail loudly on an HTTP error instead of
    # parsing an error page that contains none of the expected elements.
    listing = requests.get(url, timeout=30)
    listing.raise_for_status()
    soup = BeautifulSoup(listing.text, "html.parser")
    # Names
    names.extend(tag.text for tag in soup.find_all("h1", {"class": "h1"}))
    # Profile images
    imgs.extend(tag["src"] for tag in soup.find_all("img", {"alt": "Avatar"}))
    # Locations
    locations.extend(tag.text.strip() for tag in soup.find_all(class_="country-block"))
    # Leagues
    leagues.extend(tag.text.strip() for tag in soup.find_all(class_="dunker-kind"))

# The lists are only meaningful if they line up one-to-one; a card missing any
# of its elements would shift every later entry, so check before going on.
assert len(names) == len(imgs) == len(locations) == len(leagues), (
    "scraped field lists differ in length: "
    f"{len(names)} names, {len(imgs)} avatars, "
    f"{len(locations)} locations, {len(leagues)} leagues"
)

In [11]:
# Build one (instagram_url, youtube_url) tuple per dunker profile page, in the
# same order as dunker_links. A missing link is recorded as "N/A".
# The list is reset here so that re-running this cell does not append a second
# copy of every entry.
dunker_socials = []
for individual_link in dunker_links:
    # Bound each request so one stalled profile cannot hang the whole crawl.
    profile = requests.get(individual_link, timeout=30)
    if not profile.ok:
        # Keep the list aligned with dunker_links rather than scraping
        # whatever links an error page happens to contain.
        dunker_socials.append(("N/A", "N/A"))
        continue
    soup = BeautifulSoup(profile.text, "html.parser")
    # "class": False limits the match to anchors that have no class attribute.
    insta_link = soup.find("a", {"target": "_blank", "href": re.compile("instagram"), "class": False})
    yt_link = soup.find("a", {"target": "_blank", "href": re.compile("youtube"), "class": False})
    dunker_socials.append(
        (
            insta_link["href"] if insta_link is not None else "N/A",
            yt_link["href"] if yt_link is not None else "N/A",
        )
    )

247

In [None]:
# Assemble the scraped lists into a single table, one row per list position.
dunkerdf = pd.DataFrame(
    {
        "Name": names,
        "Avatar": imgs,
        "Location": locations,
        "League": leagues,
    }
)
# An empty league label is recorded as "AMATEUR". The replacement is limited to
# the League column: a frame-wide replace would also overwrite a blank name,
# avatar or location with "AMATEUR".
dunkerdf["League"] = dunkerdf["League"].replace("", "AMATEUR")
# Each dunker_socials entry is an (instagram, youtube) pair; split into columns.
dunkerdf["Instagram"] = [insta for insta, _ in dunker_socials]
dunkerdf["Youtube"] = [yt for _, yt in dunker_socials]

# To add coordinates to each dunker, the location must be converted using a geocoder API.
# Calls might be expensive. To save resources, let's create a dictionary that caches the coordinates of locations that have already been looked up.


In [None]:
# Write the assembled table to a CSV file (path is relative to the working directory).
dunkerdf.to_csv(path_or_buf="dunker_data.csv")