In [None]:
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd
from etl.extract.extract import make_request

# constants:
# site root, prepended to the relative hrefs found on the builds page
SENDOU_BASE_URL = "https://sendou.ink"
# page whose links lead to each weapon's builds page
SENDOU_BUILDS_URL = "https://sendou.ink/builds"


# function that returns the data extracted from sendou as a data frame


def extract_sendou_data():
    """Scrape sendou.ink and return all weapon builds as a DataFrame.

    Returns:
        The DataFrame built by ``create_weapon_build_df`` from the paths and
        weapon names found by ``weapon_build_paths``.
    """
    # lists of paths to each weapon's page of builds, and the weapon names
    path_list, weapon_list = weapon_build_paths()
    print(path_list)
    print(weapon_list)
    # Hand the frame back to the caller instead of display()-ing it and
    # returning None: display() only exists inside IPython, and a notebook
    # cell still renders the returned frame.
    return create_weapon_build_df(path_list, weapon_list)


# function to find paths for weapon build pages
def weapon_build_paths():
    """Collect the builds-page URL and name of every weapon.

    Requests ``SENDOU_BUILDS_URL``, runs every ``<a>`` element through
    ``search_for_build_path`` and keeps the non-None results.

    Returns:
        A ``(paths, weapon_names)`` tuple of two lists.

    Raises:
        Exception: If the two lists end up with different lengths.
    """
    response = make_request(SENDOU_BUILDS_URL)
    soup = BeautifulSoup(response.text, "html.parser")

    paths, weapon_names = [], []
    for anchor in soup.find_all("a"):
        path, weapon_name = search_for_build_path(anchor)
        # keep only the values that were actually found
        if path is not None:
            paths.append(path)
        if weapon_name is not None:
            weapon_names.append(weapon_name)

    # every weapon needs a path, so the lists must line up
    if len(paths) != len(weapon_names):
        raise Exception("Error: number of paths and weapon names don't match")
    return paths, weapon_names


# function to find the build paths
def search_for_build_path(link):
    """Return the absolute URL and weapon name for a weapon-build link.

    Args:
        link: An ``<a>`` element; only ``link.get("href")`` and
            ``link.get_text(strip=True)`` are used.

    Returns:
        ``(path, weapon_name)`` when the href starts with ``/builds/`` and
        the link has non-empty text, otherwise ``(None, None)``.
    """
    href = link.get("href")  # the href attribute of the <a>
    # anchors without an href, or pointing elsewhere, are not build pages
    if href is None or not href.startswith("/builds/"):
        return None, None
    # get_text() returns a string, never None, so an "is not None" test can
    # not reject anything; check for an empty name instead so that a link
    # without text is skipped as intended.
    weapon_name = link.get_text(strip=True)
    if not weapon_name:
        return None, None
    # No pause is needed here: the page has already been downloaded and this
    # function makes no request of its own.
    return SENDOU_BASE_URL + href, weapon_name


def add_to_list(the_list, element):
    """Append ``element`` to ``the_list`` in place unless it is None.

    Returns:
        The same list object that was passed in.
    """
    if element is None:
        return the_list
    the_list.append(element)
    return the_list


# function to create a dataframe from the weapon build data
def create_weapon_build_df(path_list, weapon_list):
    """Scrape every weapon's builds page into a single DataFrame.

    Args:
        path_list: URLs of the per-weapon builds pages.
        weapon_list: Weapon names, in the same order as ``path_list``.

    Returns:
        A DataFrame with one row per build: the weapon name, the alt text of
        each ability image in the build, and the list of game-mode names
        (``['NO MODES LISTED']`` when the build has no modes element).

    Raises:
        ValueError: If a build does not yield exactly one value per column.
    """
    # define dataframe columns
    df_columns = [
        'Weapon_name',
        'Main_1', 'Sub_1', 'Sub_2', 'Sub_3',
        'Main_2', 'Sub_4', 'Sub_5', 'Sub_6',
        'Main_3', 'Sub_7', 'Sub_8', 'Sub_9',
        'game_modes',
    ]

    # Rows are gathered in a plain list and turned into a frame once at the
    # end; enlarging a DataFrame one .loc row at a time copies data on every
    # insert and gets slow with many builds.
    rows = []

    # pair each page with its weapon name instead of tracking an index by hand
    for path, weapon_name in zip(path_list, weapon_list):
        # pause for each path
        time.sleep(1)

        path_soup = BeautifulSoup(
            make_request(path + '?limit=500').text, "html.parser"
        )
        # message to show what weapon we are currently obtaining info from
        print("Scraping builds for: " + weapon_name)

        for build in path_soup.find_all('div', class_='build'):
            # the alt text of each ability image is the ability's name
            ability_list = [
                ability.find('img')['alt']
                for ability in build.find_all(
                    'div', class_="build__ability readonly"
                )
            ]

            modes = build.find('div', class_='build__modes')
            if modes is None:
                mode_list = ['NO MODES LISTED']
            else:
                mode_list = [img['alt'] for img in modes.find_all('img')]

            row = [weapon_name] + ability_list + [mode_list]
            # keep the failure mode of a mismatched .loc row assignment
            if len(row) != len(df_columns):
                raise ValueError(
                    "build for " + weapon_name + " has "
                    + str(len(ability_list)) + " abilities, expected "
                    + str(len(df_columns) - 2)
                )
            rows.append(row)

    return pd.DataFrame(rows, columns=df_columns)

# function to scrape all builds for a single weapon:
def scrape_all_builds(path_soup):
    """Placeholder for scraping one weapon's builds; returns None for now."""


# function to look at a single build and returns its info
def scrape_a_build():
    """Placeholder for scraping a single build; returns None for now."""
    


# function to extract game mode info from a build
def extract_modes():
    """Placeholder for extracting a build's game modes; returns None for now."""

# function to extract ability info from a build
def extract_abilities():
    """Placeholder for extracting a build's abilities; returns None for now."""




In [None]:
extract_sendou_data()

['https://sendou.ink/builds/52-gal', 'https://sendou.ink/builds/52-gal-deco', 'https://sendou.ink/builds/96-gal', 'https://sendou.ink/builds/96-gal-deco', 'https://sendou.ink/builds/aerospray-mg', 'https://sendou.ink/builds/aerospray-rg', 'https://sendou.ink/builds/annaki-splattershot-nova', 'https://sendou.ink/builds/custom-jet-squelcher', 'https://sendou.ink/builds/custom-splattershot-jr', 'https://sendou.ink/builds/foil-squeezer', 'https://sendou.ink/builds/forge-splattershot-pro', 'https://sendou.ink/builds/h-3-nozzlenose', 'https://sendou.ink/builds/h-3-nozzlenose-d', 'https://sendou.ink/builds/jet-squelcher', 'https://sendou.ink/builds/l-3-nozzlenose', 'https://sendou.ink/builds/l-3-nozzlenose-d', 'https://sendou.ink/builds/n-zap-85', 'https://sendou.ink/builds/n-zap-89', 'https://sendou.ink/builds/neo-splash-o-matic', 'https://sendou.ink/builds/neo-sploosh-o-matic', 'https://sendou.ink/builds/splash-o-matic', 'https://sendou.ink/builds/splattershot', 'https://sendou.ink/builds/s

Unnamed: 0,Weapon_name,Main_1,Sub_1,Sub_2,Sub_3,Main_2,Sub_4,Sub_5,Sub_6,Main_3,Sub_7,Sub_8,Sub_9,game_modes
0,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Ink Resistance Up,Stealth Jump,Quick Super Jump,Quick Super Jump,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
1,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
2,.52 Gal,Swim Speed Up,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Ink Recovery Up,Special Saver,Quick Super Jump,Stealth Jump,Ink Resistance Up,Sub Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
3,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Swim Speed Up,Stealth Jump,Special Saver,Quick Super Jump,Ink Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
4,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30300,Splatana Wiper Deco,Sub Power Up,Ink Saver (Sub),Ink Saver (Sub),Ink Saver (Sub),Swim Speed Up,Swim Speed Up,Ink Resistance Up,Sub Resistance Up,Stealth Jump,Quick Super Jump,Quick Super Jump,Quick Super Jump,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
30301,Splatana Wiper Deco,Comeback,Quick Respawn,Quick Respawn,Quick Respawn,Haunt,Swim Speed Up,Swim Speed Up,Sub Power Up,Stealth Jump,Quick Respawn,Quick Respawn,Sub Power Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
30302,Splatana Wiper Deco,Comeback,Quick Respawn,Quick Respawn,Quick Respawn,Sub Power Up,Special Charge Up,Special Charge Up,Special Charge Up,Stealth Jump,Quick Respawn,Quick Super Jump,Sub Power Up,"[Turf War, Splat Zones, Tower Control, Clam Bl..."
30303,Splatana Wiper Deco,Sub Power Up,Sub Power Up,Sub Power Up,Quick Super Jump,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Charge Up,Stealth Jump,Quick Respawn,Quick Respawn,Special Saver,"[Turf War, Splat Zones, Tower Control, Rainmaker]"


In [8]:
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd
from etl.extract.extract import make_request

# constants:
# site root, prepended to the relative hrefs found on the builds page
SENDOU_BASE_URL = "https://sendou.ink"
# page whose links lead to each weapon's builds page
SENDOU_BUILDS_URL = "https://sendou.ink/builds"

# column order of the builds dataframe: weapon name, ability slots, modes
DF_COLUMNS = [
    "Weapon_name",
    "Main_1", "Sub_1", "Sub_2", "Sub_3",
    "Main_2", "Sub_4", "Sub_5", "Sub_6",
    "Main_3", "Sub_7", "Sub_8", "Sub_9",
    "game_modes",
]


# function that returns the data extracted from sendou as a data frame
def extract_sendou_data():
    """Scrape sendou.ink and return all weapon builds as a DataFrame.

    Returns:
        The DataFrame built by ``create_weapon_build_df`` from the paths and
        weapon names found by ``weapon_build_paths``.
    """
    # lists of paths to each weapon's page of builds, and the weapon names
    path_list, weapon_list = weapon_build_paths()
    print(path_list)
    print(weapon_list)
    # Hand the frame back to the caller instead of display()-ing it and
    # returning None: display() only exists inside IPython, and a notebook
    # cell still renders the returned frame.
    return create_weapon_build_df(path_list, weapon_list)


# function to find paths for weapon build pages
def weapon_build_paths():
    """Collect the builds-page URL and name of every weapon.

    Requests ``SENDOU_BUILDS_URL``, runs every ``<a>`` element through
    ``search_for_build_path`` and keeps the non-None results.

    Returns:
        A ``(paths, weapon_names)`` tuple of two lists.

    Raises:
        Exception: If the two lists end up with different lengths.
    """
    response = make_request(SENDOU_BUILDS_URL)
    soup = BeautifulSoup(response.text, "html.parser")

    paths, weapon_names = [], []
    for anchor in soup.find_all("a"):
        path, weapon_name = search_for_build_path(anchor)
        # keep only the values that were actually found
        if path is not None:
            paths.append(path)
        if weapon_name is not None:
            weapon_names.append(weapon_name)

    # every weapon needs a path, so the lists must line up
    if len(paths) != len(weapon_names):
        raise Exception("Error: number of paths and weapon names don't match")
    return paths, weapon_names


# function to find the build paths
def search_for_build_path(link):
    """Return the absolute URL and weapon name for a weapon-build link.

    Args:
        link: An ``<a>`` element; only ``link.get("href")`` and
            ``link.get_text(strip=True)`` are used.

    Returns:
        ``(path, weapon_name)`` when the href starts with ``/builds/`` and
        the link has non-empty text, otherwise ``(None, None)``.
    """
    href = link.get("href")  # the href attribute of the <a>
    # anchors without an href, or pointing elsewhere, are not build pages
    if href is None or not href.startswith("/builds/"):
        return None, None
    # get_text() returns a string, never None, so an "is not None" test can
    # not reject anything; check for an empty name instead so that a link
    # without text is skipped as intended.
    weapon_name = link.get_text(strip=True)
    if not weapon_name:
        return None, None
    # No pause is needed here: the page has already been downloaded and this
    # function makes no request of its own.
    return SENDOU_BASE_URL + href, weapon_name


def add_to_list(the_list, element):
    """Append ``element`` to ``the_list`` in place unless it is None.

    Returns:
        The same list object that was passed in.
    """
    if element is None:
        return the_list
    the_list.append(element)
    return the_list


# function to create a dataframe from the weapon build data
def create_weapon_build_df(path_list, weapon_list, max_weapons=11):
    """Scrape the builds pages of the listed weapons into one DataFrame.

    Args:
        path_list: URLs of the per-weapon builds pages.
        weapon_list: Weapon names, in the same order as ``path_list``.
        max_weapons: Upper bound on how many pages are scraped. The default
            of 11 matches the previous hard-coded debugging cap; pass
            ``None`` to scrape every path.

    Returns:
        A DataFrame with the ``DF_COLUMNS`` columns holding the builds of
        every scraped weapon, re-indexed from 0.
    """
    # Per-weapon frames are collected and concatenated once at the end
    # rather than re-concatenating the growing result on every iteration.
    frames = []

    # count is the index of the weapon builds page we are currently on
    for count, path in enumerate(path_list):
        # stop before pausing, so hitting the cap does not cost a sleep
        if max_weapons is not None and count >= max_weapons:
            break

        # pause for each path
        time.sleep(1)

        path_soup = BeautifulSoup(
            make_request(path + '?limit=500').text, "html.parser"
        )
        frames.append(scrape_all_builds(path_soup, weapon_list, count))

    # pd.concat() rejects an empty list, so return the empty frame directly
    if not frames:
        return pd.DataFrame(columns=DF_COLUMNS)
    return pd.concat(frames, ignore_index=True)

# function to scrape all builds for a single weapon:
def scrape_all_builds(path_soup, weapon_list, count):
    """Return a DataFrame of every build found on one weapon's page.

    Args:
        path_soup: Parsed builds page of the weapon.
        weapon_list: All weapon names.
        count: Index into ``weapon_list`` of the weapon being scraped.

    Returns:
        A DataFrame with the ``DF_COLUMNS`` columns, one row per build.
    """
    builds_frame = pd.DataFrame(columns=DF_COLUMNS)
    weapon_name = weapon_list[count]
    # message to show what weapon we are currently obtaining info from
    print("Scraping builds for: " + weapon_name)

    for entry in path_soup.find_all('div', class_='build'):
        abilities, modes = scrape_a_build(entry)
        # weapon name first, then each ability, then the modes as one cell
        new_row = [weapon_name] + abilities + [modes]
        builds_frame.loc[len(builds_frame)] = new_row
    return builds_frame


# function to look at a single build and returns its info
def scrape_a_build(build):
    """Return ``(ability_list, mode_list)`` for a single build element."""
    # abilities are extracted first, then the modes
    return extract_abilities(build), extract_modes(build)
    

# function to extract game mode info from a build
def extract_modes(build):
    """Return the game-mode names listed on a build.

    Each mode is read from the ``alt`` attribute of an ``<img>`` inside the
    build's ``build__modes`` div. When the build has no such div the result
    is ``['NO MODES LISTED']``.
    """
    modes_container = build.find('div', class_='build__modes')
    if modes_container is None:
        return ['NO MODES LISTED']
    return [img['alt'] for img in modes_container.find_all('img')]

# function to extract ability info from a build
def extract_abilities(build):
    """Return the ability names of a build, in page order.

    Each name is the ``alt`` attribute of the ``<img>`` inside a
    ``build__ability readonly`` div.
    """
    slots = build.find_all('div', class_="build__ability readonly")
    return [slot.find('img')['alt'] for slot in slots]




In [9]:
extract_sendou_data()

['https://sendou.ink/builds/52-gal', 'https://sendou.ink/builds/52-gal-deco', 'https://sendou.ink/builds/96-gal', 'https://sendou.ink/builds/96-gal-deco', 'https://sendou.ink/builds/aerospray-mg', 'https://sendou.ink/builds/aerospray-rg', 'https://sendou.ink/builds/annaki-splattershot-nova', 'https://sendou.ink/builds/custom-jet-squelcher', 'https://sendou.ink/builds/custom-splattershot-jr', 'https://sendou.ink/builds/foil-squeezer', 'https://sendou.ink/builds/forge-splattershot-pro', 'https://sendou.ink/builds/h-3-nozzlenose', 'https://sendou.ink/builds/h-3-nozzlenose-d', 'https://sendou.ink/builds/jet-squelcher', 'https://sendou.ink/builds/l-3-nozzlenose', 'https://sendou.ink/builds/l-3-nozzlenose-d', 'https://sendou.ink/builds/n-zap-85', 'https://sendou.ink/builds/n-zap-89', 'https://sendou.ink/builds/neo-splash-o-matic', 'https://sendou.ink/builds/neo-sploosh-o-matic', 'https://sendou.ink/builds/splash-o-matic', 'https://sendou.ink/builds/splattershot', 'https://sendou.ink/builds/s

Unnamed: 0,Weapon_name,Main_1,Sub_1,Sub_2,Sub_3,Main_2,Sub_4,Sub_5,Sub_6,Main_3,Sub_7,Sub_8,Sub_9,game_modes
0,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Ink Resistance Up,Stealth Jump,Quick Super Jump,Quick Super Jump,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
1,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
2,.52 Gal,Swim Speed Up,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Ink Recovery Up,Special Saver,Quick Super Jump,Stealth Jump,Ink Resistance Up,Sub Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
3,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Swim Speed Up,Stealth Jump,Special Saver,Quick Super Jump,Ink Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
4,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2635,Forge Splattershot Pro,Run Speed Up,Ink Saver (Main),Ink Saver (Main),Ink Saver (Main),Ink Recovery Up,Ink Recovery Up,Ink Recovery Up,Ink Recovery Up,Ink Saver (Main),Run Speed Up,Run Speed Up,Run Speed Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
2636,Forge Splattershot Pro,Last-Ditch Effort,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Charge Up,Ink Saver (Main),Ink Saver (Main),Quick Super Jump,Stealth Jump,Intensify Action,Intensify Action,Ink Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
2637,Forge Splattershot Pro,Ink Recovery Up,Sub Resistance Up,Sub Resistance Up,Special Charge Up,Ink Recovery Up,Special Power Up,Special Power Up,Special Power Up,Ink Saver (Main),Ink Saver (Main),Ink Saver (Main),Ink Saver (Main),"[Turf War, Splat Zones, Tower Control, Rainmak..."
2638,Forge Splattershot Pro,Last-Ditch Effort,Special Saver,Sub Power Up,Intensify Action,Thermal Ink,Ink Saver (Sub),Ink Recovery Up,Swim Speed Up,Special Charge Up,Special Charge Up,Special Charge Up,Ink Saver (Main),"[Splat Zones, Tower Control, Rainmaker, Clam B..."


In [None]:
extract_sendou_data()

['https://sendou.ink/builds/52-gal', 'https://sendou.ink/builds/52-gal-deco', 'https://sendou.ink/builds/96-gal', 'https://sendou.ink/builds/96-gal-deco', 'https://sendou.ink/builds/aerospray-mg', 'https://sendou.ink/builds/aerospray-rg', 'https://sendou.ink/builds/annaki-splattershot-nova', 'https://sendou.ink/builds/custom-jet-squelcher', 'https://sendou.ink/builds/custom-splattershot-jr', 'https://sendou.ink/builds/foil-squeezer', 'https://sendou.ink/builds/forge-splattershot-pro', 'https://sendou.ink/builds/h-3-nozzlenose', 'https://sendou.ink/builds/h-3-nozzlenose-d', 'https://sendou.ink/builds/jet-squelcher', 'https://sendou.ink/builds/l-3-nozzlenose', 'https://sendou.ink/builds/l-3-nozzlenose-d', 'https://sendou.ink/builds/n-zap-85', 'https://sendou.ink/builds/n-zap-89', 'https://sendou.ink/builds/neo-splash-o-matic', 'https://sendou.ink/builds/neo-sploosh-o-matic', 'https://sendou.ink/builds/splash-o-matic', 'https://sendou.ink/builds/splattershot', 'https://sendou.ink/builds/s

Unnamed: 0,Weapon_name,Main_1,Sub_1,Sub_2,Sub_3,Main_2,Sub_4,Sub_5,Sub_6,Main_3,Sub_7,Sub_8,Sub_9,game_modes
0,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Ink Resistance Up,Stealth Jump,Quick Super Jump,Quick Super Jump,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
1,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
2,.52 Gal,Swim Speed Up,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Ink Recovery Up,Special Saver,Quick Super Jump,Stealth Jump,Ink Resistance Up,Sub Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
3,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Swim Speed Up,Stealth Jump,Special Saver,Quick Super Jump,Ink Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
4,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2635,Forge Splattershot Pro,Run Speed Up,Ink Saver (Main),Ink Saver (Main),Ink Saver (Main),Ink Recovery Up,Ink Recovery Up,Ink Recovery Up,Ink Recovery Up,Ink Saver (Main),Run Speed Up,Run Speed Up,Run Speed Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
2636,Forge Splattershot Pro,Last-Ditch Effort,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Charge Up,Ink Saver (Main),Ink Saver (Main),Quick Super Jump,Stealth Jump,Intensify Action,Intensify Action,Ink Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
2637,Forge Splattershot Pro,Ink Recovery Up,Sub Resistance Up,Sub Resistance Up,Special Charge Up,Ink Recovery Up,Special Power Up,Special Power Up,Special Power Up,Ink Saver (Main),Ink Saver (Main),Ink Saver (Main),Ink Saver (Main),"[Turf War, Splat Zones, Tower Control, Rainmak..."
2638,Forge Splattershot Pro,Last-Ditch Effort,Special Saver,Sub Power Up,Intensify Action,Thermal Ink,Ink Saver (Sub),Ink Recovery Up,Swim Speed Up,Special Charge Up,Special Charge Up,Special Charge Up,Ink Saver (Main),"[Splat Zones, Tower Control, Rainmaker, Clam B..."


In [None]:
from bs4 import BeautifulSoup
# import requests
import time
import pandas as pd
from etl.extract.extract import make_request

# constants:
# site root, prepended to the relative hrefs found on the builds page
SENDOU_BASE_URL = "https://sendou.ink"
# page whose links lead to each weapon's builds page
SENDOU_BUILDS_URL = "https://sendou.ink/builds"

# column order of the builds dataframe: weapon name, ability slots, modes
DF_COLUMNS = [
    "Weapon_name",
    "Main_1", "Sub_1", "Sub_2", "Sub_3",
    "Main_2", "Sub_4", "Sub_5", "Sub_6",
    "Main_3", "Sub_7", "Sub_8", "Sub_9",
    "game_modes",
]


# function that returns the data extracted from sendou as a data frame
def extract_sendou_data():
    """Scrape sendou.ink and return all weapon builds as a DataFrame.

    Prints the list of builds-page paths and the list of weapon names before
    scraping them with ``create_weapon_build_df``.
    """
    build_paths, weapon_names = weapon_build_paths()
    # show what is about to be scraped: first the paths, then the names
    for listing in (build_paths, weapon_names):
        print(listing)
    return create_weapon_build_df(build_paths, weapon_names)


# function to find paths for weapon build pages
def weapon_build_paths():
    """Collect the builds-page URL and name of every weapon.

    Requests ``SENDOU_BUILDS_URL``, runs every ``<a>`` element through
    ``search_for_build_path`` and keeps the non-None results.

    Returns:
        A ``(paths, weapon_names)`` tuple of two lists.

    Raises:
        Exception: If the two lists end up with different lengths.
    """
    response = make_request(SENDOU_BUILDS_URL)
    soup = BeautifulSoup(response.text, "html.parser")

    paths, weapon_names = [], []
    for anchor in soup.find_all("a"):
        path, weapon_name = search_for_build_path(anchor)
        # keep only the values that were actually found
        if path is not None:
            paths.append(path)
        if weapon_name is not None:
            weapon_names.append(weapon_name)

    # every weapon needs a path, so the lists must line up
    if len(paths) != len(weapon_names):
        raise Exception("Error: number of paths and weapon names don't match")
    return paths, weapon_names


# function to find the build paths
def search_for_build_path(link):
    """Return the absolute URL and weapon name for a weapon-build link.

    Args:
        link: An ``<a>`` element; only ``link.get("href")`` and
            ``link.get_text(strip=True)`` are used.

    Returns:
        ``(path, weapon_name)`` when the href starts with ``/builds/`` and
        the link has non-empty text, otherwise ``(None, None)``.
    """
    href = link.get("href")  # the href attribute of the <a>
    # anchors without an href, or pointing elsewhere, are not build pages
    if href is None or not href.startswith("/builds/"):
        return None, None
    # get_text() returns a string, never None, so an "is not None" test can
    # not reject anything; check for an empty name instead so that a link
    # without text is skipped as intended.
    weapon_name = link.get_text(strip=True)
    if not weapon_name:
        return None, None
    # No pause is needed here: the page has already been downloaded and this
    # function makes no request of its own.
    return SENDOU_BASE_URL + href, weapon_name


# function to add an element to a list
# only if its not none
def add_to_list(the_list, element):
    """Append ``element`` to ``the_list`` in place unless it is None.

    Returns:
        The same list object that was passed in.
    """
    if element is None:
        return the_list
    the_list.append(element)
    return the_list


# function to create a dataframe from the weapon build data
def create_weapon_build_df(path_list, weapon_list, max_weapons=1):
    """Scrape the builds pages of the listed weapons into one DataFrame.

    Args:
        path_list: URLs of the per-weapon builds pages.
        weapon_list: Weapon names, in the same order as ``path_list``.
        max_weapons: Upper bound on how many pages are scraped. The default
            of 1 matches the previous hard-coded debugging cap; pass
            ``None`` to scrape every path.

    Returns:
        A DataFrame with the ``DF_COLUMNS`` columns holding the builds of
        every scraped weapon, re-indexed from 0.
    """
    # Per-weapon frames are collected and concatenated once at the end
    # rather than re-concatenating the growing result on every iteration.
    frames = []
    # count is the index of the weapon builds page we are currently on
    for count, path in enumerate(path_list):
        # stop before pausing, so hitting the cap does not cost a sleep
        if max_weapons is not None and count >= max_weapons:
            break
        # pause for each path
        time.sleep(1)
        # make request to the path
        path_soup = BeautifulSoup(make_request(path + '?limit=500').text,
                                  "html.parser")
        # all the builds of a single weapon, as a dataframe
        frames.append(scrape_all_builds(path_soup, weapon_list, count))
    # pd.concat() rejects an empty list, so return the empty frame directly
    if not frames:
        return pd.DataFrame(columns=DF_COLUMNS)
    return pd.concat(frames, ignore_index=True)


# function to scrape all builds for a single weapon:
def scrape_all_builds(path_soup, weapon_list, count):
    """Return a DataFrame of every build found on one weapon's page.

    Args:
        path_soup: Parsed builds page of the weapon.
        weapon_list: All weapon names.
        count: Index into ``weapon_list`` of the weapon being scraped.

    Returns:
        A DataFrame with the ``DF_COLUMNS`` columns, one row per build.
    """
    builds_frame = pd.DataFrame(columns=DF_COLUMNS)
    weapon_name = weapon_list[count]
    # message to show what weapon we are currently obtaining info from
    print("Scraping builds for: " + weapon_name)
    for entry in path_soup.find_all('div', class_='build'):
        abilities, modes = scrape_a_build(entry)
        # weapon name first, then each ability, then the modes as one cell
        new_row = [weapon_name] + abilities + [modes]
        builds_frame.loc[len(builds_frame)] = new_row
    return builds_frame


# function to look at a single build and returns its info
def scrape_a_build(build):
    """Return ``(ability_list, mode_list)`` for a single build element."""
    # abilities are extracted first, then the modes
    return extract_abilities(build), extract_modes(build)


# function to extract game mode info from a build
def extract_modes(build):
    """Return the game-mode names listed on a build.

    Each mode is read from the ``alt`` attribute of an ``<img>`` inside the
    build's ``build__modes`` div. When the build has no such div the result
    is the placeholder ``['NO MODES LISTED']``.
    """
    modes_container = build.find('div', class_='build__modes')
    # no modes div at all: fall back to the placeholder entry
    if modes_container is None:
        return ['NO MODES LISTED']
    # each mode is an image whose alt text is the mode's name
    return [img['alt'] for img in modes_container.find_all('img')]


# function to extract ability info from a build
def extract_abilities(build):
    """Return the ability names of a build, in page order.

    Each name is the ``alt`` attribute of the ``<img>`` inside a
    ``build__ability readonly`` div.
    """
    slots = build.find_all('div', class_="build__ability readonly")
    # the image's alt text carries the ability name
    return [slot.find('img')['alt'] for slot in slots]


<div class="build__ability readonly"><img alt="Opening Gambit" src="/static-assets/img/abilities/OG.png"/></div>

In [None]:
<div class="build__modes"><img alt="Splat Zones" src="/static-assets/img/modes/SZ.png"/><img alt="Tower Control" src="/static-assets/img/modes/TC.png"/><img alt="Rainmaker" src="/static-assets/img/modes/RM.png"/><img alt="Clam Blitz" src="/static-assets/img/modes/CB.png"/></div>

In [35]:
extract_sendou_data()

['https://sendou.ink/builds/52-gal', 'https://sendou.ink/builds/52-gal-deco', 'https://sendou.ink/builds/96-gal', 'https://sendou.ink/builds/96-gal-deco', 'https://sendou.ink/builds/aerospray-mg', 'https://sendou.ink/builds/aerospray-rg', 'https://sendou.ink/builds/annaki-splattershot-nova', 'https://sendou.ink/builds/custom-jet-squelcher', 'https://sendou.ink/builds/custom-splattershot-jr', 'https://sendou.ink/builds/foil-squeezer', 'https://sendou.ink/builds/forge-splattershot-pro', 'https://sendou.ink/builds/h-3-nozzlenose', 'https://sendou.ink/builds/h-3-nozzlenose-d', 'https://sendou.ink/builds/jet-squelcher', 'https://sendou.ink/builds/l-3-nozzlenose', 'https://sendou.ink/builds/l-3-nozzlenose-d', 'https://sendou.ink/builds/n-zap-85', 'https://sendou.ink/builds/n-zap-89', 'https://sendou.ink/builds/neo-splash-o-matic', 'https://sendou.ink/builds/neo-sploosh-o-matic', 'https://sendou.ink/builds/splash-o-matic', 'https://sendou.ink/builds/splattershot', 'https://sendou.ink/builds/s

Unnamed: 0,Weapon_name,Main_1,Sub_1,Sub_2,Sub_3,Main_2,Sub_4,Sub_5,Sub_6,Main_3,Sub_7,Sub_8,Sub_9,game_modes
0,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Ink Resistance Up,Stealth Jump,Quick Super Jump,Quick Super Jump,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
1,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
2,.52 Gal,Swim Speed Up,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Ink Recovery Up,Special Saver,Quick Super Jump,Stealth Jump,Ink Resistance Up,Sub Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
3,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Swim Speed Up,Stealth Jump,Special Saver,Quick Super Jump,Ink Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
4,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,.52 Gal,Swim Speed Up,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Ink Recovery Up,Special Saver,Quick Super Jump,Stealth Jump,Ink Resistance Up,Sub Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
236,.52 Gal,Comeback,Special Charge Up,Special Charge Up,Ink Resistance Up,Swim Speed Up,Swim Speed Up,Swim Speed Up,Quick Super Jump,Stealth Jump,Intensify Action,Intensify Action,Sub Resistance Up,"[Turf War, Splat Zones, Tower Control, Rainmak..."
237,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
238,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."


In [37]:
# FINAL CODE (as of 09/05/2025)

In [None]:
from bs4 import BeautifulSoup
# import requests
import time
import pandas as pd
from etl.extract.extract import make_request

# constants:
# site root, prepended to the relative hrefs found on the builds page
SENDOU_BASE_URL = "https://sendou.ink"
# page whose links lead to each weapon's builds page
SENDOU_BUILDS_URL = "https://sendou.ink/builds"

# column names of the builds dataframe, in order
DF_COLUMNS = [
    "Weapon_name",
    "Main_1", "Sub_1", "Sub_2", "Sub_3",
    "Main_2", "Sub_4", "Sub_5", "Sub_6",
    "Main_3", "Sub_7", "Sub_8", "Sub_9",
    "game_modes",
]


def extract_sendou_data():
    """Scrape the builds of every weapon and return them as one dataframe.

    First gathers the builds-page URL and the name of each weapon, then
    passes both lists to create_weapon_build_df and returns its result.
    """
    # the two lists are positionally aligned: entry i of each belongs
    # to the same weapon
    build_urls, names = weapon_build_paths()
    return create_weapon_build_df(build_urls, names)


def weapon_build_paths():
    """Collect the builds-page URL and the name of every weapon.

    Requests SENDOU_BUILDS_URL, runs search_for_build_path on every <a>
    tag of the page and keeps the results that are not None.

    Returns:
        A (paths, weapon_names) tuple of two lists.

    Raises:
        Exception: if the two lists end up with different lengths.
    """
    response = make_request(SENDOU_BUILDS_URL)
    soup = BeautifulSoup(response.text, "html.parser")

    paths, weapon_names = [], []
    for anchor in soup.find_all("a"):
        # (None, None) comes back for links that are not weapon build pages;
        # add_to_list ignores those
        path, weapon_name = search_for_build_path(anchor)
        paths = add_to_list(paths, path)
        weapon_names = add_to_list(weapon_names, weapon_name)

    # each path needs its weapon name, otherwise positions stop lining up
    if len(paths) != len(weapon_names):
        raise Exception("Error: number of paths and weapon names don't match")
    return paths, weapon_names


def search_for_build_path(link):
    """Return the builds-page URL and weapon name behind a link, if any.

    Args:
        link: an <a> tag; it must provide get("href") and
            get_text(strip=True).

    Returns:
        (url, weapon_name) when the href starts with "/builds/" and the
        link has a non-empty label, otherwise (None, None).
    """
    href = link.get("href")
    # links without an href, or pointing elsewhere, are not weapon pages
    if href is None or not href.startswith("/builds/"):
        return None, None
    # get_text() always yields a string, so a link without a label gives ""
    # rather than None; test for emptiness so such links are really skipped
    weapon_name = link.get_text(strip=True)
    if not weapon_name:
        return None, None
    # no pause is needed here: only the already downloaded page is inspected,
    # nothing is requested from the website
    return SENDOU_BASE_URL + href, weapon_name


def add_to_list(the_list, element):
    """Append element to the_list unless it is None; return the_list.

    The list is changed in place and the very same list is handed back.
    """
    if element is None:
        return the_list
    the_list.append(element)
    return the_list


def create_weapon_build_df(path_list, weapon_list):
    """Scrape every weapon's builds page and combine the results.

    Args:
        path_list: URLs of the weapon builds pages.
        weapon_list: weapon names; entry i belongs to path_list[i].

    Returns:
        A dataframe with the DF_COLUMNS columns holding the builds of all
        weapons, indexed 0..n-1. Empty when nothing was scraped.
    """
    frames = []  # one dataframe per weapon that has at least one build
    # count is the position of the current weapon in weapon_list
    for count, path in enumerate(path_list):
        # pause before each request (avoid overwhelming the website)
        time.sleep(1)
        # presumably the limit parameter raises the number of builds the
        # page returns -- TODO confirm against the site
        response = make_request(path + '?limit=500')
        path_soup = BeautifulSoup(response.text, "html.parser")
        df_weapon_builds = scrape_all_builds(path_soup, weapon_list, count)
        # empty frames contribute no rows, so leave them out of the concat
        if not df_weapon_builds.empty:
            frames.append(df_weapon_builds)
    # pd.concat refuses an empty list, so return the bare column layout
    if not frames:
        return pd.DataFrame(columns=DF_COLUMNS)
    # concatenate once at the end instead of re-copying the growing
    # dataframe on every iteration
    return pd.concat(frames, ignore_index=True)


def scrape_all_builds(path_soup, weapon_list, count):
    """Turn every build on one weapon's page into a dataframe row.

    Args:
        path_soup: parsed builds page of a single weapon.
        weapon_list: list of all weapon names.
        count: index in weapon_list of the weapon this page belongs to.

    Returns:
        A dataframe with the DF_COLUMNS columns and one row per build.

    Raises:
        ValueError: if a build does not produce exactly one value per
            column of DF_COLUMNS.
    """
    weapon_name = weapon_list[count]
    # message to show what weapon we are currently obtaining info from
    print("Scraping builds for: " + weapon_name)
    rows = []
    for build in path_soup.find_all('div', class_='build'):
        ability_list, mode_list = scrape_a_build(build)
        # the modes stay together as one list in the last column
        row = [weapon_name] + ability_list + [mode_list]
        # a short or long row would shift values into the wrong columns,
        # so reject it instead of letting it through
        if len(row) != len(DF_COLUMNS):
            raise ValueError(
                f"build for {weapon_name} has {len(row)} values, "
                f"expected {len(DF_COLUMNS)}"
            )
        rows.append(row)
    # build the dataframe once instead of enlarging it row by row
    return pd.DataFrame(rows, columns=DF_COLUMNS)


def scrape_a_build(build):
    """Return (ability_list, mode_list) for a single build element."""
    # abilities are read first, then the game modes
    return extract_abilities(build), extract_modes(build)


def extract_modes(build):
    """Return the game modes listed on a build as a list of names.

    Each mode is represented by an image whose alt text is the mode name.
    A build without a 'build__modes' container yields the placeholder
    list ['NO MODES LISTED'].
    """
    modes_container = build.find('div', class_='build__modes')
    if modes_container is None:
        # placeholder for now
        return ['NO MODES LISTED']
    return [image['alt'] for image in modes_container.find_all('img')]


def extract_abilities(build):
    """Return the ability names of a build, in page order.

    Every 'build__ability readonly' div holds an image whose alt text is
    the name of the ability.
    """
    ability_slots = build.find_all('div', class_="build__ability readonly")
    return [slot.find('img')['alt'] for slot in ability_slots]


In [39]:
# run the full scrape; the returned dataframe is the value of this cell
extract_sendou_data()

Scraping builds for: .52 Gal
Scraping builds for: .52 Gal Deco
Scraping builds for: .96 Gal
Scraping builds for: .96 Gal Deco
Scraping builds for: Aerospray MG
Scraping builds for: Aerospray RG
Scraping builds for: Annaki Splattershot Nova
Scraping builds for: Custom Jet Squelcher
Scraping builds for: Custom Splattershot Jr.
Scraping builds for: Foil Squeezer
Scraping builds for: Forge Splattershot Pro
Scraping builds for: H-3 Nozzlenose
Scraping builds for: H-3 Nozzlenose D
Scraping builds for: Jet Squelcher
Scraping builds for: L-3 Nozzlenose
Scraping builds for: L-3 Nozzlenose D
Scraping builds for: N-ZAP '85
Scraping builds for: N-ZAP '89
Scraping builds for: Neo Splash-o-matic
Scraping builds for: Neo Sploosh-o-matic
Scraping builds for: Splash-o-matic
Scraping builds for: Splattershot
Scraping builds for: Splattershot Jr.
Scraping builds for: Splattershot Nova
Scraping builds for: Splattershot Pro
Scraping builds for: Sploosh-o-matic
Scraping builds for: Squeezer
Scraping builds 

Unnamed: 0,Weapon_name,Main_1,Sub_1,Sub_2,Sub_3,Main_2,Sub_4,Sub_5,Sub_6,Main_3,Sub_7,Sub_8,Sub_9,game_modes
0,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Ink Resistance Up,Stealth Jump,Quick Super Jump,Quick Super Jump,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
1,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
2,.52 Gal,Swim Speed Up,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Ink Recovery Up,Special Saver,Quick Super Jump,Stealth Jump,Ink Resistance Up,Sub Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
3,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Swim Speed Up,Stealth Jump,Special Saver,Quick Super Jump,Ink Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
4,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30295,Splatana Wiper Deco,Comeback,Swim Speed Up,Swim Speed Up,Special Saver,Sub Power Up,Sub Power Up,Sub Power Up,Sub Power Up,Sub Power Up,Special Power Up,Quick Super Jump,Sub Power Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
30296,Splatana Wiper Deco,Sub Power Up,Ink Saver (Sub),Ink Saver (Sub),Ink Saver (Sub),Swim Speed Up,Swim Speed Up,Ink Resistance Up,Sub Resistance Up,Stealth Jump,Quick Super Jump,Quick Super Jump,Quick Super Jump,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
30297,Splatana Wiper Deco,Comeback,Quick Respawn,Quick Respawn,Quick Respawn,Haunt,Swim Speed Up,Swim Speed Up,Sub Power Up,Stealth Jump,Quick Respawn,Quick Respawn,Sub Power Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
30298,Splatana Wiper Deco,Comeback,Quick Respawn,Quick Respawn,Quick Respawn,Sub Power Up,Special Charge Up,Special Charge Up,Special Charge Up,Stealth Jump,Quick Respawn,Quick Super Jump,Sub Power Up,"[Turf War, Splat Zones, Tower Control, Clam Bl..."


In [4]:
from bs4 import BeautifulSoup
# import requests
import time
import pandas as pd
from etl.extract.url_request import make_request

# defining constants:
# page that is scanned for links to the individual weapon build pages
SENDOU_BUILDS_URL = "https://sendou.ink/builds"
# site root; prefixed to the relative "/builds/..." hrefs found on that page
SENDOU_BASE_URL = "https://sendou.ink"

# column layout shared by every builds dataframe:
# the weapon name, then the scraped abilities in page order,
# then the list of game modes of the build
DF_COLUMNS = [
    'Weapon_name',
    'Main_1', 'Sub_1', 'Sub_2', 'Sub_3',
    'Main_2', 'Sub_4', 'Sub_5', 'Sub_6',
    'Main_3', 'Sub_7', 'Sub_8', 'Sub_9',
    'game_modes'
]

# when True, weapon_build_paths only looks at the first 10 links of the page
TESTING_MODE = False


def extract_sendou_data():
    """Scrape the builds of every weapon and return them as one dataframe.

    First gathers the builds-page URL and the name of each weapon, then
    passes both lists to create_weapon_build_df and returns its result.
    """
    # the two lists are positionally aligned: entry i of each belongs
    # to the same weapon
    build_urls, names = weapon_build_paths()
    return create_weapon_build_df(build_urls, names)


def weapon_build_paths():
    """Collect the builds-page URL and the name of every weapon.

    Requests SENDOU_BUILDS_URL, runs search_for_build_path on every <a>
    tag of the page (only the first 10 tags when TESTING_MODE is True)
    and keeps the results that are not None.

    Returns:
        A (paths, weapon_names) tuple of two lists.

    Raises:
        Exception: if the two lists end up with different lengths.
    """
    response = make_request(SENDOU_BUILDS_URL)
    soup = BeautifulSoup(response.text, "html.parser")

    anchors = soup.find_all("a")
    # --------- for testing -----------
    # shorten the run by looking at a handful of links only
    if TESTING_MODE is True:
        anchors = anchors[:10]
    # ---------------------------------

    paths, weapon_names = [], []
    for anchor in anchors:
        # (None, None) comes back for links that are not weapon build pages;
        # add_to_list ignores those
        path, weapon_name = search_for_build_path(anchor)
        paths = add_to_list(paths, path)
        weapon_names = add_to_list(weapon_names, weapon_name)

    # each path needs its weapon name, otherwise positions stop lining up
    if len(paths) != len(weapon_names):
        raise Exception("Error: number of paths and weapon names don't match")
    return paths, weapon_names


def search_for_build_path(link):
    """Return the builds-page URL and weapon name behind a link, if any.

    Args:
        link: an <a> tag; it must provide get("href") and
            get_text(strip=True).

    Returns:
        (url, weapon_name) when the href starts with "/builds/" and the
        link has a non-empty label, otherwise (None, None).
    """
    href = link.get("href")
    # links without an href, or pointing elsewhere, are not weapon pages
    if href is None or not href.startswith("/builds/"):
        return None, None
    # get_text() always yields a string, so a link without a label gives ""
    # rather than None; test for emptiness so such links are really skipped
    weapon_name = link.get_text(strip=True)
    if not weapon_name:
        return None, None
    # no pause is needed here: only the already downloaded page is inspected,
    # nothing is requested from the website
    return SENDOU_BASE_URL + href, weapon_name


def add_to_list(the_list, element):
    """Append element to the_list unless it is None; return the_list.

    The list is changed in place and the very same list is handed back.
    """
    if element is None:
        return the_list
    the_list.append(element)
    return the_list


def create_weapon_build_df(path_list, weapon_list):
    """Scrape every weapon's builds page and combine the results.

    Args:
        path_list: URLs of the weapon builds pages.
        weapon_list: weapon names; entry i belongs to path_list[i].

    Returns:
        A dataframe with the DF_COLUMNS columns holding the builds of all
        weapons, indexed 0..n-1. Empty when nothing was scraped.
    """
    frames = []  # one dataframe per weapon that has at least one build
    # count is the position of the current weapon in weapon_list
    for count, path in enumerate(path_list):
        # pause before each request (avoid overwhelming the website)
        time.sleep(1)
        # presumably the limit parameter raises the number of builds the
        # page returns -- TODO confirm against the site
        response = make_request(path + '?limit=500')
        path_soup = BeautifulSoup(response.text, "html.parser")
        df_weapon_builds = scrape_all_builds(path_soup, weapon_list, count)
        # empty frames contribute no rows, so leave them out of the concat
        if not df_weapon_builds.empty:
            frames.append(df_weapon_builds)
    # pd.concat refuses an empty list, so return the bare column layout
    if not frames:
        return pd.DataFrame(columns=DF_COLUMNS)
    # concatenate once at the end instead of re-copying the growing
    # dataframe on every iteration
    return pd.concat(frames, ignore_index=True)


def scrape_all_builds(path_soup, weapon_list, count):
    """Turn every build on one weapon's page into a dataframe row.

    Args:
        path_soup: parsed builds page of a single weapon.
        weapon_list: list of all weapon names.
        count: index in weapon_list of the weapon this page belongs to.

    Returns:
        A dataframe with the DF_COLUMNS columns and one row per build.

    Raises:
        ValueError: if a build does not produce exactly one value per
            column of DF_COLUMNS.
    """
    weapon_name = weapon_list[count]
    # message to show what weapon we are currently obtaining info from
    print("Scraping builds for: " + weapon_name)
    rows = []
    for build in path_soup.find_all('div', class_='build'):
        ability_list, mode_list = scrape_a_build(build)
        # the modes stay together as one list in the last column
        row = [weapon_name] + ability_list + [mode_list]
        # a short or long row would shift values into the wrong columns,
        # so reject it instead of letting it through
        if len(row) != len(DF_COLUMNS):
            raise ValueError(
                f"build for {weapon_name} has {len(row)} values, "
                f"expected {len(DF_COLUMNS)}"
            )
        rows.append(row)
    # build the dataframe once instead of enlarging it row by row
    return pd.DataFrame(rows, columns=DF_COLUMNS)


def scrape_a_build(build):
    """Return (ability_list, mode_list) for a single build element."""
    # abilities are read first, then the game modes
    return extract_abilities(build), extract_modes(build)


def extract_modes(build):
    """Return the game modes listed on a build as a list of names.

    Each mode is represented by an image whose alt text is the mode name.
    A build without a 'build__modes' container yields the placeholder
    list ['NO MODES LISTED'].
    """
    modes_container = build.find('div', class_='build__modes')
    if modes_container is None:
        # placeholder for now
        return ['NO MODES LISTED']
    return [image['alt'] for image in modes_container.find_all('img')]


def extract_abilities(build):
    """Return the ability names of a build, in page order.

    Every 'build__ability readonly' div holds an image whose alt text is
    the name of the ability.
    """
    ability_slots = build.find_all('div', class_="build__ability readonly")
    return [slot.find('img')['alt'] for slot in ability_slots]


In [5]:
# run the scrape and render the resulting dataframe in the notebook
display(extract_sendou_data())

Scraping builds for: .52 Gal
Scraping builds for: .52 Gal Deco
Scraping builds for: .96 Gal
Scraping builds for: .96 Gal Deco
Scraping builds for: Aerospray MG
Scraping builds for: Aerospray RG
Scraping builds for: Annaki Splattershot Nova
Scraping builds for: Custom Jet Squelcher
Scraping builds for: Custom Splattershot Jr.
Scraping builds for: Foil Squeezer
Scraping builds for: Forge Splattershot Pro
Scraping builds for: H-3 Nozzlenose
Scraping builds for: H-3 Nozzlenose D
Scraping builds for: Jet Squelcher
Scraping builds for: L-3 Nozzlenose
Scraping builds for: L-3 Nozzlenose D
Scraping builds for: N-ZAP '85
Scraping builds for: N-ZAP '89
Scraping builds for: Neo Splash-o-matic
Scraping builds for: Neo Sploosh-o-matic
Scraping builds for: Splash-o-matic
Scraping builds for: Splattershot
Scraping builds for: Splattershot Jr.
Scraping builds for: Splattershot Nova
Scraping builds for: Splattershot Pro
Scraping builds for: Sploosh-o-matic
Scraping builds for: Squeezer
Scraping builds 

Unnamed: 0,Weapon_name,Main_1,Sub_1,Sub_2,Sub_3,Main_2,Sub_4,Sub_5,Sub_6,Main_3,Sub_7,Sub_8,Sub_9,game_modes
0,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Ink Resistance Up,Stealth Jump,Quick Super Jump,Quick Super Jump,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
1,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
2,.52 Gal,Swim Speed Up,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Ink Recovery Up,Special Saver,Quick Super Jump,Stealth Jump,Ink Resistance Up,Sub Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
3,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Swim Speed Up,Stealth Jump,Special Saver,Quick Super Jump,Ink Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
4,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30292,Splatana Wiper Deco,Comeback,Swim Speed Up,Swim Speed Up,Special Saver,Sub Power Up,Sub Power Up,Sub Power Up,Sub Power Up,Sub Power Up,Special Power Up,Quick Super Jump,Sub Power Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
30293,Splatana Wiper Deco,Sub Power Up,Ink Saver (Sub),Ink Saver (Sub),Ink Saver (Sub),Swim Speed Up,Swim Speed Up,Ink Resistance Up,Sub Resistance Up,Stealth Jump,Quick Super Jump,Quick Super Jump,Quick Super Jump,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
30294,Splatana Wiper Deco,Comeback,Quick Respawn,Quick Respawn,Quick Respawn,Haunt,Swim Speed Up,Swim Speed Up,Sub Power Up,Stealth Jump,Quick Respawn,Quick Respawn,Sub Power Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
30295,Splatana Wiper Deco,Comeback,Quick Respawn,Quick Respawn,Quick Respawn,Sub Power Up,Special Charge Up,Special Charge Up,Special Charge Up,Stealth Jump,Quick Respawn,Quick Super Jump,Sub Power Up,"[Turf War, Splat Zones, Tower Control, Clam Bl..."


In [1]:
from bs4 import BeautifulSoup
# import requests
import time
import pandas as pd
from etl.extract.url_request import make_request

# defining constants:
# page that is scanned for links to the individual weapon build pages
SENDOU_BUILDS_URL = "https://sendou.ink/builds"
# site root; prefixed to the relative "/builds/..." hrefs found on that page
SENDOU_BASE_URL = "https://sendou.ink"

# column layout shared by every builds dataframe:
# the weapon name, then the scraped abilities in page order,
# then the list of game modes of the build
DF_COLUMNS = [
    'Weapon_name',
    'Main_1', 'Sub_1', 'Sub_2', 'Sub_3',
    'Main_2', 'Sub_4', 'Sub_5', 'Sub_6',
    'Main_3', 'Sub_7', 'Sub_8', 'Sub_9',
    'game_modes'
]

# when True, weapon_build_paths only looks at the first 10 links of the page
TESTING_MODE = True


def extract_sendou_data():
    """Scrape the builds of every weapon and return them as one dataframe.

    First gathers the builds-page URL and the name of each weapon, then
    passes both lists to create_weapon_build_df and returns its result.
    """
    # the two lists are positionally aligned: entry i of each belongs
    # to the same weapon
    build_urls, names = weapon_build_paths()
    return create_weapon_build_df(build_urls, names)


def weapon_build_paths():
    """Collect the builds-page URL and the name of every weapon.

    Requests SENDOU_BUILDS_URL, runs search_for_build_path on every <a>
    tag of the page (only the first 10 tags when TESTING_MODE is True)
    and keeps the results that are not None.

    Returns:
        A (paths, weapon_names) tuple of two lists.

    Raises:
        Exception: if the two lists end up with different lengths.
    """
    response = make_request(SENDOU_BUILDS_URL)
    soup = BeautifulSoup(response.text, "html.parser")

    anchors = soup.find_all("a")
    # --------- for testing -----------
    # shorten the run by looking at a handful of links only
    if TESTING_MODE is True:
        anchors = anchors[:10]
    # ---------------------------------

    paths, weapon_names = [], []
    for anchor in anchors:
        # (None, None) comes back for links that are not weapon build pages;
        # add_to_list ignores those
        path, weapon_name = search_for_build_path(anchor)
        paths = add_to_list(paths, path)
        weapon_names = add_to_list(weapon_names, weapon_name)

    # each path needs its weapon name, otherwise positions stop lining up
    if len(paths) != len(weapon_names):
        raise Exception("Error: number of paths and weapon names don't match")
    return paths, weapon_names


def search_for_build_path(link):
    """Return the builds-page URL and weapon name behind a link, if any.

    Args:
        link: an <a> tag; it must provide get("href") and
            get_text(strip=True).

    Returns:
        (url, weapon_name) when the href starts with "/builds/" and the
        link has a non-empty label, otherwise (None, None).
    """
    href = link.get("href")
    # links without an href, or pointing elsewhere, are not weapon pages
    if href is None or not href.startswith("/builds/"):
        return None, None
    # get_text() always yields a string, so a link without a label gives ""
    # rather than None; test for emptiness so such links are really skipped
    weapon_name = link.get_text(strip=True)
    if not weapon_name:
        return None, None
    # no pause is needed here: only the already downloaded page is inspected,
    # nothing is requested from the website
    return SENDOU_BASE_URL + href, weapon_name


def add_to_list(the_list, element):
    """Append element to the_list unless it is None; return the_list.

    The list is changed in place and the very same list is handed back.
    """
    if element is None:
        return the_list
    the_list.append(element)
    return the_list


def create_weapon_build_df(path_list, weapon_list):
    """Scrape every weapon's builds page and combine the results.

    Args:
        path_list: URLs of the weapon builds pages.
        weapon_list: weapon names; entry i belongs to path_list[i].

    Returns:
        A dataframe with the DF_COLUMNS columns holding the builds of all
        weapons, indexed 0..n-1. Empty when nothing was scraped.
    """
    frames = []  # one dataframe per weapon that has at least one build
    # count is the position of the current weapon in weapon_list
    for count, path in enumerate(path_list):
        # pause before each request (avoid overwhelming the website)
        time.sleep(1)
        # presumably the limit parameter raises the number of builds the
        # page returns -- TODO confirm against the site
        response = make_request(path + '?limit=500')
        path_soup = BeautifulSoup(response.text, "html.parser")
        df_weapon_builds = scrape_all_builds(path_soup, weapon_list, count)
        # empty frames contribute no rows, so leave them out of the concat
        if not df_weapon_builds.empty:
            frames.append(df_weapon_builds)
    # pd.concat refuses an empty list, so return the bare column layout
    if not frames:
        return pd.DataFrame(columns=DF_COLUMNS)
    # concatenate once at the end instead of re-copying the growing
    # dataframe on every iteration
    return pd.concat(frames, ignore_index=True)


def scrape_all_builds(path_soup, weapon_list, count):
    """Turn every build on one weapon's page into a dataframe row.

    Args:
        path_soup: parsed builds page of a single weapon.
        weapon_list: list of all weapon names.
        count: index in weapon_list of the weapon this page belongs to.

    Returns:
        A dataframe with the DF_COLUMNS columns and one row per build.

    Raises:
        ValueError: if a build does not produce exactly one value per
            column of DF_COLUMNS.
    """
    weapon_name = weapon_list[count]
    # message to show what weapon we are currently obtaining info from
    print("Scraping builds for: " + weapon_name)
    rows = []
    for build in path_soup.find_all('div', class_='build'):
        ability_list, mode_list = scrape_a_build(build)
        # the modes stay together as one list in the last column
        row = [weapon_name] + ability_list + [mode_list]
        # a short or long row would shift values into the wrong columns,
        # so reject it instead of letting it through
        if len(row) != len(DF_COLUMNS):
            raise ValueError(
                f"build for {weapon_name} has {len(row)} values, "
                f"expected {len(DF_COLUMNS)}"
            )
        rows.append(row)
    # build the dataframe once instead of enlarging it row by row
    return pd.DataFrame(rows, columns=DF_COLUMNS)


def scrape_a_build(build):
    """Return (ability_list, mode_list) for a single build element."""
    # abilities are read first, then the game modes
    return extract_abilities(build), extract_modes(build)


def extract_modes(build):
    """Return the game modes listed on a build as a list of names.

    Each mode is represented by an image whose alt text is the mode name.
    A build without a 'build__modes' container yields the placeholder
    list ['NO MODES LISTED'].
    """
    modes_container = build.find('div', class_='build__modes')
    if modes_container is None:
        # placeholder for now
        return ['NO MODES LISTED']
    return [image['alt'] for image in modes_container.find_all('img')]


def extract_abilities(build):
    """Return the ability names of a build, in page order.

    Every 'build__ability readonly' div holds an image whose alt text is
    the name of the ability.
    """
    ability_slots = build.find_all('div', class_="build__ability readonly")
    return [slot.find('img')['alt'] for slot in ability_slots]


In [2]:
# run the scrape (shortened while TESTING_MODE is True) and render the
# resulting dataframe in the notebook
display(extract_sendou_data())

Scraping builds for: .52 Gal
Scraping builds for: .52 Gal Deco
Scraping builds for: .96 Gal
Scraping builds for: .96 Gal Deco
Scraping builds for: Aerospray MG
Scraping builds for: Aerospray RG
Scraping builds for: Annaki Splattershot Nova
Scraping builds for: Custom Jet Squelcher


Unnamed: 0,Weapon_name,Main_1,Sub_1,Sub_2,Sub_3,Main_2,Sub_4,Sub_5,Sub_6,Main_3,Sub_7,Sub_8,Sub_9,game_modes
0,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Ink Resistance Up,Stealth Jump,Quick Super Jump,Quick Super Jump,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
1,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
2,.52 Gal,Swim Speed Up,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Ink Recovery Up,Special Saver,Quick Super Jump,Stealth Jump,Ink Resistance Up,Sub Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
3,.52 Gal,Comeback,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Swim Speed Up,Stealth Jump,Special Saver,Quick Super Jump,Ink Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
4,.52 Gal,Opening Gambit,Intensify Action,Intensify Action,Intensify Action,Swim Speed Up,Swim Speed Up,Swim Speed Up,Special Saver,Stealth Jump,Quick Super Jump,Ink Resistance Up,Sub Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1915,Custom Jet Squelcher,Swim Speed Up,Special Charge Up,Special Charge Up,Run Speed Up,Ink Saver (Main),Intensify Action,Intensify Action,Special Saver,Ink Saver (Main),Quick Super Jump,Ink Resistance Up,Sub Resistance Up,"[Turf War, Splat Zones, Tower Control, Rainmak..."
1916,Custom Jet Squelcher,Special Charge Up,Swim Speed Up,Swim Speed Up,Special Saver,Ink Saver (Main),Special Power Up,Special Power Up,Quick Super Jump,Stealth Jump,Intensify Action,Intensify Action,Ink Resistance Up,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
1917,Custom Jet Squelcher,Special Charge Up,Ink Saver (Main),Ink Saver (Main),Quick Super Jump,Thermal Ink,Sub Power Up,Sub Power Up,Intensify Action,Run Speed Up,Ink Resistance Up,Ink Resistance Up,Intensify Action,"[Splat Zones, Tower Control, Rainmaker, Clam B..."
1918,Custom Jet Squelcher,Last-Ditch Effort,Ink Saver (Main),Ink Saver (Main),Ink Saver (Main),Special Charge Up,Ink Recovery Up,Ink Recovery Up,Quick Super Jump,Special Charge Up,Run Speed Up,Run Speed Up,Intensify Action,[NO MODES LISTED]
