In [1]:
from etl.extract.extract_inkipedia import extract_inkipedia
# Build the weapons dataframe once; it is passed to extract_subs_images
# further down, which reads its 'Sub' column for the sub weapon names.
df_weapons = extract_inkipedia()
# Rich-display the frame so the available columns can be inspected.
display(df_weapons)

Unnamed: 0,Extra_space,Name,ID,Sub,Special,Special_Points,Level,Price,Class,Introduced
0,,Sploosh-o-matic,0,Curling Bomb,Ultra Stamp,180p,9,1,Shooter,Version1.0.0(Drizzle Season 2022)
1,,Neo Sploosh-o-matic,1,Squid Beakon,Killer Wail 5.1,170p,13,1,Shooter,Version3.0.0(Fresh Season 2023)
2,,Splattershot Jr.,10,Splat Bomb,Big Bubbler,180p,1,,Shooter,Version1.0.0(Drizzle Season 2022)
3,,Custom Splattershot Jr.,11,Torpedo,Wave Breaker,190p,6,1,Shooter,Version2.0.0(Chill Season 2022)
4,,Splash-o-matic,20,Burst Bomb,Crab Tank,200p,16,1,Shooter,Version1.0.0(Drizzle Season 2022)
...,...,...,...,...,...,...,...,...,...,...
138,,Order Splatana Replica,8005,Burst Bomb,Zipcaster,210p,1,,Splatana,Version7.0.0(Side Order)
139,,Splatana Wiper,8010,Torpedo,Ultra Stamp,190p,5,1,Splatana,Version1.0.0(Drizzle Season 2022)
140,,Splatana Wiper Deco,8011,Squid Beakon,Tenta Missiles,190p,10,1,Splatana,Version4.0.0(Sizzle Season 2023)
141,,Mint Decavitator,8020,Suction Bomb,Big Bubbler,210p,21,1,Splatana,Version8.0.0(Sizzle Season 2024)


In [10]:
from bs4 import BeautifulSoup
import pandas as pd
from etl.extract.url_request import make_request
import time

# URL of the wiki category page whose <img> tags are scanned for
# Splatoon 3 sub weapon icons (requested in find_all_sub_images).
SUBS_GALLERY_PATH = (
    'https://splatoonwiki.org/wiki/Category:Splatoon_3_sub_weapon_icons'
)


def extract_subs_images(df_weapons, expected_count=14):
    """Build a dataframe pairing every sub weapon with its icon URL.

    The sub weapon names come from the 'Sub' column of ``df_weapons``;
    the icon URLs are collected from the gallery page by
    ``find_all_sub_images`` and attached by ``create_sub_df``.

    Parameters
    ----------
    df_weapons : pandas.DataFrame
        Weapons dataframe containing a 'Sub' column.
    expected_count : int, optional
        Number of sub weapons the result must contain. Defaults to 14,
        the count this extractor was originally written against.

    Returns
    -------
    pandas.DataFrame
        One row per sub weapon with the columns 'Sub Weapon' and
        'Image URL'.

    Raises
    ------
    ValueError
        If the resulting dataframe does not have exactly
        ``expected_count`` rows.
    """
    print("\nCreating a dataframe of sub weapon images...")
    # distinct sub weapon names taken from the weapons dataframe
    sub_weapons_df = find_sub_names(df_weapons)
    # icon urls found on the gallery page
    image_urls = find_all_sub_images()
    # attach the urls to the names
    sub_weapons_df = create_sub_df(sub_weapons_df, image_urls)

    found = sub_weapons_df.shape[0]
    # validate before announcing success so "Done!" is never printed
    # for an incomplete result
    if found != expected_count:
        raise ValueError(
            "Error: Sub weapons data frame has {} subs, "
            "expected {}".format(found, expected_count)
        )
    print("Done! {}/{} images found".format(found, expected_count))
    return sub_weapons_df


def find_sub_names(df_weapons):
    """Return the distinct sub weapon names as a one-column dataframe.

    Parameters
    ----------
    df_weapons : pandas.DataFrame
        Weapons dataframe containing a 'Sub' column.

    Returns
    -------
    pandas.DataFrame
        A dataframe with a single 'Sub Weapon' column holding each
        distinct value of ``df_weapons['Sub']`` once, in order of first
        appearance.
    """
    # drop missing entries first: unique() would otherwise keep NaN/None
    # as if it were a sub weapon, adding a bogus row to the result
    sub_weapons = df_weapons['Sub'].dropna().unique()
    return pd.DataFrame(sub_weapons, columns=['Sub Weapon'])


def find_all_sub_images():
    """Collect the sub weapon icon URLs from the gallery page.

    Requests ``SUBS_GALLERY_PATH``, parses the returned HTML and passes
    every ``<img>`` tag through ``find_sub_image``, which appends the
    URL of each tag it accepts.

    Returns
    -------
    list
        The URLs accepted by ``find_sub_image``, in page order.
    """
    # make request to the site and parse the page
    response = make_request(SUBS_GALLERY_PATH)
    soup = BeautifulSoup(response.text, "html.parser")
    # single pause after the request
    time.sleep(1)

    image_urls = []  # to store urls
    # The loop only inspects tags of the page that is already parsed;
    # it makes no further requests, so no per-image pause is needed.
    for img in soup.find_all('img'):
        image_urls = find_sub_image(img, image_urls)
    return image_urls


def find_sub_image(img, image_urls):
    """Append the URL of ``img`` to ``image_urls`` if it is a sub icon.

    A tag qualifies when its 'src' attribute is present, contains both
    'Sub' and 'Flat', and does not contain 'Smallfry'. Qualifying
    sources are prefixed with 'https:' before being stored.

    Parameters
    ----------
    img : object with a ``get`` method
        The image tag to inspect.
    image_urls : list
        Accumulator of URLs; it is modified in place.

    Returns
    -------
    list
        The same ``image_urls`` list that was passed in.
    """
    source = img.get('src')
    # nothing to do for tags without a usable source
    if not source:
        return image_urls
    is_flat_sub = all(marker in source for marker in ('Sub', 'Flat'))
    # keep flat sub icons, but never the Smallfry ones
    if is_flat_sub and 'Smallfry' not in source:
        image_urls.append('https:' + source)
    return image_urls


def create_sub_df(sub_weapons_df, image_urls):
    """Combine the sub weapon names and the icon URLs into one dataframe.

    The names are sorted by the 'Sub Weapon' column and the URLs by
    their file name (the text after the last '/'); the two sorted
    sequences are then paired position by position.

    Parameters
    ----------
    sub_weapons_df : pandas.DataFrame
        Dataframe with a 'Sub Weapon' column. It is not modified.
    image_urls : iterable of str
        Icon URLs, one per sub weapon.

    Returns
    -------
    pandas.DataFrame
        The sorted names with a fresh 0..n-1 index and an added
        'Image URL' column.
    """
    def file_name(url):
        # last path segment of the url
        return url.split('/')[-1]

    # order the urls by file name
    # (sorting by file name was suggested by ChatGPT)
    ordered_urls = list(image_urls)
    ordered_urls.sort(key=file_name)

    # order the subs alphabetically and renumber the rows
    ordered_names = (
        sub_weapons_df
        .sort_values(by='Sub Weapon')
        .reset_index(drop=True)
    )
    # add the urls as a new column called Image URL
    return ordered_names.assign(**{'Image URL': ordered_urls})




In [9]:
# Run the sub weapon image extraction on the weapons dataframe; the
# returned dataframe is the cell's last expression, so it is displayed.
extract_subs_images(df_weapons)


Creating a dataframe of sub weapon images...
Done! 14/14 images found


Unnamed: 0,Sub Weapon,Image URL
0,Angle Shooter,https://cdn.wikimg.net/en/splatoonwiki/images/...
1,Autobomb,https://cdn.wikimg.net/en/splatoonwiki/images/...
2,Burst Bomb,https://cdn.wikimg.net/en/splatoonwiki/images/...
3,Curling Bomb,https://cdn.wikimg.net/en/splatoonwiki/images/...
4,Fizzy Bomb,https://cdn.wikimg.net/en/splatoonwiki/images/...
5,Ink Mine,https://cdn.wikimg.net/en/splatoonwiki/images/...
6,Point Sensor,https://cdn.wikimg.net/en/splatoonwiki/images/...
7,Splash Wall,https://cdn.wikimg.net/en/splatoonwiki/images/...
8,Splat Bomb,https://cdn.wikimg.net/en/splatoonwiki/images/...
9,Sprinkler,https://cdn.wikimg.net/en/splatoonwiki/images/...
