### Author: Paula Abigail Tam
### Project: GBF character dataset

This project scrapes character data from the tier list on the GBF wiki (gbf.wiki). The end goal is to build a visualizer that makes character statistics easier to digest.

Example: comparing the number of Dark SSR characters with the number of Light SSR characters.

In [1]:
#imports
import requests
import pandas as pd
from bs4 import BeautifulSoup
from IPython.display import display

In [2]:
#character data url
URL = "https://gbf.wiki/Character_Tier_List"
#give up after 30 seconds instead of hanging forever if the wiki does not respond
page = requests.get(URL, timeout=30)
#stop on an HTTP error (4xx/5xx) rather than parsing an error page further down
page.raise_for_status()

In [3]:
#parse the downloaded HTML with Python's built-in parser
soup = BeautifulSoup(markup=page.content, features="html.parser")

In [4]:
#locate the tier list table by its exact class attribute string
table_tier = soup.find('table', class_ = "wikitable tierlist tierlist-multi")
if table_tier is None:
    #fail with a readable message instead of an AttributeError on None below
    raise RuntimeError("Tier list table not found; the page layout may have changed.")
#every cell of the tier list table, in document order
td = table_tier.find_all('td')

In [5]:
#function to determine rarity of character
_RARITY_BY_ID_DIGIT = {
    "4": "SSR",
    "3": "SR",
    "2": "R",
}


def chara_rarity(id_num):
    """Map the leading digit of a character's short id to its rarity label.

    id_num is compared as a string ("4", "3" or "2"). Any other value
    yields None.
    """
    return _RARITY_BY_ID_DIGIT.get(id_num)

In [6]:
#function to determine a character's element
_ELEMENT_BY_NUMBER = {
    0: "Rank",
    1: "Fire",
    2: "Water",
    3: "Earth",
    4: "Wind",
    5: "Light",
    6: "Dark",
    7: "Any",
}


def which_element(element_n):
    """Translate a number from 0 to 7 into its element label.

    0 maps to "Rank", 1-6 to the six elements and 7 to "Any". Numbers
    outside that range yield None.
    """
    return _ELEMENT_BY_NUMBER.get(element_n)

In [7]:
#function to split a comma-separated attribute (series / weapons / races) into a list of values
def is_multiple(string):
    """Split a comma-separated value into a list of its parts.

    A value without commas (including the empty string) comes back as a
    one-element list. Parts are not stripped of whitespace.
    """
    return string.split(",")

In [8]:
#function to rename series
_SERIES_LABELS = {
    "": "-",
    "none": "Permanent",
    "tie-in": "Tie-In / Collab",
    "12generals": "Zodiac",
}


def which_series(series_name):
    """Turn a raw series filter value into the label used in the dataset.

    A handful of raw values get a hand-picked label (the empty string
    becomes the "-" placeholder). Every other name is returned capitalized.
    """
    if series_name in _SERIES_LABELS:
        return _SERIES_LABELS[series_name]
    return series_name.capitalize()

In [9]:
def get_chara_page(short_id):
    """Look up a character's wiki page path in the tier list details table.

    short_id is the value of the character's data-short-id attribute.

    Returns the href of the first link in the first details row whose
    data-short-id equals short_id, or None when the details table, the
    row, the link or its href is missing.
    """
    table_detail = soup.find('table', class_ = "wikitable tierlist-details")
    if table_detail is None:
        return None

    #only the first matching row is needed, so find() is enough
    row = table_detail.find('tr', attrs={'data-short-id':short_id})
    if row is None:
        return None

    link = row.find('a')
    if link is None:
        return None

    #.get() yields None instead of raising KeyError when href is absent
    return link.get('href')

In [10]:
#helper: first entry plus optional second entry of a split attribute
def _first_and_second(values, convert):
    """Return (convert(values[0]), convert(values[1])); the second item is "-" when absent."""
    first = convert(values[0])
    second = convert(values[1]) if len(values) > 1 else "-"
    return first, second


#initialize list of dicts (one dict per character)
list_of_charas = []
#counter that cycles 0-7, advancing once per table cell; which_element() turns it into a label
n = 0

for cell in td:
    #every span carrying a data-short-id attribute describes one character
    for chara in cell.find_all('span', attrs={"data-short-id": True}):
        short_id = chara['data-short-id']
        name = [link['title'] for link in chara.find_all('a')]
        series = is_multiple(chara['data-filter-series'])
        wep = is_multiple(chara['data-filter-weapon'])
        race = is_multiple(chara['data-filter-race'])

        chara_page = get_chara_page(short_id)
        #get_chara_page gives None when no details row matches; keep the
        #character with the "-" placeholder instead of failing on None + str
        chara_url = ("https://gbf.wiki" + chara_page) if chara_page else "-"

        first_series, second_series = _first_and_second(series, which_series)
        first_wep, second_wep = _first_and_second(wep, str.capitalize)
        first_race, second_race = _first_and_second(race, str.capitalize)

        #populate the character's dictionary (key order = column order of the dataset)
        chara_info = {
            'Rarity': chara_rarity(short_id[0]), #first character of the short id encodes rarity
            'Element': which_element(n), #call function to convert n to element
            'Name': name[0], #to just keep it as a string
            'Series': first_series, #seasonal/grand/etc.
            '2nd Series': second_series,
            'Weapon': first_wep,
            '2nd Weapon': second_wep,
            'Race': first_race,
            '2nd Race': second_race,
            'Type': chara['data-filter-style'].capitalize(),
            'URL': chara_url,
        }

        list_of_charas.append(chara_info)

    #advance to the next cell position, wrapping from 7 back to 0
    n = (n + 1) % 8

In [11]:
#one row per character; columns follow the key order of the dicts
df = pd.DataFrame(data=list_of_charas)

In [12]:
#temporarily lift pandas' row/column display limits so the whole table is shown
show_everything = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*show_everything):
    display(df)

Unnamed: 0,Rarity,Element,Name,Series,2nd Series,Weapon,2nd Weapon,Race,2nd Race,Type,URL
0,SSR,Fire,Michael,Grand,-,Sabre,-,Primal,-,Attack,https://gbf.wiki/Michael
1,SSR,Fire,Percival,Grand,-,Sabre,-,Human,-,Attack,https://gbf.wiki/Percival_(Grand)
2,SSR,Water,Poseidon,Grand,-,Spear,-,Primal,-,Attack,https://gbf.wiki/Poseidon
3,SSR,Water,Vajra,Zodiac,-,Katana,Melee,Erune,-,Attack,https://gbf.wiki/Vajra
4,SSR,Wind,Korwa,Summer,-,Staff,-,Erune,-,Special,https://gbf.wiki/Korwa_(Summer)
5,SSR,Wind,Lich,Halloween,-,Staff,-,Primal,-,Special,https://gbf.wiki/Lich_(Halloween)
6,SSR,Wind,Narmaya,Grand,-,Katana,-,Draph,-,Attack,https://gbf.wiki/Narmaya_(Grand)
7,SSR,Wind,Vania,Yukata,-,Staff,Dagger,Other,-,Attack,https://gbf.wiki/Vania_(Yukata)
8,SSR,Light,Florence,Halloween,-,Staff,-,Human,-,Special,https://gbf.wiki/Florence_(Halloween)
9,SSR,Light,Nehan,Grand,-,Gun,Melee,Erune,-,Special,https://gbf.wiki/Nehan


In [13]:
#write the dataset to disk without the DataFrame's index column
df.to_csv(path_or_buf='GBF_character_dataset.csv', index=False)