### Author: Paula Abigail Tam
### Project: GBF character dataset

This project is to scrape the character data from the tierlist on GBFwiki. End goal is to make a visualizer to make character statistics more digestible.

Example: To be able to compare the number of Dark SSR characters vs number of Light SSR characters.

In [13]:
#imports
import requests
import pandas as pd
import re
from bs4 import BeautifulSoup
from IPython.display import display

In [14]:
#character data url
URL = "https://gbf.wiki/Character_Tier_List"
page = requests.get(URL)

In [15]:
soup = BeautifulSoup(page.content, "html.parser")

In [16]:
table_tier = soup.find('table', class_ = "wikitable tierlist tierlist-multi")
td = table_tier.find_all('td')

In [17]:
#function to determine rarity of character
def chara_rarity(id_num):
    match id_num:
        case "4":
            return "SSR"
        case "3":
            return "SR"
        case "2":
            return "R"

In [18]:
#function to determine a character's element
def which_element(element_n):
    match element_n:
        case 1:
            return "Fire"
        case 2:
            return "Water"
        case 3:
            return "Earth"
        case 4:
            return "Wind"
        case 5:
            return "Light"
        case 6:
            return "Dark"
        case 7:
            return "Any"
        case 0:
            return "Rank"

In [19]:
#function to check if multiple series / weapons
def is_multiple(string):
    line = string.split(",")
    return line

In [20]:
#function to rename series
def which_series(series_name):
    match series_name:
        case "":
            return "-"
        case "none":
            return "Permanent"
        case "tie-in":
            return "Tie-In / Collab"
        case "12generals":
            return "Zodiac"
        case default:
            return series_name.capitalize()

In [21]:
def access_details(short_id):
    table_detail = soup.find('table', class_ = "wikitable tierlist-details")
    tr = table_detail.find_all('tr', attrs={'data-short-id':short_id})
    return tr

In [22]:
def get_chara_page(short_id):
    #table_detail = soup.find('table', class_ = "wikitable tierlist-details")
    #tr = table_detail.find_all('tr', attrs={'data-short-id':short_id})
    tr = access_details(short_id)
    for k in tr:
        a = k.find('a')
        return(a['href'])

In [23]:
def isfloat(value): 
    try: 
        return True, float(value) 
    except ValueError: 
        return False, value

def get_rating(short_id):
    tr = access_details(short_id)
    for k in tr:
        td = k.find_all('td')
        for i in td:
            number, rating = isfloat(i.text)
            if number:
                return rating
            else:
                continue

In [24]:
#initliaize list of dicts
list_of_charas = []
n = 0

for i in td:
    item = i.find_all('span', attrs={"data-short-id": True})
    
    for j in item:            
        name = [title['title'] for title in j.find_all('a')]
        series = is_multiple(j['data-filter-series'])
        wep = is_multiple(j['data-filter-weapon'])
        race = is_multiple(j['data-filter-race'])
        
        chara_page = get_chara_page(j['data-short-id'])
        chara_url = "https://gbf.wiki" + chara_page
        
        chara_info = {}
        #populate the character's dictionary
        chara_info['ID'] = j['data-short-id']
        chara_info['Rating'] = get_rating(j['data-short-id'])
        chara_info['Rarity'] = chara_rarity(j['data-short-id'][0])
        chara_info['Element'] = which_element(n) #call function to convert n to element
        chara_info['Name'] = name[0] #to just keep it as a string
        
        chara_info['Series'] = which_series(series[0]) #seasonal/grand/etc.
        if len(series) > 1:
            chara_info['2nd Series'] = which_series(series[1])
        else:
            chara_info['2nd Series'] = "-"
            
        chara_info['Weapon'] = wep[0].capitalize()
        if len(wep) > 1:
            chara_info['2nd Weapon'] = wep[1].capitalize()
        else:
            chara_info['2nd Weapon'] = "-"
            
        chara_info['Race'] = race[0].capitalize()
        if len(race) > 1:
            chara_info['2nd Race'] = race[1].capitalize()
        else:
            chara_info['2nd Race'] = "-"
            
        chara_info['Type'] = j['data-filter-style'].capitalize()
        chara_info['URL'] = chara_url
        
        list_of_charas.append(chara_info)
        
        if n != 7:
            n += 1
        else:
            n = 0

In [25]:
df = pd.DataFrame(list_of_charas)

In [26]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df)

Unnamed: 0,ID,Rating,Rarity,Element,Name,Series,2nd Series,Weapon,2nd Weapon,Race,2nd Race,Type,URL
0,4440,10.0,SSR,Rank,Michael,Grand,-,Sabre,-,Primal,-,Attack,https://gbf.wiki/Michael
1,4425,10.0,SSR,Fire,Percival,Grand,-,Sabre,-,Human,-,Attack,https://gbf.wiki/Percival_(Grand)
2,4499,10.0,SSR,Water,Zeta,Grand,-,Spear,-,Human,-,Attack,https://gbf.wiki/Zeta_(Grand)
3,4492,10.0,SSR,Earth,Gabriel,Grand,-,Staff,-,Primal,-,Defense,https://gbf.wiki/Gabriel
4,4502,10.0,SSR,Wind,Payila,Zodiac,-,Staff,-,Draph,-,Attack,https://gbf.wiki/Payila
5,4348,10.0,SSR,Light,Poseidon,Grand,-,Spear,-,Primal,-,Attack,https://gbf.wiki/Poseidon
6,4501,10.0,SSR,Dark,Uriel,Grand,-,Melee,-,Primal,-,Attack,https://gbf.wiki/Uriel
7,4127,10.0,SSR,Any,Korwa,Summer,-,Staff,-,Erune,-,Special,https://gbf.wiki/Korwa_(Summer)
8,4427,10.0,SSR,Rank,Lich,Halloween,-,Staff,-,Primal,-,Special,https://gbf.wiki/Lich_(Halloween)
9,4335,10.0,SSR,Fire,Narmaya,Grand,-,Katana,-,Draph,-,Attack,https://gbf.wiki/Narmaya_(Grand)


In [27]:
df.to_csv('GBF_character_dataset.csv', index=False)