In [14]:
import requests
import re
import random

In [15]:
from bs4 import BeautifulSoup 

In [16]:
# Column stores for the scraped data. The five lists are parallel: the CSV
# cell builds row k from element k of each list.
player_nums, player_age, player_sex, player_debt, player_survival = (
    [], [], [], [], []
)

In [17]:
# Common URL prefix of the wiki's pages; get_url() appends "Player_" and the
# zero-padded player number to it.
base_url = "https://squid-game-minor-players.fandom.com/wiki/"

In [18]:
#function to get the URL of the player we are interested in
def get_url(i):
    """Return the wiki URL for player number ``i``.

    Numbers below 10 get two leading zeros and numbers below 100 get one
    (``7`` -> ``Player_007``, ``42`` -> ``Player_042``); anything else is
    used as-is.
    """
    zeros = "00" if i < 10 else ("0" if i < 100 else "")
    return f"{base_url}Player_{zeros}{i}"

In [19]:
#year functions

#function to get the year text from soup
def get_year_text(soup):
    """Return the text of the infobox "born" field, or None if it is absent.

    Looks up the infobox row whose ``data-source`` is ``born`` and returns the
    text of the value div inside it.

    Args:
        soup: parsed page to search (anything exposing a BeautifulSoup-style
            ``find``).

    Returns:
        The value text as a string, or None when either the row or its value
        div cannot be found.
    """
    born_row = soup.find(
        'div',
        {'class': 'pi-item pi-data pi-item-spacing pi-border-color', 'data-source': 'born'},
    )
    if born_row is None:
        return None

    value_div = born_row.find('div', class_="pi-data-value pi-font")
    # find() returns None when the row has no value div; guard before calling
    # get_text() so a malformed infobox does not raise AttributeError.
    if value_div is None:
        return None
    return value_div.get_text()

#function which uses a regular expression to extract a year from text
def extract_year(yearText):
    """Return the first standalone four-digit number in ``yearText`` as an int.

    Returns 0 when the text contains no such number.
    """
    found = re.findall(r'\b\d{4}\b', yearText)
    return int(found[0]) if found else 0

#function to calculate age, returns the age as a string, or "tr" (trace) if the year is 0
def calc_age(year, current_year=2024):
    """Return the age for a birth year as a string, or "tr" when it is unknown.

    Args:
        year: birth year as an int; 0 is the "no year found" marker.
        current_year: year the age is measured against. Defaults to 2024, the
            value this function previously hard-coded.

    Returns:
        ``str(current_year - year)``, or ``"tr"`` when ``year`` is 0.
    """
    if year == 0:
        return "tr"
    return str(current_year - year)

#function to record the player's age
def set_age(soup):
    """Append the player's age (or "tr" if no birth text is found) to ``player_age``."""
    born_text = get_year_text(soup)
    age = calc_age(extract_year(born_text)) if born_text else "tr"
    player_age.append(age)

In [20]:
#function to fill in csv with tr values if the url is not found
def fill_trace(i):
    """Record player ``i`` with "tr" (trace) in every data column.

    All five column lists must grow together: the CSV cell builds each row by
    reading the same index from every list, so filling only some of them
    would shift or break every later row.
    """
    player_nums.append(i)
    player_age.append("tr")
    player_sex.append("tr")
    player_debt.append("tr")
    player_survival.append("tr")

In [21]:
#sex functions 

#function to update array of players sex
def set_sex(soup):
    """Append the player's sex (or "tr" if no gender text is found) to ``player_sex``."""
    raw_sex = get_sex_text(soup)
    player_sex.append(extract_sex(raw_sex) if raw_sex else "tr")

#function to get the text containing player sex
def get_sex_text(soup):
    """Return the text of the infobox "gender" field, or None if it is absent.

    Args:
        soup: parsed page to search (anything exposing a BeautifulSoup-style
            ``find``).

    Returns:
        The value text as a string, or None when either the gender row or its
        value div cannot be found.
    """
    gender_row = soup.find(
        'div',
        {'class': 'pi-item pi-data pi-item-spacing pi-border-color', 'data-source': 'gender'},
    )
    if gender_row is None:
        return None

    value_div = gender_row.find('div', class_="pi-data-value pi-font")
    # find() returns None when the row has no value div; guard before calling
    # get_text() so a malformed infobox does not raise AttributeError.
    if value_div is None:
        return None
    return value_div.get_text()

#function to extract the sex of the characters
#Some have both genders, if thats the case return tr
def extract_sex(sexText):
    """Return ``sexText`` unchanged, or "tr" when it is longer than 6 characters.

    "Female" is exactly 6 characters, so any longer text is not a single
    plain "Male"/"Female" value (for example a page listing both) and is
    recorded as trace instead.
    """
    if len(sexText) > 6:
        return "tr"
    return sexText

In [22]:
def set_num(i):
    """Append player number ``i`` to the module-level ``player_nums`` list."""
    player_nums.append(i)

In [23]:
#debt functions 

def set_debt(soup):
    """Append the player's debt description (or "tr" if none is found) to ``player_debt``."""
    history_text = get_debt_text(soup)
    if not history_text or history_text == "tr":
        player_debt.append("tr")
        return
    player_debt.append(extract_debt(history_text))
    
#words used in wiki to describe debt - heavy debt, debt
# NOTE: the survival cell further down assigns the name `pattern` again, so a
# function that reads the global `pattern` at call time sees whichever
# assignment ran last.
pattern = r"heavy debt|debt" 

def get_debt_text(soup):
    """Return the text of the first paragraph after the "History" heading.

    Finds the ``mw-headline`` span with id ``History``, steps up to its parent
    element and returns the text of that element's next ``<p>`` sibling.

    Args:
        soup: parsed page to search (anything exposing a BeautifulSoup-style
            ``find``).

    Returns:
        The paragraph text as a string, or None when the heading, its parent
        or a following paragraph cannot be found.
    """
    history_span = soup.find('span', {'class': 'mw-headline', 'id': 'History'})
    if history_span is None:
        return None

    # Each lookup below returns None when nothing matches; stop early instead
    # of raising AttributeError on pages with an unusual layout.
    heading = history_span.find_parent()
    if heading is None:
        return None

    first_paragraph = heading.find_next_sibling('p')
    if first_paragraph is None:
        return None
    return first_paragraph.get_text()

# Debt wording searched for by extract_debt(). Kept under its own name because
# the global `pattern` is reassigned to the survival regex before the main
# loop runs, which made the debt search look for game names instead.
_DEBT_PATTERN = re.compile(r"heavy debt|debt")


def extract_debt(debtText):
    """Return the first debt phrase found in ``debtText``, or "tr" if none.

    Args:
        debtText: text to search.

    Returns:
        ``"heavy debt"`` or ``"debt"`` (whichever occurs first in the text),
        or ``"tr"`` when neither phrase appears.
    """
    found = _DEBT_PATTERN.search(debtText)
    if found:
        return found.group()
    return "tr"

In [24]:
# survival function 

# Text markers for the first game. The check_* helpers below return "died"
# when one of these appears in the text they inspect and "survived" otherwise.
# Whitespace after the comma is optional so the usual spelling
# "Red Light, Green Light" matches as well as "Red Light,Green Light".
# NOTE: this rebinds the module-level name `pattern` that the debt cell above
# also assigns.
pattern = r"Game 1|Red Light,\s*Green Light|Episode 1"
def set_survival(soup):
    """Append the player's fate to ``player_survival``.

    Tries the three checks in order (died field, first-appearance link,
    first-appearance text) and keeps the first one that gives an answer;
    records "died" when none of them does.
    """
    fate = (
        check_fate(soup)
        or check_appearances_link(soup)
        or check_apperances(soup)
    )
    player_survival.append(fate or "died")
    
    

def check_fate(soup):
    """Classify the player from the infobox "died" field.

    Args:
        soup: parsed page to search (BeautifulSoup object).

    Returns:
        ``"died"`` when the field's text matches the global ``pattern``,
        ``"survived"`` when the field exists but does not match, and None
        when the field or its value div is missing.
    """
    died_row = soup.find(
        'div',
        {'class': 'pi-item pi-data pi-item-spacing pi-border-color', 'data-source': 'died'},
    )
    if died_row is None:
        return None

    value_div = died_row.find('div', class_="pi-data-value pi-font")
    # find() returns None when the row has no value div; report "no answer"
    # so the caller can fall through to its other checks instead of crashing.
    if value_div is None:
        return None

    if re.search(pattern, value_div.get_text()):
        return "died"
    return "survived"

def check_appearances_link(soup):
    """Classify the player from the link in the infobox "first" cell.

    Returns "died" when the link text matches the global ``pattern``,
    "survived" when it does not, and None when the cell or the link inside it
    is missing.
    """
    first_cell = soup.find(
        'td',
        {'class' : 'pi-horizontal-group-item pi-data-value pi-font pi-border-color pi-item-spacing', 'data-source' : 'first'},
    )
    anchor = first_cell.find('a') if first_cell else None
    if not anchor:
        return None
    hits = re.findall(pattern, anchor.get_text())
    return "died" if hits else "survived"

def check_apperances(soup):
    """Classify the player from the full text of the infobox "first" cell.

    Returns "died" when the cell text matches the global ``pattern``,
    "survived" when it does not, and None when the cell is missing.
    """
    first_cell = soup.find(
        'td',
        {'class' : 'pi-horizontal-group-item pi-data-value pi-font pi-border-color pi-item-spacing', 'data-source' : 'first'},
    )
    if not first_cell:
        return None
    hits = re.findall(pattern, first_cell.get_text())
    return "died" if hits else "survived"

In [25]:
#main loop: fetch one wiki page per player number and record one entry per column

# Start from empty columns so that re-running this cell does not duplicate rows.
for column in (player_nums, player_age, player_sex, player_debt, player_survival):
    column.clear()

# NOTE(review): range(0, 456) requests Player_000 through Player_455 — confirm
# whether Player_456 should be included.
for i in range(0,456):
    url = get_url(i)
    try:
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        # Treat a network failure like a missing page, but report it.
        print(f"skipping {url}: {exc}")
        continue

    #check if url exists
    if response.status_code != 200:
        continue

    #create a beautiful soup of the page we fetched, parsing it as HTML
    soup = BeautifulSoup(response.content, 'html.parser')

    # Record the player only once the page has parsed, so every column gets
    # exactly one entry per recorded player.
    set_num(i)
    set_age(soup)
    set_sex(soup)
    set_debt(soup)
    set_survival(soup)



In [26]:
#csv file
import csv

#field names
fields = ['Player Number', 'Age', 'Sex', 'Debt', 'Survival']

# Row k of the CSV is element k of every column list, so the lists must all
# have the same length; fail loudly rather than write shifted rows.
columns = [player_nums, player_age, player_sex, player_debt, player_survival]
assert all(len(col) == len(player_nums) for col in columns), \
    "column lists have different lengths"

#create rows of csv file
rows = [list(record) for record in zip(*columns)]

#name of csv file
filename = "player_records.csv"

# newline='' lets the csv module control line endings (otherwise blank lines
# appear between rows on Windows); the encoding is set explicitly so the
# output does not depend on the platform default.
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
    #create a csv writer object
    csvwriter = csv.writer(csvfile)
    #write fields and rows
    csvwriter.writerow(fields)
    csvwriter.writerows(rows)