# In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as soup
from pprint import pprint
from time import sleep
import pandas as pd

class IFSCBloudererScraper():
    """
    Scraper for the IFSC website (ifsc-climbing.org).

    Drives a Chrome browser through splinter, walks the complete ranking
    table for the women's and men's categories, visits every athlete page
    linked from it and exports the collected data to CSV files.
    """

    def __init__(self, debug=False):
        """
        Initialize a scraper object with a Chrome browser instance.

        Input:
            debug - when True, athletes that could not be scraped are
                    reported on stdout instead of being skipped silently
        """
        self.debug = debug

        # Create the Chrome instance
        self.browser = Browser('chrome', incognito=True)

        sleep(1)

    @staticmethod
    def _text_or_none(tag):
        """Return the text of a parsed tag, or None when the tag is missing."""
        return tag.text if tag is not None else None

    def personal_info(self, link):
        """
        Return the information shown on an individual athlete page.

        input:
            link - the url for that specific athlete
        return:
            INFO - a list ``[country, name, age, athlete_infos]``.
                   country/name/age are the raw header texts (None when the
                   element is absent). athlete_infos is a list holding one
                   dict that maps each personal-info subtitle to its value,
                   or an empty list when the page has no personal-info text
                   block.
        raises:
            IndexError - the page has no ``div.athlete`` header at all
        """
        self.browser.visit(link)
        # grab the html for parsing
        page = soup(self.browser.html, "html.parser")

        # the div with class "athlete" holds the header information
        headers = page.find_all("div", class_="athlete")
        if not headers:
            raise IndexError(f"no athlete header found at {link}")
        header = headers[0]

        country_div = header.find("div", class_="country")
        country = self._text_or_none(
            country_div.find("span") if country_div is not None else None
        )
        name = self._text_or_none(header.find("h1", class_="name"))
        age = self._text_or_none(header.find("span", class_="age"))

        # The personal-info div carries the optional details of the athlete.
        # Not every athlete has the same entries and some have none at all.
        athlete_infos = []
        info = page.find("div", class_="personal-info")
        text_block = info.find("div", class_="text") if info is not None else None
        if text_block is not None:
            categories = []
            values = []
            # Only direct child *tags* are relevant; whitespace text nodes
            # between them have no attributes and must be ignored.
            for child in text_block.find_all(recursive=False):
                classes = child.get("class") or [None]
                if classes[0] == "subtitle":
                    categories.append(child.text)
                else:
                    values.append(child.text)
            # zip() tolerates an uneven number of subtitles and values
            athlete_infos.append(dict(zip(categories, values)))

        return [country, name, age, athlete_infos]

    def climbers_stats(self, link):
        """
        Open the ranking page and navigate into its iframe so that the
        ranking table can be iterated afterwards.

        input:
            link - the url of the athletes rankings
        """
        self.browser.visit(link)
        page = soup(self.browser.html, "html.parser")
        iframe = page.find('iframe')
        # the iframe's target url is stored in its "data-src" attribute
        self.browser.visit(iframe['data-src'])

    def loop_table(self):
        """
        Loop through the ranking table of the currently loaded page and
        scrape every athlete linked from it.

        input:
            N/A
        return:
            climbers - a list with one ``personal_info`` result per athlete
        """
        # The table is parsed up front because personal_info() navigates
        # the browser away from this page.
        table = soup(self.browser.html, "html.parser").find('table')
        climbers = []
        for row in table.find_all('tr'):
            cells = row.find_all('td')
            anchor = cells[1].find("a") if len(cells) > 1 else None
            link = anchor.get("href") if anchor is not None else None
            if not link:
                # header rows and rows without an athlete link
                continue
            try:
                climbers.append(self.personal_info(link))
            except Exception as err:
                # Best effort: one broken athlete page must not abort the
                # whole run. KeyboardInterrupt/SystemExit still propagate.
                if self.debug:
                    print(f"skipping {link}: {err!r}")
        return climbers

    def climbers(self, url):
        """
        Combine the previous steps into one function.

        input:
            url - the url of the athletes rankings
        returns:
            list of lists of the climbers and their attributes
        """
        self.climbers_stats(url)
        sleep(1)
        return self.loop_table()

    def create_climber_df(self, url):
        """
        Create a cleaned dataframe from the list returned by ``climbers``.

        input:
            url - the url of the athletes rankings
        returns:
            df - a dataframe of the athletes
        """
        competitors = self.climbers(url)
        # Fixing the columns keeps the frame well-formed for an empty result
        df = pd.DataFrame(competitors, columns=range(4))
        return self.clean_climber_df(df)

    def clean_climber_df(self, df):
        """
        Clean the raw dataframe built from the ``climbers`` list.

        input:
            df - raw dataframe with the positional columns
                 0 (country), 1 (name), 2 (age text), 3 (personal-info list)
        returns:
            df - a new dataframe with the columns ``country``, ``age``,
                 ``name_0`` .. ``name_N`` (name split on spaces) followed by
                 one column per personal-info category. The input frame is
                 left untouched.
        """
        df = df.copy()

        # Column 3 holds a list with zero or one dict per athlete; athletes
        # without personal info contribute an empty record (all-NaN row).
        records = [
            infos[0] if isinstance(infos, list) and infos else {}
            for infos in df[3]
        ]
        personal_info_df = pd.DataFrame(records, index=df.index)

        df[1] = df[1].str.strip()
        name_df = df[1].str.split(" ", expand=True).add_prefix("name_")

        # Remove the literal "Age:" label. str.strip("Age: ") would strip a
        # *set of characters* from both ends rather than the prefix.
        df[2] = df[2].str.replace(r"^\s*Age:\s*", "", regex=True).str.strip()

        df = df.merge(name_df, left_index=True, right_index=True)
        df = df.drop(columns=[1, 3])
        df = df.merge(personal_info_df, left_index=True, right_index=True)
        return df.rename(columns={0: "country", 2: "age"})

    def women(self):
        """Return the cleaned dataframe for ranking category 7 (women's url)."""
        women_url = 'https://www.ifsc-climbing.org/index.php/world-competition/calendar?task=ranking-complete&category=7'
        return self.create_climber_df(women_url)

    def men(self):
        """Return the cleaned dataframe for ranking category 3 (men's url)."""
        men_url = 'https://www.ifsc-climbing.org/index.php/world-competition/calendar?task=ranking-complete&category=3'
        return self.create_climber_df(men_url)

    def scrape(self):
        """
        Scrape both rankings and write them to ``women_competitors.csv``
        and ``men_competitors.csv`` in the current working directory.

        The browser is always closed, even when scraping fails.
        """
        try:
            women_df = self.women()
            men_df = self.men()
        finally:
            # never leave a Chrome process behind
            self.browser.quit()

        women_df.to_csv('women_competitors.csv', index=False)
        men_df.to_csv('men_competitors.csv', index=False)
        
def main():
    """Build an IFSC scraper and run its full scrape."""
    # Instantiate and run in one step; the scraper object is not needed afterwards.
    IFSCBloudererScraper().scrape()
    
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()