In [1]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import pandas as pd

## Version 1
Version 1 går igenom tävlingar

In [3]:
def start_browser(URL):
    options = Options()
    options.add_argument("start-maximized")
    options.add_argument("--disable-gpu")
    options.add_argument("disable-infobars")
    options.add_argument("--disable-extensions")
    options.add_argument("--disable-browser-side-navigation")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument('--headless')
    options.add_argument("enable-features=NetworkServiceInProcess")
    options.add_argument("--no-sandbox");
    browser = webdriver.Chrome(options=options)
    browser.get(URL)
    return browser


def get_comp_list(browser):
    html = browser.page_source
    soup = BeautifulSoup(html, features="html.parser")
    table = soup.find('table', attrs={'id':"ContentPlaceHolder2_usersGridView"})
    table_body = table.find('tbody')
    rows = table_body.find_all('tr', {'style':'color:#333333;background-color:#E7E7E7;'})
    comp_dict ={'href': [],'comp':[], 'date':[],'type':[]}
    for row in rows:
        cols = row.find('td')
        data = row.find_all('td')
        data = [ele.text.strip() for ele in data]
        comp_dict['comp'].append(data[0])
        comp_dict['date'].append(data[1])
        comp_dict['type'].append(data[2])
        c = cols.find_all('a', href=True)
        comp_dict['href'].append(c[0]['href'])
    return comp_dict

    
def go_to_next_page(browser, page_count):
    browser.find_element_by_xpath(f'//a[contains(@href,"Page${page_count}")]').click()
    
def iterate_through_pages(URL):
    browser = start_browser(URL)
    page_count = 1
    dfs = []
    while page_count < 41:
        print(f'Page: {page_count}')
        comp_dict = get_comp_list(browser)
        comp_df = pd.DataFrame(comp_dict, columns = ['href','comp','date','type'])
        dfs.append(comp_df)
        go_to_next_page(browser, page_count)
        page_count += 1
    return dfs
    


    
def go_to_comp(href, datum):
    BASE_URL = "https://online.styrkelyft.se/web/"
    comp_browser = start_browser(f"{BASE_URL}{href}")
    return get_comp_data(comp_browser,datum)
    


def categorize_data(strip):
    if strip[0].startswith('Herr') or  strip[0].startswith('Dam'):
        return 'kategori'
    elif strip[0].startswith('Viktklass'):
        return 'viktklass'
    elif strip[0].startswith('Plac'):
        return 'header'
    else:
        return 'resultat'

def get_comp_data(browser, datum):
    try: 
        html = browser.page_source
        soup = BeautifulSoup(html, features="html.parser")
        comp = soup.find('span', attrs={'id':"ContentPlaceHolder2_Header"})
        print(datum, comp.text.strip())
        table = soup.find('table', attrs={'id':"ContentPlaceHolder2_contestTable"})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        score_dict = {'datum': [], 'tävling': [], 'kategori': [], 'viktklass':[], 'data':[]}
        category = ""
        weight_class = ""
        for row in rows:
            cols = row.find_all('td')
            strip =  [ele.text.strip() for ele in cols]
            cat = categorize_data(strip)
            if cat != 'header':
                if cat == "kategori":
                    category = strip
                elif cat == "viktklass":
                    weight_class = strip
                else:
                    score_dict['datum'] += [datum]
                    score_dict['kategori'] += category
                    score_dict['viktklass'] += weight_class
                    score_dict['tävling'] += comp
                    score_dict['data'].append(strip)
        df = pd.DataFrame(score_dict)
        return df
    
    except AttributeError:
        print('No data')
        df = pd.DataFrame()
    return df



In [16]:
    
URL = "https://online.styrkelyft.se/web/oldContest.aspx"
compdfs = iterate_through_pages(URL)
compdf = pd.concat(compdfs)
    
dfs = []
for href, date in zip(compdf['href'],compdf['date']):
    df = go_to_comp(href, date)
    dfs.append(df)


tot = pd.concat(dfs)
tot

Page: 1
Page: 2
Page: 3
Page: 4
Page: 5
Page: 6
Page: 7
Page: 8
Page: 9


KeyboardInterrupt: 

# Version 2
Version 2 går igenom individer

In [32]:
def get_competitor_list(browser):
    html = browser.page_source
    soup = BeautifulSoup(html, features="html.parser")
    table = soup.find('table')
    table_body = table.find('tbody')
    rows = table_body.find_all('tr', {'style':'color:#333333;background-color:#E7E7E7;'})
    comp_dict ={'href': [],'namn':[], 'förening':[],'distrikt':[]}
    for row in rows:
        cols = row.find('td')
        data = row.find_all('td')
        data = [ele.text.strip() for ele in data]
        comp_dict['namn'].append(data[0])
        comp_dict['förening'].append(data[1])
        comp_dict['distrikt'].append(data[2])
        c = cols.find_all('a', href=True)
        comp_dict['href'].append(c[0]['href'])
    return comp_dict

def iterate_through_pages(URL):
    browser = start_browser(URL)
    browser.find_element_by_xpath(f'//*[@id="ContentPlaceHolder2_searchButton"]').click()
    page_count = 1
    dfs = []
    while page_count < 551:
        print(f'Page: {page_count}')
        comp_dict = get_competitor_list(browser)
        competitor_df = pd.DataFrame(comp_dict, columns = ['href','namn','förening','distrikt'])
        dfs.append(competitor_df)
        go_to_next_page(browser, page_count)
        page_count += 1
    return dfs


URL = "https://online.styrkelyft.se/web/showUser.aspx"

dfs = iterate_through_pages(URL)
competitor_df = pd.concat(dfs)
competitor_df.to_csv('tavlande.csv')

Page: 1
Page: 2
Page: 3
Page: 4
Page: 5
Page: 6
Page: 7
Page: 8
Page: 9
Page: 10
Page: 11
Page: 12
Page: 13
Page: 14
Page: 15
Page: 16
Page: 17
Page: 18
Page: 19
Page: 20
Page: 21
Page: 22
Page: 23
Page: 24
Page: 25
Page: 26
Page: 27
Page: 28
Page: 29
Page: 30
Page: 31
Page: 32
Page: 33
Page: 34
Page: 35
Page: 36
Page: 37
Page: 38
Page: 39
Page: 40
Page: 41
Page: 42
Page: 43
Page: 44
Page: 45
Page: 46
Page: 47
Page: 48
Page: 49
Page: 50
Page: 51
Page: 52
Page: 53
Page: 54
Page: 55
Page: 56
Page: 57
Page: 58
Page: 59
Page: 60
Page: 61
Page: 62
Page: 63
Page: 64
Page: 65
Page: 66
Page: 67
Page: 68
Page: 69
Page: 70
Page: 71
Page: 72
Page: 73
Page: 74
Page: 75
Page: 76
Page: 77
Page: 78
Page: 79
Page: 80
Page: 81
Page: 82
Page: 83
Page: 84
Page: 85
Page: 86
Page: 87
Page: 88
Page: 89
Page: 90
Page: 91
Page: 92
Page: 93
Page: 94
Page: 95
Page: 96
Page: 97
Page: 98
Page: 99
Page: 100
Page: 101
Page: 102
Page: 103
Page: 104
Page: 105
Page: 106
Page: 107
Page: 108
Page: 109
Page: 110
Page: 11

In [5]:

def go_to_competitor(href):
    BASE_URL = "https://online.styrkelyft.se/web/"
    comp_browser = start_browser(f"{BASE_URL}{href}")
    return get_competitor_data(comp_browser)

def get_competitor_data(browser):
    try: 
        html = browser.page_source
        soup = BeautifulSoup(html, features="html.parser")
        table = soup.find('table', attrs={'id':"ContentPlaceHolder2_ContestTable"})
        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        score_dict = {'tävling': [], 'datum': [], 'typ': [], 'vikt':[], 'böj':[], 'bänk':[], 'mark':[], 'total':[], 'poäng':[]}
        for row in rows[1:]:
            cols = row.find_all('td')
            strip =  [ele.text.strip() for ele in cols]
            score_dict['tävling'].append(strip[0])
            score_dict['typ'].append(strip[1])
            score_dict['datum'].append(strip[2])
            score_dict['vikt'].append(strip[3])
            score_dict['böj'].append(strip[4])
            score_dict['bänk'].append(strip[5])
            score_dict['mark'].append(strip[6])
            score_dict['total'].append(strip[7])
            score_dict['poäng'].append(strip[8])
        df = pd.DataFrame(score_dict)
        df.drop_duplicates(subset='datum', ignore_index=True, inplace=True)
        df.drop_duplicates(subset=['typ','vikt','bänk','total','poäng'], ignore_index=True, inplace=True)
        return df
    
    except AttributeError:
        print('No data')
        df = pd.DataFrame()
    return df


competitor_df = pd.read_csv('tavlande.csv')
df = pd.read_csv('complete_v2_partial.csv')
i=1409
for href, förening, distrikt, namn in zip(competitor_df['href'][1409:],competitor_df['förening'][1409:], competitor_df['distrikt'][1409:], competitor_df['namn'][1409:]):
    i = i+1
    df_temp = go_to_competitor(href)
    df_temp['namn'] = namn
    df_temp['förening'] = förening
    df_temp['distrikt'] = distrikt
    print(df_temp)
    df = df.append(df_temp, ignore_index=True)
    print(f"{namn} nr: {i} {100*(i/5500)}%")
df.to_csv('complete_v2.csv')


                                    tävling       datum  typ   vikt     böj  \
0                          Elitserien KL SL  2014-11-09  KSL  78,10  175,00   
1  DM i bänkpress (klassisk) Sydsvenska SDF  2014-09-21  KBP  79,40    0,00   

     bänk    mark   total   poäng              namn  förening        distrikt  
0  130,00  212,50  517,50  358,80  Andreas Stagnebo  Lunds TK  Sydsvenska SDF  
1  117,50    0,00  117,50   80,60  Andreas Stagnebo  Lunds TK  Sydsvenska SDF  
Andreas Stagnebo nr: 1410 25.636363636363633%
                    tävling       datum  typ   vikt     böj   bänk    mark  \
0  Enbart ranking KL SL dam  2016-11-21  KSL  62,70  110,00  67,50  135,00   
1           Div 1 dam KL SL  2015-11-09  KSL  62,55  112,50  65,00  140,00   
2     Götalandsmästerskapen  2015-03-20  KSL  62,30  102,50  62,50  140,00   
3           Div 1 dam KL SL  2015-02-11  KSL  61,55   95,00  60,00  130,00   
4     Enbart ranking SL dam  2015-02-08   SL  61,55   95,00  60,00  130,00   

    tot

In [6]:
df.to_csv('complete_v2_partial.csv')
df

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,tävling,datum,typ,vikt,böj,bänk,mark,total,poäng,namn,förening,distrikt
0,0.0,0.0,0.0,0.0,Elitserien dam KL BP,2018-11-19,KBP,6270,0,10000,0,10000,10779,Karolina Arvidson,Örebro KK,Västra Svealands SDF
1,1.0,1.0,1.0,1.0,"EM Bänkpress, utrustning",2018-10-12,BP,5689,,12250,,,14236,Karolina Arvidson,Örebro KK,Västra Svealands SDF
2,2.0,2.0,2.0,2.0,EM klassisk bänkpress 2018,2018-08-11,KBP,5691,0,10750,0,10750,12490,Karolina Arvidson,Örebro KK,Västra Svealands SDF
3,3.0,3.0,3.0,3.0,SM i Klassisk Bänkpress 2018,2018-07-08,KBP,5685,000,10000,000,10000,11628,Karolina Arvidson,Örebro KK,Västra Svealands SDF
4,4.0,4.0,4.0,4.0,Enbart Ranking KL BP dam,2018-05-19,KBP,5614,000,10500,000,10500,12330,Karolina Arvidson,Örebro KK,Västra Svealands SDF
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22493,,,,,Div 2 Dam SL klassisk,2022-05-14,KSL,5695,9750,5250,10000,25000,29030,Linda Bengtsson,Göteborgs KK,Västra Götaland SDF
22494,,,,,Allsvenskan Dam SL klassisk,2022-05-14,KSL,6610,10000,6250,13000,29250,30310,My Haag,Linköpings AK,Sydöstra SDF
22495,,,,,Allsvenskan Dam BP klassisk,2022-05-14,KBP,10620,0,8000,0,8000,6556,Justyna Kaletka,Täby AK,Östra Svealands SDF
22496,,,,,Div 1 Herr BP klassisk,2022-05-18,KBP,8405,0,11000,0,11000,7289,Richard Landt,Håbo AK,Östra Svealands SDF
