### Import and settings

In [30]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

pd.set_option('display.max_rows', 25)
pd.set_option('display.max_columns', 50)

### Functions

In [31]:
def get_players(year: int, out=False, count=10, **kwargs):
    """Scrape the paginated futbin.com player list for one game year.

    Pages are requested starting at page 1 and scraping stops at the first
    page that contains no player links.

    Args:
        year: Value placed in the URL path
            (``https://www.futbin.com/{year}/players``).
        out: When true, print the page number every ``count`` pages.
        count: Progress interval, in pages, used when ``out`` is true.
        **kwargs: Extra query parameters, appended verbatim to the URL as
            ``&key=value`` (no URL-encoding is applied).

    Returns:
        A DataFrame with the columns ``name``, ``pos``, ``card_id``, ``year``
        and ``link``. Rows are in reverse scrape order (the last player
        scraped is row 0) with a fresh 0..n-1 index.

    Raises:
        IndexError: If a page yields fewer position elements than player
            links.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    columns = ['name', 'pos', 'card_id', 'year', 'link']
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

    # The query suffix is the same for every page, so build it once.
    additional = ''.join(f'&{key}={value}' for key, value in kwargs.items())

    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end; concatenating a one-row frame per player copies the whole
    # table every time.
    rows = []
    page_no = 0

    while True:
        page_no += 1
        url = f'https://www.futbin.com/{year}/players?page={page_no}' + additional
        print(url)
        # A timeout keeps one stalled connection from hanging the whole scrape.
        result = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(result.content, 'html.parser')
        ply = soup.find_all("a", {"class": "player_name_players_table"})
        pos = soup.find_all("div", {"class": "font-weight-bold"})

        # The j-th position element is paired with the j-th player link.
        for j, anchor in enumerate(ply):
            rows.append([
                anchor.text,
                pos[j].text,
                anchor.get('data-site-id'),
                anchor.get('data-year'),
                anchor.get('href'),
            ])

        if out and page_no % count == 0:
            print(page_no)

        if not ply:
            # An empty page marks the end of the listing; no further request
            # follows, so there is nothing to throttle.
            break

        time.sleep(2)

    # Reversed so that the most recently scraped player comes first, which is
    # the row order callers have always received from this function.
    return pd.DataFrame(rows[::-1], columns=columns)

def get_stats(players: pd.DataFrame, out=False, count=100):
    """Scrape the detail page of every player in ``players``.

    For each row, ``https://www.futbin.com{link}`` is fetched and parsed into
    one row of the result.

    Args:
        players: DataFrame with at least a ``link`` column (URL path of the
            player page) and a ``pos`` column. Rows are processed in
            positional order, whatever the index labels are.
        out: When true, print the row position every ``count`` players.
        count: Progress interval, in players, used when ``out`` is true.

    Returns:
        A DataFrame with 50 columns: nine identity columns (``player_id``
        through ``rating``), 35 outfield attribute columns (``pac`` through
        ``aggression``) and six ``gk_*`` columns. Rows whose ``pos`` is not
        ``'GK'`` get the outfield columns filled and the ``gk_*`` columns set
        to ``None``; all other rows get the opposite. Rows are in reverse
        processing order (the last player processed is row 0).

    Raises:
        AttributeError, IndexError: If a page lacks the elements the parser
            looks up.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    columns = ['player_id', 'dob', 'club', 'club_id', 'league', 'league_id', 'nation', 'card_id', 'rating', 'pac', 'acceleration', 'sprint_speed', 'sho', 'positioning', 'finishing', 'shot_power', 'long_shots',
               'volleys', 'penalties', 'pas', 'vision', 'crossing', 'fk_accuracy', 'short_passing', 'long_passing', 'curve', 'dri', 'agility', 'balance',
               'reactions', 'ball_control', 'dribbling', 'composure', 'def', 'interceptions', 'heading_acc', 'def_awareness',
               'standing_tackle', 'sliding_tackle', 'phy', 'jumping', 'stamina', 'strength', 'aggression', 'gk_diving', 'gk_handling', 'gk_kicking',
               'gk_reflexes', 'gk_speed', 'gk_positioning']
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

    # Rows are accumulated in a list and converted once at the end instead of
    # concatenating a one-row DataFrame per player.
    rows = []

    # zip() walks the columns by position, so a filtered or re-indexed
    # ``players`` frame works just as well as one labelled 0..n-1.
    for i, (link, position) in enumerate(zip(players['link'], players['pos'])):
        url = f'https://www.futbin.com{link}'
        # A timeout keeps one stalled connection from hanging the whole scrape.
        result = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(result.content, 'html.parser')
        rating = soup.find("div", {"class": "pcdisplay-rat"})
        ids = soup.find("div", {"id": "page-info"})
        stat_tab = soup.find("div", {"class": "card-body"})
        ss = stat_tab.find_all("div", {"class": "stat_val"})
        # The date of birth is the third whitespace-separated token of the
        # link's title attribute.
        dob = soup.find("a", {"style": "color : #346fda;"}).get("title").split()[2]

        spec_list = soup.find("div", {"id": "info_content"})
        spec_tab = spec_list.find_all("td")
        club = ""
        league = ""
        nation = ""

        spec_len = len(spec_tab)
        # Club id and league id are the last two cells of the info table.
        club_id = spec_tab[spec_len-2].text
        league_id = spec_tab[spec_len-1].text
        year = int(ids.get("data-year"))
        # An "Intl. Rep" header row shifts the year-23 cell offsets by one.
        int_rep = spec_list.find_all("th")[spec_len-14].text == "Intl. Rep "

        if year == 23:
            if int_rep:
                club = spec_tab[spec_len-19].find("a").text
                try:
                    league = spec_tab[spec_len-17].find("a").text
                except (AttributeError, IndexError):
                    # Best effort: the league stays empty and the page URL is
                    # printed so it can be inspected by hand.
                    print(url)
                nation = spec_tab[spec_len-18].find("a").text
            else:
                club = spec_tab[spec_len-18].find("a").text
                league = spec_tab[spec_len-16].find("a").text
                nation = spec_tab[spec_len-17].find("a").text
        elif 15 <= year < 23:
            club = spec_tab[1].find("a").text
            league = spec_tab[3].find("a").text
            nation = spec_tab[2].find("a").text
        else:
            # Years outside 15..23 have no known layout; club, league and
            # nation stay empty and a marker is printed.
            print("L")

        identity = [ids.get("data-baseid"), dob, club, club_id, league, league_id, nation, ids.get('data-id'), rating.text]

        if ss and position != 'GK':
            # Outfield values sit at the odd indices 1, 3, ..., 69 (35 values).
            attributes = [ss[k].text for k in range(1, 70, 2)] + [None] * 6
        else:
            # Goalkeeper values sit at indices 1, 5, 9, 13, 17 and 21.
            attributes = [None] * 35 + [ss[k].text for k in range(1, 22, 4)]

        rows.append(identity + attributes)

        # Progress is reported inside the loop so it fires every ``count``
        # players and never reads ``i`` when there was nothing to process.
        if out and i % count == 0:
            print(i)

    # Reversed so that the most recently processed player comes first, which
    # is the row order callers have always received from this function.
    return pd.DataFrame(rows[::-1], columns=columns)

In [40]:
# League ids passed as the `league` query parameter:
# Prem: 13
# La Liga: 53
# Serie A: 31
# ligue 1: 16
# Bundesliga: 19
leagues = [13, 53, 31, 16, 19]

# One player-list DataFrame per league, in the same order as `leagues`.
dfs = [
    get_players(year=20, out=True, version='gold', league=league_id)
    for league_id in leagues
]

https://www.futbin.com/20/players?page=1&version=gold&league=13
https://www.futbin.com/20/players?page=2&version=gold&league=13
https://www.futbin.com/20/players?page=3&version=gold&league=13
https://www.futbin.com/20/players?page=4&version=gold&league=13
https://www.futbin.com/20/players?page=5&version=gold&league=13
https://www.futbin.com/20/players?page=6&version=gold&league=13
https://www.futbin.com/20/players?page=7&version=gold&league=13
https://www.futbin.com/20/players?page=8&version=gold&league=13
https://www.futbin.com/20/players?page=9&version=gold&league=13
https://www.futbin.com/20/players?page=10&version=gold&league=13
10
https://www.futbin.com/20/players?page=11&version=gold&league=13
https://www.futbin.com/20/players?page=12&version=gold&league=13
https://www.futbin.com/20/players?page=13&version=gold&league=13
https://www.futbin.com/20/players?page=14&version=gold&league=13
https://www.futbin.com/20/players?page=15&version=gold&league=13
https://www.futbin.com/20/playe

In [41]:
# Show the player list scraped for the first entry of `leagues`.
dfs[0]

Unnamed: 0,name,pos,card_id,year,link
0,Mohammed Salisu,CB,50353,20,/20/player/50353/mohammed-salisu
1,Lovre Kalinic,GK,50346,20,/20/player/50346/lovre-kalinic
2,Yannick Bolasie,LW,50285,20,/20/player/50285/yannick-bolasie
3,Ferrán Torres,RM,50237,20,/20/player/50237/ferran-torres
4,Juan Camilo Hernández,ST,49846,20,/20/player/49846/juan-camilo-hernandez
...,...,...,...,...,...
410,Sergio Agüero,ST,294,20,/20/player/294/sergio-aguero
411,De Gea,GK,239,20,/20/player/239/de-gea
412,Virgil van Dijk,CB,469,20,/20/player/469/virgil-van-dijk
413,Mohamed Salah,RW,461,20,/20/player/461/mohamed-salah


In [42]:
# Fetch the detail-page stats for every league's player list; the result
# lines up index-for-index with `dfs`.
stat_dfs = [get_stats(players=league_players, out=True) for league_players in dfs]

In [43]:
# Join each league's player list with its stats table on the shared card id.
comb = [
    pd.merge(league_players, league_stats, on='card_id')
    for league_players, league_stats in zip(dfs, stat_dfs)
]

In [44]:
# Stack the per-league merged frames into a single table. Each frame keeps its
# own row labels, so index values repeat from one league to the next.
total = pd.concat(comb)

In [45]:
# Show the combined table for all leagues.
total

Unnamed: 0,name,pos,card_id,year,link,player_id,dob,club,club_id,league,league_id,nation,rating,pac,acceleration,sprint_speed,sho,positioning,finishing,shot_power,long_shots,volleys,penalties,pas,vision,...,curve,dri,agility,balance,reactions,ball_control,dribbling,composure,def,interceptions,heading_acc,def_awareness,standing_tackle,sliding_tackle,phy,jumping,stamina,strength,aggression,gk_diving,gk_handling,gk_kicking,gk_reflexes,gk_speed,gk_positioning
0,Mohammed Salisu,CB,50353,20,/20/player/50353/mohammed-salisu,244915,17-04-1999,Southampton,17,Premier League,13,Ghana,75,70,67,72,36,25,30,61,28,22,41,52,26,...,55,56,59,52,70,62,50,57,77,75,73,77,78,74,82,80,71,89,75,,,,,,
1,Lovre Kalinic,GK,50346,20,/20/player/50346/lovre-kalinic,220932,03-04-1990,Aston Villa,2,Premier League,13,Croatia,75,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,80,74,43,78,34,-
2,Yannick Bolasie,LW,50285,20,/20/player/50285/yannick-bolasie,189157,24-05-1989,Everton,7,Premier League,13,Congo DR,75,81,78,83,69,71,66,80,68,56,65,69,68,...,65,79,74,68,73,81,81,75,34,31,70,29,34,17,75,85,66,78,76,,,,,,
3,Ferrán Torres,RM,50237,20,/20/player/50237/ferran-torres,241461,29-02-2000,Manchester City,10,Premier League,13,Spain,75,78,79,78,69,72,74,71,58,67,62,69,67,...,58,76,74,67,66,76,79,73,32,34,50,35,24,28,57,59,67,61,34,,,,,,
4,Juan Camilo Hernández,ST,49846,20,/20/player/49846/juan-camilo-hernandez,237034,22-04-1999,Watford,1795,Premier League,13,Colombia,75,86,88,84,75,75,78,73,74,64,70,62,61,...,57,71,71,79,74,68,73,67,38,23,69,44,33,37,71,72,74,72,65,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
308,Thiago,CM,428,20,/20/player/428/thiago,189509,11-04-1991,FC Bayern München,21,Bundesliga,19,Spain,87,69,76,64,74,79,69,76,79,86,75,85,86,...,85,90,91,88,86,91,90,87,69,83,54,67,67,71,63,76,75,59,56,,,,,,
309,Mats Hummels,CB,421,20,/20/player/421/mats-hummels,178603,16-12-1988,Borussia Dortmund,22,Bundesliga,19,Germany,87,51,46,55,58,56,55,71,51,60,68,76,79,...,65,73,64,59,87,81,68,89,89,89,87,90,89,86,76,68,65,85,71,,,,,,
310,Manuel Neuer,GK,420,20,/20/player/420/manuel-neuer,167495,27-03-1986,FC Bayern München,21,Bundesliga,19,Germany,88,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,87,87,91,87,56,60
311,Marco Reus,CAM,189,20,/20/player/189/marco-reus,188350,31-05-1989,Borussia Dortmund,22,Bundesliga,19,Germany,88,85,85,85,88,89,87,88,87,90,90,84,86,...,89,87,86,84,87,88,87,85,45,48,49,49,37,45,66,71,76,67,51,,,,,,


In [46]:
# Number of distinct player_id values in the combined table.
len(total.player_id.unique())

1433

In [47]:
# Write the combined table to CSV without the row-label column.
# NOTE(review): nothing here creates the "csv files" directory - confirm it
# exists before running this cell.
total.to_csv("csv files/futbin_2020.csv", index=False)