### Imports and settings

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Notebook display limits: cap how many rows/columns pandas renders for a frame.
pd.set_option('display.max_rows', 25)
pd.set_option('display.max_columns', 50)

### Functions

In [6]:
def get_players(year: int, out=False, count=10, **kwargs) -> pd.DataFrame:
    """Scrape the paginated futbin player listing for one game year.

    Pages are requested one after another, starting at page 1, until a page
    contains no player links. Every requested URL is printed.

    Parameters
    ----------
    year : int
        Year segment used in the listing URL (e.g. ``22``).
    out : bool, default False
        When true, print the page number every ``count`` pages.
    count : int, default 10
        Progress interval in pages; only consulted when ``out`` is true.
        A value of 0 disables the progress print.
    **kwargs
        Extra query-string filters, appended verbatim as ``&key=value``
        (e.g. ``version='gold'``, ``league=13``).

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``pos``, ``card_id``, ``year``, ``link``; one row
        per player link found. Rows are in reverse scrape order (the last
        player found is row 0), which is the order the previous
        row-prepending implementation produced.

    Raises
    ------
    IndexError
        If a page yields fewer position cells than player links.
    """
    columns = ['name', 'pos', 'card_id', 'year', 'link']
    # Loop-invariant, so built once instead of on every page.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    additional = ''.join(f'&{key}={value}' for key, value in kwargs.items())

    # Rows are collected in a plain list and turned into a frame once at the
    # end; concatenating a one-row frame per player is quadratic.
    records = []
    page_no = 0

    while True:
        page_no += 1
        url = f'https://www.futbin.com/{year}/players?page={page_no}' + additional
        print(url)
        # A timeout keeps a stalled connection from hanging the cell forever.
        result = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(result.content, 'html.parser')
        ply = soup.find_all("a", {"class": "player_name_players_table"})
        pos = soup.find_all("div", {"class": "font-weight-bold"})

        if not ply:
            # An empty page ends the pagination; no progress print or sleep
            # is needed once there is nothing left to fetch.
            break

        for j, anchor in enumerate(ply):
            records.append([
                anchor.text,
                pos[j].text,
                anchor.get('data-site-id'),
                anchor.get('data-year'),
                anchor.get('href'),
            ])

        if out and count and page_no % count == 0:
            print(page_no)

        # Pause between page requests.
        time.sleep(2)

    # Keep the historical ordering: newest scraped row first.
    records.reverse()
    return pd.DataFrame(records, columns=columns)

def get_stats(players: pd.DataFrame, out=False, count=100) -> pd.DataFrame:
    """Scrape the futbin detail page of every player in ``players``.

    Parameters
    ----------
    players : pandas.DataFrame
        Must contain a ``link`` column (site-relative player URL) and a
        ``pos`` column. Rows are read positionally, so any index works.
    out : bool, default False
        When true, print the 0-based row counter every ``count`` players.
    count : int, default 100
        Progress interval; only consulted when ``out`` is true. A value of
        0 disables the progress print.

    Returns
    -------
    pandas.DataFrame
        One row per player with identity columns (``player_id``, ``dob``,
        ``club``, ``club_id``, ``league``, ``league_id``, ``nation``,
        ``card_id``, ``rating``), 35 outfield attribute columns and 6
        ``gk_*`` columns. Players whose ``pos`` is ``'GK'`` get ``None`` in
        the outfield columns; all others get ``None`` in the ``gk_*``
        columns. Rows are in reverse scrape order (last player first), which
        is the order the previous row-prepending implementation produced.

    Notes
    -----
    ``club``, ``league`` and ``nation`` stay ``""`` for years outside 15-23.
    If a page has no ``stat_val`` cells, the URL is printed and every
    attribute column of that row is ``None``.
    """
    stat_columns = ['player_id', 'dob', 'club', 'club_id', 'league', 'league_id', 'nation', 'card_id', 'rating', 'pac', 'acceleration', 'sprint_speed', 'sho', 'positioning', 'finishing', 'shot_power', 'long_shots',
                    'volleys', 'penalties', 'pas', 'vision', 'crossing', 'fk_accuracy', 'short_passing', 'long_passing', 'curve', 'dri', 'agility', 'balance',
                    'reactions', 'ball_control', 'dribbling', 'composure', 'def', 'interceptions', 'heading_acc', 'def_awareness',
                    'standing_tackle', 'sliding_tackle', 'phy', 'jumping', 'stamina', 'strength', 'aggression', 'gk_diving', 'gk_handling', 'gk_kicking',
                    'gk_reflexes', 'gk_speed', 'gk_positioning']
    n_outfield = 35  # attribute columns 'pac' .. 'aggression'
    n_keeper = 6     # attribute columns 'gk_diving' .. 'gk_positioning'

    # Loop-invariant, so built once instead of per player.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

    # Plain lists give positional access regardless of the frame's index.
    links = players['link'].tolist()
    positions = players['pos'].tolist()

    # Rows are collected in a list and converted once; concatenating a
    # one-row frame per player is quadratic.
    records = []

    for i, (link, position) in enumerate(zip(links, positions)):
        url = f'https://www.futbin.com{link}'
        # A timeout keeps a stalled connection from hanging the cell forever.
        result = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(result.content, 'html.parser')
        rating = soup.find("div", {"class": "pcdisplay-rat"})
        ids = soup.find("div", {"id": "page-info"})
        stat_tab = soup.find("div", {"class": "card-body"})
        ss = stat_tab.find_all("div", {"class": "stat_val"})
        dob = soup.find("a", {"style": "color : #346fda;"}).get("title").split()[2]

        spec_list = soup.find("div", {"id": "info_content"})
        spec_tab = spec_list.find_all("td")
        spec_len = len(spec_tab)
        club = ""
        league = ""
        nation = ""

        # The last two info cells hold the club and league ids.
        club_id = spec_tab[spec_len - 2].text
        league_id = spec_tab[spec_len - 1].text
        year = int(ids.get("data-year"))
        int_rep = spec_list.find_all("th")[spec_len - 14].text == "Intl. Rep "

        if 15 <= year <= 23:
            # Club, nation and league sit in three consecutive cells counted
            # from the end of the table. The club cell is 17 from the end,
            # one further for year 23 and one further again when an
            # "Intl. Rep " row is present.
            first = spec_len - (17 + (year == 23) + int_rep)
            club = spec_tab[first].find("a").text
            if int_rep:
                # On these pages the league cell may lack a link: report
                # the URL and leave the league blank.
                try:
                    league = spec_tab[first + 2].find("a").text
                except (AttributeError, IndexError):
                    print(url)
            else:
                league = spec_tab[first + 2].find("a").text
            nation = spec_tab[first + 1].find("a").text
        else:
            # Unsupported year: club/league/nation stay blank.
            print("L")

        if not ss:
            # No stat cells were found; report the page instead of crashing
            # on ss[1] and keep the identity columns.
            print(url)
            attributes = [None] * (n_outfield + n_keeper)
        elif position != 'GK':
            # Outfield values are the odd-indexed stat cells 1, 3, ..., 69.
            attributes = [ss[k].text for k in range(1, 70, 2)] + [None] * n_keeper
        else:
            # Goalkeeper values are stat cells 1, 5, 9, 13, 17, 21.
            attributes = [None] * n_outfield + [ss[k].text for k in range(1, 22, 4)]

        records.append([ids.get("data-baseid"), dob, club, club_id, league, league_id,
                        nation, ids.get('data-id'), rating.text] + attributes)

        # Progress is reported from inside the loop so it fires every
        # `count` players.
        if out and count and i % count == 0:
            print(i)

    # Keep the historical ordering: newest scraped row first.
    records.reverse()
    return pd.DataFrame(records, columns=stat_columns)

In [3]:
# League ids passed as the `league` filter of the listing URL
leagues = [
    13,  # Prem
    53,  # La Liga
    31,  # Serie A
    16,  # ligue 1
    19,  # Bundesliga
]

dfs = []

# One listing scrape per league; each frame is appended as soon as it
# finishes, so an interrupted run keeps the leagues already fetched.
for league_id in leagues:
    league_players = get_players(year=22, out=True, version='gold', league=league_id)
    dfs.append(league_players)

https://www.futbin.com/22/players?page=1&version=gold&league=13
https://www.futbin.com/22/players?page=2&version=gold&league=13
https://www.futbin.com/22/players?page=3&version=gold&league=13
https://www.futbin.com/22/players?page=4&version=gold&league=13
https://www.futbin.com/22/players?page=5&version=gold&league=13
https://www.futbin.com/22/players?page=6&version=gold&league=13
https://www.futbin.com/22/players?page=7&version=gold&league=13
https://www.futbin.com/22/players?page=8&version=gold&league=13
https://www.futbin.com/22/players?page=9&version=gold&league=13
https://www.futbin.com/22/players?page=10&version=gold&league=13
10
https://www.futbin.com/22/players?page=11&version=gold&league=13
https://www.futbin.com/22/players?page=12&version=gold&league=13
https://www.futbin.com/22/players?page=13&version=gold&league=13
https://www.futbin.com/22/players?page=14&version=gold&league=13
https://www.futbin.com/22/players?page=15&version=gold&league=13
https://www.futbin.com/22/playe

In [4]:
# Inspect the player listing scraped for the first entry of `leagues` (id 13)
dfs[0]

Unnamed: 0,name,pos,card_id,year,link
0,Matt Turner,GK,27363,22,/22/player/27363/matt-turner
1,William Saliba,CB,27257,22,/22/player/27257/william-saliba
2,Julián Álvarez,RW,27249,22,/22/player/27249/julian-alvarez
3,Luis Sinisterra,LW,26502,22,/22/player/26502/luis-sinisterra
4,Rasmus Kristensen,RB,26498,22,/22/player/26498/rasmus-kristensen
...,...,...,...,...,...
389,Heung Min Son,LM,346,22,/22/player/346/heung-min-son
390,N'Golo Kanté,CDM,383,22,/22/player/383/n-golo-kante
391,Harry Kane,ST,382,22,/22/player/382/harry-kane
392,Cristiano Ronaldo,ST,426,22,/22/player/426/cristiano-ronaldo


In [7]:
# Per-player detail scrape, one stats frame per league frame in `dfs`
# (same order, so the two lists line up by position).
stat_dfs = []

for league_players in dfs:
    league_stats = get_stats(players=league_players, out=True)
    stat_dfs.append(league_stats)

https://www.futbin.com/22/player/27363/matt-turner
https://www.futbin.com/22/player/27257/william-saliba
https://www.futbin.com/22/player/27249/julian-alvarez
https://www.futbin.com/22/player/26502/luis-sinisterra
https://www.futbin.com/22/player/26498/rasmus-kristensen
https://www.futbin.com/22/player/26418/cheick-doucoure
https://www.futbin.com/22/player/26375/fraser-forster
https://www.futbin.com/22/player/26273/brandon-williams
https://www.futbin.com/22/player/24022/dan-burn
https://www.futbin.com/22/player/23998/calum-chambers
https://www.futbin.com/22/player/11215/alex-kral
https://www.futbin.com/22/player/10318/brandon-williams
https://www.futbin.com/22/player/10271/enock-mwepu
https://www.futbin.com/22/player/10158/imran-louza
https://www.futbin.com/22/player/9460/max-aarons
https://www.futbin.com/22/player/9410/alexis-mac-allister
https://www.futbin.com/22/player/9325/frank-onyeka
https://www.futbin.com/22/player/8827/joseph-willock
https://www.futbin.com/22/player/8756/juan-c

KeyboardInterrupt: 

In [None]:
# Join each league's listing with its stats frame on the shared card id
comb = []

for idx, league_players in enumerate(dfs):
    comb.append(league_players.merge(stat_dfs[idx], on='card_id'))

In [None]:
# Stack the per-league frames into one table. ignore_index=True renumbers the
# rows 0..n-1; otherwise every league frame contributes its own 0-based labels
# and the combined index contains duplicates.
total = pd.concat(comb, ignore_index=True)

In [None]:
# Display the combined listing + stats frame
total

In [None]:
# Number of distinct player ids in the combined table (missing ids count as
# one value, as with len(unique()))
total["player_id"].nunique(dropna=False)

In [93]:
# Write the combined table to disk; index=False leaves the row labels out of the file
total.to_csv("futbin_2022.csv", index=False)