In [22]:
import csv
import time
import requests
from bs4 import BeautifulSoup as bs

In [23]:
# Root of the site; hrefs scraped from its pages are appended to this.
base_site_link = "https://www.transfermarkt.com"

# Competition overview page to scrape. Exactly one assignment should be active;
# the commented-out lines are ready-made alternatives.
league_link = base_site_link + "/uefa-champions-league/startseite/pokalwettbewerb/CL"
#league_link = base_site_link + "/europa-league/startseite/pokalwettbewerb/EL"
#league_link = base_site_link + "/uefa-europa-conference-league/startseite/pokalwettbewerb/UCOL"
#league_link = base_site_link + "/laliga/startseite/wettbewerb/ES1"
#league_link = base_site_link + "/liga-portugal/startseite/wettbewerb/PO1"
#league_link = base_site_link + "/premier-league/startseite/wettbewerb/GB1"

# Request headers sent with every page fetch; only a desktop-browser
# User-Agent is set.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    ),
}

In [24]:
def get_player_data(players_data, players_info_list,club_logo):
    """Append one dict per squad-table row to ``players_data`` and return it.

    Args:
        players_data: List that receives the player dicts; mutated in place.
        players_info_list: Iterable of parsed table rows. Each row must offer
            ``find`` / ``find_all`` the way BeautifulSoup tags do.
        club_logo: Value stored under ``'club_logo'`` for every row.

    Returns:
        The same ``players_data`` list that was passed in.

    Raises:
        IndexError: If a row has fewer than five ``td.zentriert`` cells or no
            ``td`` with class ``rechts hauptlink``.

    A row without a shirt-number ``div`` or without a portrait ``img`` no
    longer aborts the run: the affected fields are stored as ``None``.
    """
    for player in players_info_list:
        # Resolve the shared elements once per row instead of once per field.
        centered_cells = player.find_all('td', class_='zentriert')
        number_tag = player.find('div', class_='rn_nummer')
        portrait = player.find('img', class_='bilderrahmen-fixed lazy lazy')
        flags = centered_cells[2].find_all('img')

        # The birthdate cell text looks like "<date> (<age>)". Cutting at '('
        # leaves the space that preceded it, so strip again afterwards.
        birthdate = centered_cells[1].get_text(strip=True).split('(')[0].strip()

        players_data.append({
            'club_logo': club_logo,
            'number': number_tag.get_text(strip=True) if number_tag is not None else None,
            'position': centered_cells[0].get('title'),
            'name': portrait.get('title') if portrait is not None else None,
            'image': portrait.get('data-src') if portrait is not None else None,
            'birthdate': birthdate,
            # A missing attribute becomes '' so the two joined lists stay
            # aligned and str.join never sees None.
            'nationality_names': ', '.join(flag.get('alt') or '' for flag in flags),
            'nationality_images': ', '.join(flag.get('src') or '' for flag in flags),
            'height': centered_cells[3].get_text(strip=True),
            'best_foot': centered_cells[4].get_text(strip=True),
            'market_value': player.find_all('td', class_='rechts hauptlink')[0].get_text(strip=True)
        })

    return players_data

def get_team_link(club, mode):
    """Build the squad-page URL for one club cell of a competition table.

    Args:
        club: Parsed table cell whose first ``a`` element carries the club's
            relative href.
        mode: ``'standard'`` swaps ``startseite`` for ``kader`` in the URL;
            ``'international'`` swaps ``spielplan`` for ``kader``. Any other
            value leaves the path untouched.

    Returns:
        The absolute URL with ``/plus/1`` appended (presumably the detailed
        squad view; confirm against the site).
    """
    # (mode, path segment that gets replaced by 'kader')
    segment_swaps = (
        ('standard', 'startseite'),
        ('international', 'spielplan'),
    )

    squad_url = base_site_link + club.find('a').get('href')
    for swap_mode, segment in segment_swaps:
        if mode == swap_mode:
            squad_url = squad_url.replace(segment, 'kader')

    return squad_url + '/plus/1'

def _fetch_team_page(team_link, max_retries, delay, timeout):
    """Request ``team_link`` until it answers HTTP 200 or the attempts run out."""
    last_problem = None
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(team_link, headers=headers, timeout=timeout)
        except requests.RequestException as error:
            # Timeouts and connection errors count as a failed attempt.
            last_problem = repr(error)
        else:
            if response.status_code == 200:
                return response
            last_problem = f'HTTP {response.status_code}'

        if attempt < max_retries:
            time.sleep(delay)

    raise RuntimeError(
        f'Could not fetch {team_link} after {max_retries} attempts (last problem: {last_problem})'
    )


def get_club_players_data(clubs, mode, max_retries=5, delay=10, timeout=30):
    """Scrape the squad table of every club and collect all players.

    Args:
        clubs: Iterable of parsed table cells, one per club; each must contain
            an ``a`` element with ``title`` and ``href`` attributes.
        mode: Passed through to ``get_team_link`` (``'standard'`` or
            ``'international'``).
        max_retries: Attempts per club page before giving up. The previous
            implementation retried forever on any non-200 answer.
        delay: Seconds to sleep before the first request, after each club and
            between retries.
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the run.

    Returns:
        List of player dicts as produced by ``get_player_data``.

    Raises:
        RuntimeError: If a club page cannot be fetched within ``max_retries``
            attempts.
        AttributeError: If a fetched page lacks the expected header container
            or ``table.items`` element.

    Reads the module-level ``headers`` dict for the HTTP requests and prints
    each club title as a progress indicator.
    """
    players_data = []
    time.sleep(delay)
    for club in clubs:
        print(club.find('a').get('title'))
        team_link = get_team_link(club, mode)

        response = _fetch_team_page(team_link, max_retries, delay, timeout)

        soup = bs(response.content, 'html.parser')
        club_logo = soup.find('div', class_='data-header__profile-container').find('img').get('src')

        players_table = soup.find('table', class_='items')
        # Player rows alternate between the 'odd' and 'even' classes; all odd
        # rows are collected first, then all even rows.
        players_info_list = players_table.find_all('tr', class_='odd') + players_table.find_all('tr', class_='even')
        players_data = get_player_data(players_data, players_info_list, club_logo)
        time.sleep(delay)

    return players_data

def get_standard_league_players_data(league_link, headers):
    """Scrape all players of a competition whose overview page lists clubs in ``div#yw1``.

    Args:
        league_link: URL of the competition overview page.
        headers: HTTP headers sent with the overview request.

    Returns:
        List of player dicts collected by ``get_club_players_data`` in
        ``'standard'`` mode.

    Raises:
        requests.HTTPError: If the overview page answers with an error status.
        ValueError: If the page contains no ``div`` with id ``yw1``.
    """
    # A timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(league_link, headers=headers, timeout=30)
    # Surface HTTP failures directly instead of as an AttributeError on None below.
    response.raise_for_status()

    soup = bs(response.content, 'html.parser')
    clubs_table = soup.find('div', id='yw1')
    if clubs_table is None:
        raise ValueError(f"No club table (div#yw1) found at {league_link}")
    clubs = clubs_table.find_all('td', class_='zentriert no-border-rechts')

    players_data = get_club_players_data(clubs, 'standard')

    return players_data
    
def get_international_league_players_data(league_link, headers):
    """Scrape all players of a competition whose overview page lists clubs in ``div#yw4``.

    Args:
        league_link: URL of the competition overview page.
        headers: HTTP headers sent with the overview request.

    Returns:
        List of player dicts collected by ``get_club_players_data`` in
        ``'international'`` mode.

    Raises:
        requests.HTTPError: If the overview page answers with an error status.
        ValueError: If the page contains no ``div`` with id ``yw4``.
    """
    # A timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(league_link, headers=headers, timeout=30)
    # Surface HTTP failures directly instead of as an AttributeError on None below.
    response.raise_for_status()

    soup = bs(response.content, 'html.parser')
    clubs_table = soup.find('div', id='yw4')
    if clubs_table is None:
        raise ValueError(f"No club table (div#yw4) found at {league_link}")
    clubs = clubs_table.find_all('td', class_='no-border-links hauptlink')

    players_data = get_club_players_data(clubs, 'international')

    return players_data

In [25]:
# Scrape every squad of the competition selected by `league_link`.
# Each club title is printed as progress, and the scraper sleeps 10 s before
# the first club and after every club, so this cell takes several minutes.
# Only one of the two calls should be active. The standard variant reads the
# club list from div#yw1, the international one from div#yw4.
# NOTE(review): presumably the standard variant is meant for the
# ".../wettbewerb/..." links and the international one for the
# ".../pokalwettbewerb/..." links -- confirm against the site.
#players_data = get_standard_league_players_data(league_link, headers)
players_data = get_international_league_players_data(league_link, headers)

Liverpool FC
FC Barcelona
Arsenal FC
Inter Milan
Atlético de Madrid
Bayer 04 Leverkusen
LOSC Lille
Aston Villa
Atalanta BC
Borussia Dortmund
Real Madrid
Bayern Munich
AC Milan
PSV Eindhoven
Paris Saint-Germain
SL Benfica
AS Monaco
Stade Brestois 29
Feyenoord Rotterdam
Juventus FC
Celtic FC
Manchester City
Sporting CP
Club Brugge KV
GNK Dinamo Zagreb
VfB Stuttgart
Shakhtar Donetsk
Bologna FC 1909
Red Star Belgrade
SK Sturm Graz
AC Sparta Prague
RB Leipzig
Girona FC
Red Bull Salzburg
Slovan Bratislava
BSC Young Boys


In [26]:
# CSV column order; these are the keys of every player dict.
fieldnames = [
    'club_logo',
    'number',
    'position',
    'name',
    'image',
    'birthdate',
    'nationality_names',
    'nationality_images',
    'height',
    'best_foot',
    'market_value',
]

# Index 3 of the '/'-split URL is the first path segment, i.e. the
# competition slug, which names the output file.
competition_slug = league_link.split('/')[3]
file_name = f'players-{competition_slug}.csv'

# newline='' lets the csv module control line endings itself.
with open(file_name, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for player_row in players_data:
        writer.writerow(player_row)