IMPORTING LIBRARIES 📚

In [1]:
import re
import requests
import pandas as pd
from bs4 import BeautifulSoup

Getting Data 📉

In [2]:
def data_of_players(link, players_datas, timeout=30):
    """Scrape a single player profile page and return its fields as a flat dict.

    Parameters
    ----------
    link : str
        URL of the player profile. The trailing ``/<digits>`` path segment is
        stored as the player id.
    players_datas : object
        Not used by this function; kept so existing callers keep working.
    timeout : float, optional
        Seconds ``requests.get`` may wait before raising. Defaults to 30 so a
        single stalled connection cannot hang a long scraping run.

    Returns
    -------
    dict
        Keys, in order: ``player_id``, ``shirt_number``, ``given_name``,
        ``full_name``, ``date_of_birth``, ``citizenship``, ``place_of_birth``,
        ``caps``, ``goals``, ``other_positions``, ``outfitter``,
        ``contract_expires``, ``agent``, ``foot``, ``contract_Joined``,
        ``height``, ``current_club``, ``date_of_last_contract``,
        ``players_link``. A value is ``None`` whenever the matching element is
        not found on the page; ``players_link`` is ``link`` unchanged.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (connection failure, timeout, ...).
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", "accept-language": "en-US,en;q=0.9"}
    page = requests.get(link, headers=headers, timeout=timeout)
    soup = BeautifulSoup(page.content, "html.parser")

    res = {}

    # Getting Players ID
    # A link without a trailing numeric segment yields None instead of crashing
    # on `.group` after the page has already been downloaded.
    id_match = re.search(r"/(\d+)$", link)
    res["player_id"] = id_match.group(1) if id_match else None

    # Getting Given Name and Shirt Number
    header = soup.find("h1", class_="data-header__headline-wrapper")
    try:
        shirt_number = header.find("span", class_="data-header__shirt-number").get_text(strip=True)
        given_name = " ".join(header.stripped_strings).replace(shirt_number, "").strip()
    except AttributeError:
        # Either the headline or the shirt-number span is missing.
        shirt_number = None
        given_name = None
    if given_name is None:
        # Headline present but without a shirt number: use its whole text.
        try:
            given_name = header.text.strip()
        except AttributeError:
            given_name = None
    res["shirt_number"] = shirt_number
    res["given_name"] = given_name

    # Getting Full Name
    try:
        full_name = soup.select_one("span.info-table__content.info-table__content--bold").get_text(strip=True)
        # The first bold cell is discarded when it contains any digit.
        if re.search(r"\d+", full_name):
            full_name = None
    except AttributeError:
        full_name = None
    res["full_name"] = full_name

    # Getting Date Of Birth
    try:
        date_of_birth = soup.select_one("span.info-table__content.info-table__content--bold a[href^='/aktuell/waspassiertheute/aktuell/new/datum/']").text.strip()
    except AttributeError:
        date_of_birth = None
    res["date_of_birth"] = date_of_birth

    # Getting Citizenship
    try:
        citizenship = soup.select_one("li.data-header__label span[itemprop='nationality']").get_text(strip=True)
    except AttributeError:
        citizenship = None
    res["citizenship"] = citizenship

    # Getting Place Of Birth
    try:
        place_of_birth = soup.select_one("li.data-header__label span.data-header__content[itemprop='birthPlace']").get_text(strip=True)
    except AttributeError:
        place_of_birth = None
    res["place_of_birth"] = place_of_birth

    # Getting Caps and Goals (both are dropped if either link is missing)
    try:
        caps = soup.select_one("li.data-header__label > a:nth-of-type(1).data-header__content--highlight").get_text(strip=True)
        goals = soup.select_one("li.data-header__label > a:nth-of-type(2).data-header__content--highlight").get_text(strip=True)
    except AttributeError:
        caps = None
        goals = None
    res["caps"] = caps
    res["goals"] = goals

    # Getting Player Agent
    # NOTE(review): newer soupsieve releases deprecate `:contains` in favour of
    # `:-soup-contains`; the selector is left unchanged so older installs keep working.
    try:
        agent = soup.select_one("li.data-header__label:contains('Agent') a").get_text(strip=True)
    except AttributeError:
        agent = None

    # Getting Other Positions
    try:
        other_position_soup = soup.find("div", class_="detail-position__position")
        other_positions = ", ".join(
            position.text.strip()
            for position in other_position_soup.find_all("dd", class_="detail-position__position")
        )
    except AttributeError:
        other_positions = None
    res["other_positions"] = other_positions

    info_spans = soup.select("#main > main > div > div.large-8.columns > div > div > div.large-6.large-pull-6.small-12.columns.spielerdatenundfakten > div > span")

    outfitter = None
    contract_expires = None
    foot = None
    contract_Joined = None
    height = None
    current_club = None
    date_of_last_contract = None

    # Each value is taken from the span that follows its label span. Pairing
    # every span with its successor means a label sitting in the very last
    # span is skipped instead of raising IndexError.
    for label_span, value_span in zip(info_spans, info_spans[1:]):
        label = label_span.text
        value = value_span.text.strip()
        # Getting Outfitter
        if "Outfitter" in label:
            outfitter = value
        # Getting Contract Expires
        elif "Contract expires" in label:
            contract_expires = value
        # Getting Agent (only when the header lookup above found none)
        elif "agent" in label and agent is None:
            agent = value
        # Getting Foot
        elif "Foot" in label:
            foot = value
        # Getting Joined
        elif "Joined" in label:
            contract_Joined = value
        # Getting Height (digits only, e.g. "1,87 m" -> "187")
        elif "Height" in label:
            height = "".join(filter(str.isdigit, value))
        # Getting Current Club
        elif "Current club" in label:
            current_club = value
        # Getting last Contract
        elif "last contract" in label:
            date_of_last_contract = value

    res["outfitter"] = outfitter
    res["contract_expires"] = contract_expires
    res["agent"] = agent
    res["foot"] = foot
    res["contract_Joined"] = contract_Joined
    res["height"] = height
    res["current_club"] = current_club
    res["date_of_last_contract"] = date_of_last_contract

    res["players_link"] = link

    return res

In [3]:
# Empty frame with one column per key returned by data_of_players, cast with astype(str).
players_datas = pd.DataFrame(
    {
        column_name: []
        for column_name in (
            "player_id",
            "shirt_number",
            "given_name",
            "full_name",
            "date_of_birth",
            "citizenship",
            "place_of_birth",
            "caps",
            "goals",
            "other_positions",
            "outfitter",
            "contract_expires",
            "agent",
            "foot",
            "contract_Joined",
            "height",
            "current_club",
            "date_of_last_contract",
            "players_link",
        )
    }
).astype(str)

# Table of profile links; the scraping cell reads them from its "0" column.
links = pd.read_csv("All_players_links.csv")

Scraping All Players and Saving to CSV 👩‍💻👨‍💻

In [5]:
# Scrape every profile listed in the "0" column of `links`.
# Rows are gathered in a plain list and converted to a frame once:
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every iteration. Building the frame from scratch also makes this cell safe
# to re-run without duplicating rows.
player_records = [data_of_players(player_link, players_datas) for player_link in links["0"]]

# Reuse the existing column order so the CSV layout stays the same.
players_datas = pd.DataFrame(player_records, columns = players_datas.columns)

players_datas.to_csv("All_players_datas.csv", index = False)