In [6]:
# Importing dependencies

import requests
from random import randint
from time import sleep
from bs4 import BeautifulSoup
import pandas as pd

In [7]:
# Build each manager's profile URL and scrape their records and awards from the profile page

def _award_count(awards_div, title):
    """Return the integer shown next to the award called ``title``, or 0 if it is absent."""
    label = awards_div.find('div', class_='player-overview__info', string=title)
    if label is None:
        return 0
    count = label.find_next('div', class_='player-overview__info u-text-right')
    if count is None:
        return 0
    return int(count.text.strip())


def parse_profiles(id):
    """Scrape one manager's Premier League profile page.

    Sleeps a random 1-3 seconds before the request so the site is not hit
    in a tight loop.

    Parameters
    ----------
    id : int
        Manager id used to build the profile URL. (The name shadows the
        builtin ``id``; it is kept so keyword callers keep working.)

    Returns
    -------
    tuple
        ``(manager_of_month, manager_of_season, premier_league_champion,
        stats_data)``. The three award counts are ints (0 when the award or
        the whole "Honours & Awards" section is not on the page) and
        ``stats_data`` maps each "Premier League Records" label to its value
        as a string. The tuple always has four items: when the request fails
        or the status is not 200, ``(0, 0, 0, {})`` is returned after
        printing a message, so callers can index ``[3]`` unconditionally.
    """
    profile_url = f"https://www.premierleague.com/managers/{id}"
    delay = randint(1,3)
    print("Sleep " + str(delay) + "s")
    sleep(delay)

    try:
        # A timeout stops one stalled connection from hanging the whole scrape.
        response = requests.get(profile_url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch data for {profile_url}")
        print(exc)
        return 0, 0, 0, {}

    if response.status_code != 200:
        print(f"Failed to fetch data for {profile_url}")
        print(response.status_code)
        return 0, 0, 0, {}

    soup = BeautifulSoup(response.content, 'html.parser')

    # --- Premier League Records -------------------------------------------
    stats_data = {}
    records_header = soup.find('h3', class_='player-overview__sub-header', string='Premier League Records')
    records_div = records_header.find_next('div') if records_header is not None else None
    if records_div is not None:
        for stat in records_div.find_all('div', class_='player-overview__col'):
            label = stat.find('div', class_='player-overview__label')
            value = stat.find('div', class_='player-overview__info')
            if label is None or value is None:
                # Skip malformed columns rather than crash on ``None.text``.
                continue
            stats_data[label.text.strip()] = value.text.strip()

    # --- Honours & Awards -------------------------------------------------
    # Looked up independently of the records section, so a page lacking one
    # section still yields the data from the other.
    manager_of_month = 0
    manager_of_season = 0
    premier_league_champion = 0
    honours_header = soup.find('h3', class_='player-overview__sub-header', string='Honours & Awards')
    awards_div = (
        honours_header.find_next('div', class_='player-overview__honours-awards')
        if honours_header is not None
        else None
    )
    if awards_div is not None:
        manager_of_month = _award_count(awards_div, 'Manager of the Month')
        premier_league_champion = _award_count(awards_div, 'Premier League Champion')
        manager_of_season = _award_count(awards_div, 'Manager of the Season')

    return manager_of_month, manager_of_season, premier_league_champion, stats_data
    
#print(parse_profiles(4677))

In [8]:
# Fetch the list of managers from the Pulselive football API.
# Builds the URL, headers, and query parameters, performs a GET request,
# and returns the 'content' list from the JSON response.

def get_managers():
    """Fetch the managers list from the Pulselive football API.

    Sends a single GET request for page 0 with a page size of 500 and
    returns the ``'content'`` entry of the JSON response.

    Returns
    -------
    The value stored under ``'content'`` in the response JSON (iterated as a
    sequence of per-manager dicts by ``parse_managers``).

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    requests.RequestException
        On connection problems or when the 30 second timeout expires.
    """
    url = "https://footballapi.pulselive.com/football/teamofficials"
    querystring = {"pageSize":"500","comps":"1","altIds":"true","type":"manager","compCodeForActivePlayer":"EN_PR","page":"0"}
    # The original header set carried three entries ("^if-none-match",
    # "^sec-ch-ua", "^sec-ch-ua-platform") whose names and values were mangled
    # by cmd.exe caret escaping and contained the invalid string escape "\^".
    # Under those names they were not the real headers, so they are dropped.
    # A genuine If-None-Match is deliberately not restored: it could produce a
    # body-less 304 response that breaks the JSON decoding below.
    headers = {
        "accept": "*/*",
        "accept-language": "en-US,en;q=0.9",
        "account": "premierleague",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "origin": "https://www.premierleague.com",
        "referer": "https://www.premierleague.com/",
        "sec-ch-ua-mobile": "?0",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "cross-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0"
    }
    r = requests.get(url, headers=headers, params=querystring, timeout=30)
    # Fail loudly on an error status instead of with an opaque JSON/KeyError.
    r.raise_for_status()
    return r.json()['content']


In [9]:
# Combine each manager's API record with the data scraped from their profile page

def _nested(mapping, *keys):
    """Follow ``keys`` through nested dicts; return None as soon as one is missing."""
    current = mapping
    for key in keys:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


def _age_in_years(raw_age):
    """Return the leading whole number of an age string, or None if there is none."""
    # NOTE(review): the API age looks like "<years> years <days> days" - confirm.
    tokens = str(raw_age).split() if raw_age else []
    if tokens and tokens[0].isdigit():
        return int(tokens[0])
    return None


def parse_managers(data, fetch_profile=None):
    """Build one flat record per manager from the API data plus profile data.

    Parameters
    ----------
    data : iterable of dict
        Manager entries as returned by ``get_managers``. ``name.display`` and
        ``id`` are required; ``currentTeam``, ``age``, ``birth`` and
        ``active`` are optional and come out as ``None`` when absent.
    fetch_profile : callable, optional
        Called with the integer manager id; must return
        ``(manager_of_month, manager_of_season, premier_league_champion[,
        stats_dict])`` or a falsy value. Defaults to ``parse_profiles``.

    Returns
    -------
    list of dict
        One dict per manager whose profile lookup returned a truthy value.
        Managers with a falsy profile result are skipped. A three-item profile
        tuple (no stats dict) is accepted and simply contributes no stats.
    """
    if fetch_profile is None:
        # Resolved at call time so the default stays the notebook's own scraper.
        fetch_profile = parse_profiles

    managers_data = []
    for manager in data:
        manager_id = int(manager['id'])
        profile_data = fetch_profile(manager_id)
        if not profile_data:
            continue

        manager_of_month, manager_of_season, premier_league_champion = profile_data[:3]
        # The stats dict is optional: a failed fetch may return only the counts.
        stats_data = profile_data[3] if len(profile_data) > 3 else {}

        record = {
            'Name': manager['name']['display'],
            'Club': _nested(manager, 'currentTeam', 'name'),
            'Age': _age_in_years(manager.get('age')),
            'Country': _nested(manager, 'birth', 'country', 'country'),
            'Active': manager.get('active'),
            'Manager of the Month': manager_of_month,
            'Manager of the Season': manager_of_season,
            # Misspelt key kept on purpose: it is the existing output column name.
            'EPL Chamption': premier_league_champion
        }
        record.update(stats_data)
        managers_data.append(record)
    return managers_data

# Fetch the managers list from the API (one HTTP request)
data = get_managers()

# Build one record per manager; this scrapes every manager's profile page and
# sleeps 1-3 seconds before each request, so the cell takes several minutes
managers_data = parse_managers(data)

Sleep 2s
Sleep 1s
Sleep 1s
Sleep 1s
Sleep 2s
Sleep 3s
Sleep 3s
Sleep 3s
Sleep 3s
Sleep 3s
Sleep 3s
Sleep 1s
Sleep 1s
Sleep 1s
Sleep 2s
Sleep 3s
Sleep 1s
Sleep 3s
Sleep 3s
Sleep 1s
Sleep 1s
Sleep 2s
Sleep 1s
Sleep 3s
Sleep 3s
Sleep 3s
Sleep 2s
Sleep 1s
Sleep 3s
Sleep 2s
Sleep 2s
Sleep 3s
Sleep 3s
Sleep 3s
Sleep 3s
Sleep 1s
Sleep 1s
Sleep 3s
Sleep 3s
Sleep 1s
Sleep 2s
Sleep 2s
Sleep 3s
Sleep 2s
Sleep 1s
Sleep 2s
Sleep 2s
Sleep 3s
Sleep 1s
Sleep 3s
Sleep 3s
Sleep 1s
Sleep 1s
Sleep 3s
Sleep 3s
Sleep 3s
Sleep 3s
Sleep 2s
Sleep 2s
Sleep 3s
Sleep 3s
Sleep 1s
Sleep 2s
Sleep 1s
Sleep 1s
Sleep 2s
Sleep 3s
Sleep 3s
Sleep 2s
Sleep 2s
Sleep 1s
Sleep 1s
Sleep 1s
Sleep 3s
Sleep 2s
Sleep 1s
Sleep 2s
Sleep 1s
Sleep 3s
Sleep 3s
Sleep 1s
Sleep 2s
Sleep 2s
Sleep 2s
Sleep 1s
Sleep 1s
Sleep 3s
Sleep 1s
Sleep 3s
Sleep 1s
Sleep 2s
Sleep 1s
Sleep 1s
Sleep 2s
Sleep 1s
Sleep 2s
Sleep 1s
Sleep 2s
Sleep 1s
Sleep 1s
Sleep 3s
Sleep 1s
Sleep 3s
Sleep 1s
Sleep 2s
Sleep 2s
Sleep 1s
Sleep 3s
Sleep 3s
Sleep 2s
Sleep 2s
S

In [10]:
# Create a DataFrame from the list of per-manager dictionaries
df = pd.DataFrame(managers_data)

# Save the DataFrame to an Excel workbook (not CSV); index=False leaves out the row index
df.to_excel('managers_data.xlsx', index=False)