In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [6]:
base_url = "https://www.transfermarkt.com/egyptian-premier-league/startseite/wettbewerb/EGY1/plus/?saison_id={year}"

years = list(range(2000, 2025))
season_links = []

for year in years:
    url = base_url.format(year=year)
    season_links.append({'year': year,'url': url})

In [7]:
def get_df(item):
    url = item['url']
    year = item['year']
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
    soup = BeautifulSoup(response.content, 'lxml')
    table = soup.find('table', class_='items')
    rows = table.find_all('tr', {'class': ['odd', 'even']})
    data = []

    for row in rows:
        team_cell = row.find_all('td')[1]
        link_tag = team_cell.find('a')
        if link_tag and 'href' in link_tag.attrs:
            team_url = "https://www.transfermarkt.com" + link_tag['href']
        else:
            team_url = None

        cols = row.find_all('td')
        club = cols[1].text.strip()
        squad = cols[2].text.strip()
        avg_age = cols[3].text.strip()
        foreigners = cols[4].text.strip()
        avg_value = cols[5].text.strip().replace('€', '').replace('k', '000').replace('m', '000000')
        total_value = cols[6].text.strip().replace('€', '').replace('k', '000').replace('m', '000000')
        data.append([club, squad, avg_age, foreigners, avg_value, total_value, team_url])


    df = pd.DataFrame(data, columns=['Club', 'Squad Size', 'Avg Age', 'Foreigners',
                                     'Avg Market Value (EUR)', 'Total Market Value (EUR)', 'Team URL'])
    df['Squad Size'] = pd.to_numeric(df['Squad Size'], errors='coerce')
    df['Avg Age'] = pd.to_numeric(df['Avg Age'], errors='coerce')
    df['Foreigners'] = pd.to_numeric(df['Foreigners'], errors='coerce')
    df['Avg Market Value (EUR)'] = pd.to_numeric(df['Avg Market Value (EUR)'], errors='coerce')
    df['Total Market Value (EUR)'] = pd.to_numeric(df['Total Market Value (EUR)'], errors='coerce')
    df['Year'] = year

    return df

In [None]:
data = []
for item in season_links:
    try:
        df = get_df(item)
        data.append(df)
    except Exception as e:
        print(f"Failed: {url}\n   Error: {e}")
        continue

In [55]:
data = pd.concat(data, ignore_index=True)

In [75]:
second_try = []

for year in years:
    existed_years = data['Year'].value_counts().index.tolist()
    if year not in existed_years:
        second_try.append(year)

In [76]:
second_try

[2011, 2012, 2013, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]

In [77]:
existed_years

[2014, 2006, 2007, 2008, 2009, 2010, 2001, 2000, 2003, 2002, 2004, 2005]

In [78]:
second_links= []
for year in second_try:
    url = base_url.format(year=year)
    second_links.append({'year': year,'url': url})

In [79]:
second_data = []
for item in second_links:
    try:
        df = get_df(item)
        second_data.append(df)
    except Exception as e:
        print(f"Failed: {url}\n   Error: {e}")
        continue

Failed: https://www.transfermarkt.com/egyptian-premier-league/startseite/wettbewerb/EGY1/plus/?saison_id=2024
   Error: 'NoneType' object has no attribute 'find_all'
Failed: https://www.transfermarkt.com/egyptian-premier-league/startseite/wettbewerb/EGY1/plus/?saison_id=2024
   Error: 'NoneType' object has no attribute 'find_all'


In [82]:
second_data = pd.concat(second_data, ignore_index=True)

In [83]:
second_data

Unnamed: 0,Club,Squad Size,Avg Age,Foreigners,Avg Market Value (EUR),Total Market Value (EUR),Team URL,Year
0,Al Ahly FC,31,27.4,3,613000.0,19.00,https://www.transfermarkt.com/el-ahly-kairo/st...,2011
1,Zamalek SC,31,26.4,4,502000.0,15.58,https://www.transfermarkt.com/zamalek-sc/start...,2011
2,Ismaily SC,29,26.8,5,327000.0,9.48,https://www.transfermarkt.com/ismaily-sc/start...,2011
3,Enppi SC,32,26.0,4,183000.0,5.85,https://www.transfermarkt.com/enppi-sc/startse...,2011
4,El Masry SC,27,26.4,2,189000.0,5.10,https://www.transfermarkt.com/el-masry-sc/star...,2011
...,...,...,...,...,...,...,...,...
198,Enppi SC,43,24.5,4,137000.0,5.90,https://www.transfermarkt.com/enppi-sc/startse...,2023
199,Ismaily SC,56,24.8,4,100000.0,5.58,https://www.transfermarkt.com/ismaily-sc/start...,2023
200,Baladiyat El Mahalla,52,25.5,11,94000.0,4.90,https://www.transfermarkt.com/baladiya-mahalla...,2023
201,El Gouna FC,43,25.5,9,111000.0,4.78,https://www.transfermarkt.com/el-gouna-fc/star...,2023


In [84]:
final_data = pd.concat([data, second_data], ignore_index=True)

In [91]:
Ahly = final_data[final_data['Club']=='Al Ahly FC']
Zamalek = final_data[final_data['Club']=='Zamalek SC']
Ismaily  = final_data[final_data['Club']=='Ismaily SC']

In [92]:
Ismaily

Unnamed: 0,Club,Squad Size,Avg Age,Foreigners,Avg Market Value (EUR),Total Market Value (EUR),Team URL,Year
3,Ismaily SC,28,25.2,3,,,https://www.transfermarkt.com/ismaily-sc/start...,2000
17,Ismaily SC,27,24.6,3,,,https://www.transfermarkt.com/ismaily-sc/start...,2001
31,Ismaily SC,34,24.3,3,,,https://www.transfermarkt.com/ismaily-sc/start...,2002
44,Ismaily SC,37,23.4,5,,,https://www.transfermarkt.com/ismaily-sc/start...,2003
57,Ismaily SC,32,23.9,4,59000.0,1.9,https://www.transfermarkt.com/ismaily-sc/start...,2004
73,Ismaily SC,31,24.1,3,24000.0,750000.0,https://www.transfermarkt.com/ismaily-sc/start...,2005
86,Ismaily SC,30,25.1,3,70000.0,2.1,https://www.transfermarkt.com/ismaily-sc/start...,2006
102,Ismaily SC,29,25.0,4,309000.0,8.98,https://www.transfermarkt.com/ismaily-sc/start...,2007
118,Ismaily SC,34,25.2,4,223000.0,7.58,https://www.transfermarkt.com/ismaily-sc/start...,2008
134,Ismaily SC,36,26.2,6,324000.0,11.65,https://www.transfermarkt.com/ismaily-sc/start...,2009


In [2]:
DATA = pd.read_csv('egyptian_league_market_values.csv')

In [3]:
DATA.head()

Unnamed: 0,Club,Squad Size,Avg Age,Foreigners,Avg Market Value (EUR),Total Market Value (EUR),Team URL,Year
0,Al Ahly FC,34,23.9,4,,,https://www.transfermarkt.com/el-ahly-kairo/st...,2000
1,Zamalek SC,29,26.3,0,,,https://www.transfermarkt.com/zamalek-sc/start...,2000
2,El Mokawloon SC,23,25.0,0,,,https://www.transfermarkt.com/el-mokawloon-sc/...,2000
3,Ismaily SC,28,25.2,3,,,https://www.transfermarkt.com/ismaily-sc/start...,2000
4,Ittihad Alexandria SC,28,25.6,2,,,https://www.transfermarkt.com/ittihad-alexandr...,2000


In [4]:
DATA.tail()

Unnamed: 0,Club,Squad Size,Avg Age,Foreigners,Avg Market Value (EUR),Total Market Value (EUR),Team URL,Year
382,Enppi SC,43,24.5,4,137000.0,5.9,https://www.transfermarkt.com/enppi-sc/startse...,2023
383,Ismaily SC,56,24.8,4,100000.0,5.58,https://www.transfermarkt.com/ismaily-sc/start...,2023
384,Baladiyat El Mahalla,52,25.5,11,94000.0,4.9,https://www.transfermarkt.com/baladiya-mahalla...,2023
385,El Gouna FC,43,25.5,9,111000.0,4.78,https://www.transfermarkt.com/el-gouna-fc/star...,2023
386,El Dakhlia SC,55,25.7,7,78000.0,4.3,https://www.transfermarkt.com/el-dakhlia-sc/st...,2023
