In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

In [2]:
# Gets all available dates.
# The match-center landing page is fetched once and every <button> with class
# "dayTabLinks" inside the "matchesCenter" section is read; each button's
# `date` attribute is parsed as mm/dd/yyyy into a datetime (midnight of that day).
link = 'https://www.yallakora.com/match-center'
result = requests.get(link, timeout=30)  # bounded wait instead of hanging forever
result.raise_for_status()  # stop on HTTP errors rather than parsing an error page
content = result.content
soup = BeautifulSoup(content, 'lxml')
match_center = soup.find("section", class_="matchesCenter")
if match_center is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise RuntimeError("section 'matchesCenter' was not found on the match-center page")
buttons = match_center.find_all('button', class_='dayTabLinks')
str_dates = [button['date'] for button in buttons]
dates = [datetime.strptime(date_str, '%m/%d/%Y') for date_str in str_dates]

In [3]:
# Ask for a date range, scrape every listed match for each day in that range,
# and combine the per-day tables into `final_df`.

# Class strings of the per-match <div> elements, by match state.
ITEM_FINISHED = "item finish liItem"
ITEM_FUTURE = "item future liItem"

# Output columns, in display order (a 'date' column is appended per day).
MATCH_COLUMNS = ['champion', 'round', 'channel', 'team a', 'result',
                 'team b', 'time', 'status', 'more details']


def _text_or_unknown(tag):
    """Return the stripped text of a BeautifulSoup tag, or "unknown" if the tag is None."""
    return tag.text.strip() if tag is not None else "unknown"


def _item_classes(match_day, today):
    """Return the match-item class strings to search for on `match_day`.

    Both arguments are `datetime.date` objects. Days before today are searched
    for finished matches, days after today for future matches, and today for
    both, since it can contain matches in either state.
    """
    if match_day < today:
        return (ITEM_FINISHED,)
    if match_day > today:
        return (ITEM_FUTURE,)
    return (ITEM_FINISHED, ITEM_FUTURE)


def _parse_match(match, champion):
    """Build one output row (dict keyed by MATCH_COLUMNS) from a match <div>.

    Any element that is missing from the markup yields "unknown" for its
    column instead of raising.
    """
    result_box = match.find("div", class_="MResult")
    if result_box is not None:
        scores = result_box.find_all("span", class_="score")
        kickoff = result_box.find("span", class_="time")
    else:
        scores, kickoff = [], None

    # The first two score spans are joined as "<first>-<second>".
    if len(scores) >= 2:
        score = scores[0].text.strip() + "-" + scores[1].text.strip()
    else:
        score = "unknown"

    details_link = match.find("a", class_="button details")
    if details_link is not None and details_link.has_attr("href"):
        details = details_link["href"]
    else:
        details = "unknown"

    return {
        'champion': champion,
        'round': _text_or_unknown(match.find("div", class_="date")),
        'channel': _text_or_unknown(match.find("div", class_="channel icon-channel")),
        'team a': _text_or_unknown(match.find("div", class_="teams teamA")),
        'result': score,
        'team b': _text_or_unknown(match.find("div", class_="teams teamB")),
        'time': _text_or_unknown(kickoff),
        'status': _text_or_unknown(match.find("div", class_="matchStatus")),
        'more details': details,
    }


# fetch dates and filter them
start_date = datetime.strptime(input("please enter start date in format mm/dd/yyyy :"), '%m/%d/%Y')
if start_date not in dates:
    raise ValueError("start_date should be exists in the website")
end_date = datetime.strptime(input("please enter end date in format mm/dd/yyyy :"), '%m/%d/%Y')
if end_date not in dates:
    raise ValueError("end_date should be exists in the website")
if start_date > end_date:
    # A reversed range selects no days, and pd.concat() of an empty list
    # would fail later with an unrelated message.
    raise ValueError("start date must not be after end date")
filtered_dates = [date for date in dates if start_date <= date <= end_date]

today_date = datetime.today()
# Compare calendar days only: datetime.today() carries the current time of
# day, so a midnight datetime for today would otherwise count as the past.
today = today_date.date()

dataframes = []  # one DataFrame per scraped day

# fetch and process data for each date
for d in filtered_dates:
    # `d` is interpolated with datetime's default string form
    # (e.g. "2024-04-01 00:00:00"); the URL is kept exactly as before.
    link = f'https://www.yallakora.com/match-center/مركز-المباريات?date={d}#'
    response = requests.get(link, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    rows = []
    for card in soup.find_all("div", class_="matchCard"):
        # The card's <h2> text is stored in the 'champion' column of each of its matches.
        champion = _text_or_unknown(card.find("h2"))
        for item_class in _item_classes(d.date(), today):
            for match in card.find_all("div", class_=item_class):
                rows.append(_parse_match(match, champion))

    # Passing `columns` keeps the schema stable even for a day with no matches.
    df = pd.DataFrame(rows, columns=MATCH_COLUMNS)
    # Add date column
    df['date'] = d
    dataframes.append(df)

# Concatenate all DataFrames in the list into a single DataFrame
final_df = pd.concat(dataframes, ignore_index=True)

please enter start date in format mm/dd/yyyy :4/1/2024
please enter end date in format mm/dd/yyyy :4/25/2024


In [4]:
# Show the combined table of scraped matches as this cell's output.
final_df

Unnamed: 0,champion,round,channel,team a,result,team b,time,status,more details,date
0,الدوري الإسباني,الأسبوع الثلاثون,بى ان سبورت 3 HD,فياريال,1-2,اتلتيكو مدريد,21:00,انتهت,/la-liga/2833/match/94373/%d9%81%d9%8a%d8%a7%d...,2024-04-01
1,الدوري السعودي,الأسبوع السادس والعشرون,SSc SPORT,الرائد,0-1,الخليج,21:00,انتهت,/ksa-league/2842/match/95490/%d8%a7%d9%84%d8%b...,2024-04-01
2,الدوري السعودي,الأسبوع السادس والعشرون,unknown,الحزم,0-0,ضمك,21:00,انتهت,/ksa-league/2842/match/95491/%d8%a7%d9%84%d8%a...,2024-04-01
3,الدوري السعودي,الأسبوع السادس والعشرون,SSc SPORT,أهلي جدة,1-0,الاتحاد,21:00,انتهت,/ksa-league/2842/match/95492/%d8%a3%d9%87%d9%8...,2024-04-01
4,الدوري الإيطالي,الأسبوع الثلاثون,Starz Play App,بولونيا,3-0,ساليرنيتانا,12:30,انتهت,/serie-a/2836/match/94350/%d8%a8%d9%88%d9%84%d...,2024-04-01
...,...,...,...,...,...,...,...,...,...,...
491,كأس أفريقيا للأندية أبطال الكؤوس,نصف النهائي,unknown,الزمالك,30-26,شبيبة سكيكدة,20:00,انتهت,/african-men-s-handball-cup-winners-cup/2895/m...,2024-04-25
492,الدوري السعودي,الأسبوع التاسع والعشرون,SSc SPORT,الفيحاء,1-0,الطــائي,17:00,انتهت,/ksa-league/2842/match/95516/%d8%a7%d9%84%d9%8...,2024-04-25
493,الدوري السعودي,الأسبوع التاسع والعشرون,SSc SPORT,الرياض,2-1,أهلي جدة,20:00,انتهت,/ksa-league/2842/match/95517/%d8%a7%d9%84%d8%b...,2024-04-25
494,الدوري السعودي,الأسبوع التاسع والعشرون,SSc SPORT,الوحدة,0-2,الحزم,20:00,انتهت,/ksa-league/2842/match/95518/%d8%a7%d9%84%d9%8...,2024-04-25


In [5]:
# Export the combined table to an Excel workbook, without the index column.
# The target needs an Excel extension such as .xlsx: pandas selects the writer
# engine from the file extension, so an extension-less target like "path" is
# rejected. to_excel() returns None, so its return value is not stored.
output_path = "yallakora_matches.xlsx"  # adjust to the desired location
final_df.to_excel(output_path, index=False)