In [19]:
import pandas as pd
import numpy as np
import sqlite3
import schedule
import time
import requests
from bs4 import BeautifulSoup
from datetime import date, datetime
from openpyxl import load_workbook

In [20]:
# Download the landing page and parse it into a BeautifulSoup tree.
# An explicit timeout is required: requests waits indefinitely by default,
# which would hang the notebook if the site stops responding.
req = requests.get('https://playerstats.football/', timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page, which would
# otherwise produce an empty scrape further down with no indication why.
req.raise_for_status()
soup = BeautifulSoup(req.text, 'html.parser')

In [21]:
# Collect every <div> whose class attribute is exactly this string; the
# scraping cell below reads an <h2> heading plus team/time elements from each.
data = soup.find_all(
    'div',
    attrs={'class': 'border-b border-purple-100/80'},
)

In [22]:
def is_time_format(time_str, time_format="%H:%M"):
    """Report whether ``time_str`` can be parsed with ``time_format``.

    Args:
        time_str: Candidate text, e.g. ``"19:45"``.
        time_format: ``datetime.strptime`` format string to test against.

    Returns:
        True if ``datetime.strptime`` accepts the text, otherwise False.
        Non-string input (for example ``None``) also yields False rather
        than raising.
    """
    try:
        datetime.strptime(time_str, time_format)
    except (ValueError, TypeError):
        # ValueError: the text does not match the format.
        # TypeError: the input is not a string at all (e.g. None).
        return False
    return True

# Maps a section heading to its matches; each match is
# [team 1 name, team 2 name, time/status, score].
# NOTE(review): sections are keyed by heading text alone, so two sections
# with the same heading are merged into one list - confirm that is intended.
cups = {}

# Class string shared by the <div> holding either team's display name.
team_name_class = 'whitespace-nowrap truncate underline decoration-slate-300 decoration-dotted underline-offset-4 hover:opacity-60'

for item in data:
    # Skip sections without a heading instead of crashing on None.text.
    heading = item.find('h2')
    if heading is None:
        continue
    cup = heading.text.strip()

    # Find all teams and time elements
    team1_elements = item.find_all('a', class_='flex items-center whitespace-wrap mb-2 text-sm')
    team2_elements = item.find_all('a', class_='flex items-center text-sm')
    time_elements = item.find_all('div', class_='pr-3 sm:pr-6')

    # zip pairs the three lists positionally and stops at the shortest one.
    for team1, team2, time_element in zip(team1_elements, team2_elements, time_elements):
        first_team = team1.find('div', class_=team_name_class)
        second_team = team2.find('div', class_=team_name_class)
        # Deliberately not named `time`: that would rebind the `time` module
        # imported at the top of the notebook for every later cell.
        status_div = time_element.find('div', class_='flex items-center text-grey-500')

        # Validate before touching .text, so an incomplete row is skipped
        # rather than raising AttributeError on None.
        if first_team is None or second_team is None or status_div is None:
            continue

        status_text = status_div.text.strip()

        # The status cell holds a kick-off time (HH:MM) or, failing that, a score.
        if is_time_format(status_text, "%H:%M"):
            valid_time = status_text
            score = "Not started Yet"
        else:
            valid_time = "Ended"
            score = status_text

        # A link to /inplay inside the time cell marks a match in progress.
        if time_element.find('a', href="/inplay") is not None:
            valid_time = 'on play'

        # An empty status cell yields an empty score; such rows are dropped.
        if not score:
            continue

        match = [
            first_team.text.strip(),
            second_team.text.strip(),
            valid_time,
            score
        ]
        cups.setdefault(cup, []).append(match)

# Print the organized data
print("Scraped Data (Dictionary):")
print(cups)



Scraped Data (Dictionary):
{'Premier League': [['Crystal Palace', 'Brentford', 'Ended', '1-2'], ['Tottenham Hotspur', 'Leicester City', 'Ended', '1-2'], ['Aston Villa', 'West Ham United', 'on play', '1-0'], ['Fulham', 'Manchester United', '19:00', 'Not started Yet'], ['Araz', 'Neftçi', 'Ended', '2-1'], ['Qarabağ', 'Turan', 'Ended', '1-2']], 'La Liga': [['Rayo Vallecano', 'Girona', 'Ended', '2-1'], ['Real Sociedad', 'Getafe', 'Ended', '0-3'], ['Athletic Club', 'Leganés', '17:30', 'Not started Yet'], ['FC Barcelona', 'Valencia', '20:00', 'Not started Yet']], 'Bundesliga': [['TSG Hoffenheim', 'Eintracht Frankfurt', 'Ended', '2-2'], ['St. Pauli', 'FC Union Berlin', 'on play', '1-0']], 'Serie A': [['Milan', 'Parma', 'Ended', '3-2'], ['Udinese', 'Roma', 'Ended', '1-2'], ['Lecce', 'Inter', 'on play', '0-1'], ['Lazio', 'Fiorentina', '19:45', 'Not started Yet']], 'Ligue 1': [['Le Havre', 'Brest', 'Ended', '0-1'], ['Nantes', 'Olympique Lyonnais', 'on play', '0-1'], ['Toulouse', 'Montpellier', 'o

In [23]:
# Convert the dictionary into a structured DataFrame: one row per match.

# The scrape date is the same for every row, so format it once up front.
today = date.today().strftime("%d/%m/%Y")

rows = [
    {'Date': today, 'Cup': cup, 'Team 1': match[0], 'Team 2': match[1], 'Time': match[2], 'Score': match[3]}
    for cup, matches in cups.items()
    for match in matches
]

# Explicit columns keep the schema (and the exported sheet's header row)
# stable even when nothing was scraped and `rows` is empty.
df = pd.DataFrame(rows, columns=['Date', 'Cup', 'Team 1', 'Team 2', 'Time', 'Score'])

# Print the DataFrame
print("\nScraped Data (DataFrame):")
print(df)


Scraped Data (DataFrame):
          Date               Cup             Team 1             Team 2  \
0   26/01/2025    Premier League     Crystal Palace          Brentford   
1   26/01/2025    Premier League  Tottenham Hotspur     Leicester City   
2   26/01/2025    Premier League        Aston Villa    West Ham United   
3   26/01/2025    Premier League             Fulham  Manchester United   
4   26/01/2025    Premier League               Araz             Neftçi   
..         ...               ...                ...                ...   
68  26/01/2025  Saudi Pro League           Al Nassr           Al Fateh   
69  26/01/2025     Liga Portugal            Estoril         Vitória SC   
70  26/01/2025     Liga Portugal              Porto        Santa Clara   
71  26/01/2025     Liga Portugal     Sporting Braga           Boavista   
72  26/01/2025       Ligat ha'Al    Maccabi Netanya      Hapoel Hadera   

       Time            Score  
0     Ended              1-2  
1     Ended           

In [24]:
# Save today's matches to their own sheet in matches.xlsx.
sheet_name = f'Matches_{date.today().strftime("%Y-%m-%d")}'

try:
    # Append mode keeps the sheets already in the workbook; writing to a
    # sheet name that already exists replaces that sheet.
    writer = pd.ExcelWriter('matches.xlsx', mode='a', engine='openpyxl', if_sheet_exists='replace')
except FileNotFoundError:
    # First run: append mode cannot open a workbook that does not exist yet,
    # so create it. if_sheet_exists is only accepted in append mode.
    writer = pd.ExcelWriter('matches.xlsx', mode='w', engine='openpyxl')

with writer:
    df.to_excel(writer, sheet_name=sheet_name, index=False)
print('done')

done
