In [69]:
import pandas as pd
import numpy as np
import sqlite3
import schedule
import time
import requests
from bs4 import BeautifulSoup
from datetime import date, datetime

In [70]:
# Download the landing page and parse it into a BeautifulSoup tree.
# A timeout is set because requests otherwise waits indefinitely on a stalled
# connection; 30 seconds is an arbitrary but generous upper bound.
req = requests.get('https://playerstats.football/', timeout=30)
# Fail loudly on 4xx/5xx responses instead of silently parsing an error page,
# which would otherwise just produce an empty result further down.
req.raise_for_status()
soup = BeautifulSoup(req.text, 'html.parser')

In [71]:
# Collect every <div> carrying the section styling; each one is later read as a
# single competition (its <h2> gives the competition name).
section_css = 'border-b border-purple-100/80'
data = soup.find_all('div', class_=section_css)

In [72]:
def is_time_format(time_str, time_format="%H:%M"):
    """Report whether ``time_str`` can be parsed with ``time_format``.

    Args:
        time_str: Text to check.
        time_format: ``datetime.strptime`` format string; defaults to
            hours and minutes (``"%H:%M"``).

    Returns:
        True when ``datetime.strptime`` accepts the text, False when it
        raises ``ValueError``.
    """
    try:
        datetime.strptime(time_str, time_format)
    except ValueError:
        # strptime rejected the text, so it does not match the format.
        return False
    else:
        return True

# Group the scraped fixtures by competition name.
# cups maps competition title -> list of [team 1, team 2, time/status, score].
# NOTE(review): the recorded output shows clubs that appear to belong to a
# different league listed under 'Premier League', so competitions sharing a
# title seem to be merged under one key. Confirm whether the page exposes a
# country label that should become part of the key.
cups = {}

# CSS class string used by the <div> holding a team's display name.
TEAM_NAME_CLASS = 'whitespace-nowrap truncate underline decoration-slate-300 decoration-dotted underline-offset-4 hover:opacity-60'

for item in data:
    heading = item.find('h2')
    if heading is None:
        # Without a title the section cannot be attributed to a competition.
        continue
    cup = heading.text.strip()

    # Find all team and time elements of this competition section.
    team1_elements = item.find_all('a', class_='flex items-center whitespace-wrap mb-2 text-sm')
    team2_elements = item.find_all('a', class_='flex items-center text-sm')
    time_elements = item.find_all('div', class_='pr-3 sm:pr-6')

    # zip() pairs the n-th entry of each list and stops at the shortest one.
    for team1, team2, time_element in zip(team1_elements, team2_elements, time_elements):
        first_team = team1.find('div', class_=TEAM_NAME_CLASS)
        second_team = team2.find('div', class_=TEAM_NAME_CLASS)
        # Deliberately not named `time`: that would overwrite the imported
        # `time` module for the rest of the notebook session.
        time_cell = time_element.find('div', class_='flex items-center text-grey-500')

        # Skip incomplete rows *before* reading .text, so a missing element
        # drops the row instead of raising AttributeError.
        if first_team is None or second_team is None or time_cell is None:
            continue

        cell_text = time_cell.text.strip()

        # The cell holds either a kick-off time (HH:MM) or a score.
        if is_time_format(cell_text, "%H:%M"):
            valid_time = cell_text
            score = "Not started Yet"
        else:
            valid_time = "Ended"
            score = cell_text

        # A link to /inplay marks a match that is currently being played.
        if time_element.find('a', href="/inplay") is not None:
            valid_time = 'on play'

        # An empty cell yields an empty score; such rows are not recorded.
        if not score:
            continue

        match = [
            first_team.text.strip(),
            second_team.text.strip(),
            valid_time,
            score,
        ]
        cups.setdefault(cup, []).append(match)

# Print the organized data
print("Scraped Data (Dictionary):")
print(cups)



Scraped Data (Dictionary):
{'Premier League': [['AFC Bournemouth', 'Nottingham Forest', 'on play', '1-0'], ['Brighton & Hove Albion', 'Everton', 'on play', '0-1'], ['Liverpool', 'Ipswich Town', 'on play', '3-0'], ['Southampton', 'Newcastle United', 'on play', '1-2'], ['Wolverhampton Wanderers', 'Arsenal', 'on play', '0-0'], ['Manchester City', 'Chelsea', '17:30', 'Not started Yet'], ['Sebail', 'Kapaz', 'Ended', '1-0'], ['Sabah', 'Sumqayıt', 'on play', '0-0']], 'Championship': [['Luton Town', 'Millwall', 'Ended', '0-1'], ['Norwich City', 'Swansea City', 'Ended', '5-1'], ['Stoke City', 'Oxford United', 'Ended', '0-0'], ['Bristol City', 'Blackburn Rovers', 'on play', '1-1'], ['Cardiff City', 'Derby County', 'on play', '0-0'], ['Coventry City', 'Watford', 'on play', '1-0'], ['Preston North End', 'Middlesbrough', 'on play', '1-0'], ['Queens Park Rangers', 'Sheffield Wednesday', 'on play', '0-0'], ['Sunderland', 'Plymouth Argyle', 'on play', '0-0'], ['West Bromwich Albion', 'Portsmouth', 'on

In [73]:
# Convert the dictionary into a structured DataFrame: one row per match.

# The date stamp is identical for every row, so it is computed once rather
# than once per competition.
today = date.today().strftime("%d/%m/%Y")

# Explicit column order; also guarantees the columns exist when nothing was
# scraped (an empty list of rows would otherwise give a column-less frame).
MATCH_COLUMNS = ['Date', 'Cup', 'Team 1', 'Team 2', 'Time', 'Score']

# Each match is the 4-item list [team 1, team 2, time/status, score].
rows = [
    {
        'Date': today,
        'Cup': cup_name,
        'Team 1': team_1,
        'Team 2': team_2,
        'Time': match_time,
        'Score': match_score,
    }
    for cup_name, matches in cups.items()
    for team_1, team_2, match_time, match_score in matches
]

df = pd.DataFrame(rows, columns=MATCH_COLUMNS)

# Print the DataFrame
print("\nScraped Data (DataFrame):")
print(df)


Scraped Data (DataFrame):
           Date             Cup                   Team 1             Team 2  \
0    25/01/2025  Premier League          AFC Bournemouth  Nottingham Forest   
1    25/01/2025  Premier League   Brighton & Hove Albion            Everton   
2    25/01/2025  Premier League                Liverpool       Ipswich Town   
3    25/01/2025  Premier League              Southampton   Newcastle United   
4    25/01/2025  Premier League  Wolverhampton Wanderers            Arsenal   
..          ...             ...                      ...                ...   
114  25/01/2025   Liga Portugal                Famalicão    Estrela Amadora   
115  25/01/2025   Liga Portugal                 Casa Pia            Benfica   
116  25/01/2025   Liga Portugal              Sporting CP           Nacional   
117  25/01/2025      Eredivisie                      PSV          NAC Breda   
118  25/01/2025      Eredivisie             FC Groningen      SC Heerenveen   

        Time            

In [74]:
# Export the table to matches.xlsx on a sheet named after the current date.
# NOTE(review): the writer is opened in its default write mode, so an existing
# matches.xlsx is replaced on every run rather than gaining a new dated sheet.
# Confirm that this is intended.
sheet_label = f'Matches_{date.today().strftime("%Y-%m-%d")}'
with pd.ExcelWriter('matches.xlsx') as writer:
    df.to_excel(writer, sheet_name=sheet_label, index=False)
print('done')

done
