In [4]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests, os
from dotenv import load_dotenv
import re
import unidecode
import datetime

In [61]:
# Load variables from the .env file, then read the Notion credentials and the
# ids of the databases/pages this notebook writes to. Each value is None when
# the corresponding variable is not set.
load_dotenv()
NOTION_API_KEY = os.environ.get("NOTION_API_KEY")
PLAYERS_DB_ID = os.environ.get("PLAYERS_DB_ID")
TEAMS_DB_ID = os.environ.get("TEAMS_DB_ID")
MATCHES_DB_ID = os.environ.get("MATCHES_DB_ID")
LA_LIGA_PAGE_ID = os.environ.get("LA_LIGA_PAGE_ID")

# Base URL that every Notion request in this notebook is built from.
NOTION_ENDPOINT = "https://api.notion.com/v1/"

In [None]:
# Download the first-team roster page. The HTTP response is closed as soon as
# its body has been read, and the parser is named explicitly so BeautifulSoup
# does not have to guess one (guessing emits GuessedAtParserWarning and can
# pick a different parser on another machine).
with urlopen("https://www.fcbarcelona.com/en/football/first-team/players") as response:
    html = response.read()
soup = BeautifulSoup(html, "html.parser")

## FC Barcelona players

In [None]:
# Fail early with a readable message: without this check a missing key only
# shows up as "TypeError: can only concatenate str (not NoneType) to str".
if not NOTION_API_KEY:
    raise RuntimeError("NOTION_API_KEY is not set; define it in the environment or in the .env file.")

# HTTP headers sent with every Notion API request made by this notebook.
headers = {
    "accept": "application/json",
    "Notion-Version": "2022-06-28",
    "content-type": "application/json",
    "Authorization": "Bearer " + NOTION_API_KEY
}

def create_player(player_name, player_national_team, player_age, player_pos, player_weight, player_height, player_num, player_img, player_promo, player_bio, player_honours, club_page_id="75ab737cc8f341288f4ef0392a161067", timeout=30):
    """Create one player page in the Notion players database.

    Args:
        player_name: Page title.
        player_national_team: Text for the "National Team" property.
        player_age, player_weight, player_height, player_num: Values for the
            "Age", "Weight", "Height" and "Jersey Number" number properties.
            A value that is not an int/float (the scraping loop uses "" for
            "not found") is left out of the request, because a number
            property expects a JSON number rather than a string.
        player_pos: Option name for the "Position" select property.
        player_img: Image URL, used as both the page icon and the page cover.
        player_promo: Text of the heading_1 block at the top of the page.
        player_bio: Accepted for call compatibility; not written to the page.
        player_honours: Iterable of strings; each becomes a heading_3 block
            placed after the divider.
        club_page_id: Notion page id used for the "Club" relation. The default
            is the id that was previously hard-coded in the payload.
        timeout: Seconds to wait for the Notion API before giving up.

    Raises:
        requests.HTTPError: If Notion answers with an error status.
    """
    properties = {
        "Name": {
            "title": [{
                "text": {"content": player_name}
            }]
        },
        "Club": {
            "relation": [{
                "id": club_page_id
            }]
        },
        "National Team": {
            "rich_text": [{
                "text": {"content": player_national_team}
            }]
        },
        "Position": {
            "select": {"name": player_pos}
        },
    }

    numeric_fields = (
        ("Age", player_age),
        ("Weight", player_weight),
        ("Height", player_height),
        ("Jersey Number", player_num),
    )
    for property_name, value in numeric_fields:
        # bool is a subclass of int but is never a real measurement here.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            properties[property_name] = {"number": value}

    # Page body: promo headline, a divider, then one heading per honour.
    children = [
        {
            "object": "block",
            "type": "heading_1",
            "heading_1": {"rich_text": [{"text": {"content": player_promo}}]}
        },
        {
            "object": "block",
            "type": "divider",
            "divider": {}
        }
    ]
    for honour in player_honours:
        children.append({
            "object": "block",
            "type": "heading_3",
            "heading_3": {"rich_text": [{"text": {"content": honour}}]}
        })

    payload = {
        "parent": {
            "type": "database_id",
            "database_id": PLAYERS_DB_ID
        },
        "icon": {
            "type": "external",
            "external": {"url": player_img}
        },
        "cover": {
            "type": "external",
            "external": {"url": player_img}
        },
        "properties": properties,
        "children": children
    }

    # Without a timeout a stalled connection would block the cell forever.
    response = requests.post(url=NOTION_ENDPOINT+"pages/", headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()

In [None]:
# CSS class names used to locate each piece of data on a player page.

# Hero banner: name (with jersey number), position line and portrait.
player_name_tag = "player-hero__name"
player_pos_tag = "player-hero__info-meta"
player_img_tag = "player-hero__img"

# Data strip, promo headline and biography text.
player_data_tag = "player-strip__data"
player_promo_tag = "content-promo__title"
player_bio_tag = "player-bio__description"

def find_player_info(soup, tag):
    """Return the text of the first element in ``soup`` whose class is ``tag``.

    Raises AttributeError when no such element exists, because ``find``
    then returns None.
    """
    element = soup.find(attrs={"class": tag})
    return element.get_text()

def find_player_img(soup, tag):
    """Return the ``src`` attribute of the first element whose class is ``tag``.

    Raises AttributeError when the element is missing and KeyError when it
    has no ``src`` attribute.
    """
    image = soup.find(attrs={"class": tag})
    source = image.attrs["src"]
    return source

def find_player_data(soup, player_data_tag):
    """Return the text of every element in ``soup`` whose class is ``player_data_tag``.

    The result is a list in document order; it is empty when nothing matches.
    """
    texts = []
    for node in soup.find_all(attrs={"class": player_data_tag}):
        texts.append(node.get_text())
    return texts

def find_player_honours(soup):
    """Summarise every ``player-honour`` element in ``soup`` as one string.

    For each honour the dates text is split on whitespace and tokens of a
    single character are discarded. The summary is the title, the number of
    remaining tokens and those tokens joined with " | ".

    Returns:
        A list of summary strings, one per honour, in document order.
    """
    template = "{} {}🏆 → {}"
    summaries = []
    for honour in soup.find_all(attrs={"class":"player-honour"}):
        title = honour.find(attrs={"class": "player-honour__title"}).get_text()
        raw_dates = honour.find(attrs={"class": "player-honour__dates"}).get_text()
        seasons = list(filter(lambda token: len(token) > 1, raw_dates.split()))
        summaries.append(template.format(title, len(seasons), " | ".join(seasons)))
    return summaries

# Visit every first-team link found on the roster page and extract the fields
# that create_player() expects. A failure on one player is reported with its
# URL and cause, and the loop continues with the next link.
for link in soup.find_all(href=re.compile(r"https.*first-team")):
    player_url = link.attrs["href"]
    try:
        # Close the response once read and name the parser explicitly so
        # BeautifulSoup does not have to guess one.
        with urlopen(player_url) as response:
            player_soup = BeautifulSoup(response.read(), "html.parser")
        player_soup = player_soup.find(attrs={"class":"teams-page"})

        # The hero name is split into "<jersey number> <name ...>"; split()
        # already collapses newlines and runs of spaces.
        player_name = find_player_info(player_soup, player_name_tag).split()
        player_num = int(player_name[0])
        player_name = " ".join(player_name[1:])
        player_pos = find_player_info(player_soup, player_pos_tag)
        player_img = find_player_img(player_soup, player_img_tag)
        player_promo = find_player_info(player_soup, player_promo_tag)
        player_bio = find_player_data(player_soup, player_bio_tag)
        player_bio = " ".join(player_bio)

        player_data = find_player_data(player_soup, player_data_tag)
        # "" marks a field that was not found in the data strip.
        player_national_team = player_age = player_weight = player_height = ""

        # Age is the difference between the current year and the birth year
        # (month and day are ignored), so it stays right when re-run later.
        current_year = datetime.date.today().year

        for elem in player_data:
            if re.match(r"\D", elem):
                # Entries that start with a non-digit carry the national team
                # after the last ", ".
                player_national_team = elem.split(", ")[-1]
            # NOTE(review): the < 2008 cut-off presumably separates a birth
            # date from other slash-separated dates on the page — confirm.
            elif re.match(r"\d+/", elem) and int(elem.split("/")[-1])<2008:
                player_age = current_year-int(elem.split("/")[-1])
            elif re.match(".*kg",elem):
                player_weight = int(re.sub("kg","", elem))
            elif re.match(".*cm",elem):
                player_height = int(re.sub("cm","", elem))
        player_honours = find_player_honours(player_soup)
        # print(player_name, player_national_team, player_age, player_pos, player_weight, player_height, player_num, player_img, player_honours)
        # create_player(player_name, player_national_team, player_age, player_pos, player_weight, player_height, player_num, player_img, player_promo, player_bio, player_honours)

    except Exception as error:
        # Exception (not a bare except) so Ctrl-C / kernel interrupt still works.
        print("Data retrieval failed for {}: {!r}".format(player_url, error))

## All La Liga Teams

In [None]:
# LA_LIGA_TEAMS_URL = "https://www.laliga.com/en-GB/laliga-santander"
# ONE_FOOTBALL_URL = "https://onefootball.com/en/competition/laliga-10/table"

# team_links = []
# html = requests.get(LA_LIGA_TEAMS_URL)
# soup = BeautifulSoup(html.text, "html")
# teams = soup.find(attrs={"class": "styled__ClubesHeaderContainer-sc-1azasvg-0"})
# for team in teams.find_all(name="a"):
#     team_link = team.attrs["href"]
#     # print(team_link)
#     team_links.append(team_link)
# teams = {}
# html = requests.get(ONE_FOOTBALL_URL)
# soup = BeautifulSoup(html.text, "html")

# for team_soup in soup.find_all(name="li", attrs={"class":"standings__row"}):
#     # print(team.get_text().split())
#     try:
#         team_title = team_soup.find(name="a").attrs["aria-label"]
#         pattern = unidecode.unidecode(team_title.lower()).split()
#         pattern1 = r".*{}".format(pattern[-1])
#         pattern2 = r".*{}".format("".join(pattern))
#         pattern3 = r".*{}".format("-".join(pattern))
#         pattern4 = r".*{}".format(pattern[0])
#         for i,link in enumerate(team_links):
#             if re.match(pattern1,link) and re.match(pattern2,link):
#                 teams[team_title] = [link]
#                 team_links.pop(i)
#             elif re.match(pattern3,link):
#                 teams[team_title] = [link]
#                 team_links.pop(i)

#         if not team_title in teams:
#             teams[team_title] = [LA_LIGA_TEAMS_URL]   
#     except:
#         pass
# # print(teams)

# for team in list(teams.keys()):
#     search_url = "https://en.wikipedia.org/wiki/football {}".format(team)
#     soup = BeautifulSoup(requests.get(search_url).text, "html")
#     link = soup.find(name="a", href=re.compile(r"^(https://en).*wikipedia.*(?<!edit)$")).attrs["href"]
#     soup = Beasoup = BeautifulSoup(requests.get(link).text, "html")
#     team_wiki_link = soup.find(attrs={"class":"mw-search-results-container"}).find(name="a").attrs["href"]
#     team_wiki_link = "https://en.wikipedia.org"+team_wiki_link
#     soup = Beasoup = BeautifulSoup(requests.get(team_wiki_link).text, "html")
#     team_logo = "https:"+soup.find(attrs={"class":"image"}).find(name="img").attrs["src"]
#     team_full_name = soup.find(name="h1").get_text()
#     teams[team] += [team_logo, team_full_name]


In [None]:
# print(teams["Celta Vigo"])

In [None]:
# headers = {
#     "accept": "application/json",
#     "Notion-Version": "2022-06-28",
#     "content-type": "application/json",
#     "Authorization": "Bearer " + NOTION_API_KEY 
# }
# def create_team(team_url, team_logo, team_full_name):
#     payload = {
#         "parent": {
#             "type": "database_id",
#             "database_id": TEAMS_DB_ID
#         },
        
#         "icon": {
#             "type": "external",
#             "external": {"url": team_logo}
#         },
        
#         "cover": {
#             "type": "external",
#             "external": {"url": team_logo}
#         },
        
#         "properties": {
#             "Name": {
#                 "title": [{
#                     "text": {"content": team_full_name}
#                 }]
#             },
#             "Team URL": {"url": team_url},
#             "League": {
#                 "relation": [{"id": LA_LIGA_PAGE_ID}]
#             }
#         }
#     }
#     response = requests.post(NOTION_ENDPOINT+"pages/", headers=headers, json=payload)
#     response.raise_for_status()

In [None]:
# for team in teams:
#     create_team(teams[team][0], teams[team][1], teams[team][2])

## Barca Calendar

In [None]:
def create_match(matchday, home_team, home_team_key, away_team, away_team_key, match_date, timeout=30):
    """Create one match page in the Notion matches database.

    Args:
        matchday: Label placed at the start of the page title.
        home_team, away_team: Team names shown in the page title.
        home_team_key, away_team_key: Notion page ids used for the
            "Home Team" and "Away Team" relations.
        match_date: Start value for the "Date" property; the callers in this
            notebook pass a "YYYY-MM-DD" string.
        timeout: Seconds to wait for the Notion API before giving up.

    Raises:
        requests.HTTPError: If Notion answers with an error status.
    """
    title = "{} : {} vs {}".format(matchday, home_team, away_team)
    properties = {
        "Name": {
            "title": [{
                "text": {"content": title}
            }]
        },
        "Home Team": {"relation": [{"id": home_team_key}]},
        "Away Team": {"relation": [{"id": away_team_key}]},
        "Date": {"date": {"start": match_date}}
    }
    payload = {
        "parent": {
            "type": "database_id",
            "database_id": MATCHES_DB_ID
        },
        "properties": properties
    }
    # Without a timeout a stalled connection would block the cell forever.
    response = requests.post(NOTION_ENDPOINT+"pages/", headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()

In [None]:
# Name of the stadium; not referenced by the code in this cell.
BARCA_STADIUM = "Estadi Olímpic Lluís Companys"
# Schedule page from which the fixture links are collected.
BARCA_CALENDAR_URL = "https://www.fcbarcelona.com/en/football/first-team/schedule"

def get_soup(url, timeout=30):
    """Download ``url`` and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never parsed as if it were the requested content.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Name the parser explicitly; "html" makes BeautifulSoup guess one and
    # emit GuessedAtParserWarning.
    return BeautifulSoup(response.text, "html.parser")

load_dotenv()

# Collect the fixture links from the schedule page and create a Notion match
# page for each processed fixture.
soup = get_soup(BARCA_CALENDAR_URL)
soup = soup.find(attrs={"class":"body-content"})
# Only the first fixture link is processed ([:1]).
for link in soup.find_all(name="a", attrs={"class":"fixture-result-list__link"})[:1]:
    match_link = "https://www.fcbarcelona.com" + link.attrs["href"]
    # print(match_link)
    match_soup = get_soup(match_link)
    home_team = match_soup.find(attrs={"class":"fixture-info__name--home"}).get_text()
    away_team = match_soup.find(attrs={"class":"fixture-info__name--away"}).get_text()
    matchday = match_soup.find(attrs={"class":"match-details__value"}).get_text()
    match_date = match_soup.find(attrs={"class":"match-hero__date"}).get_text()

    # The page shows the date without a year ("%a %d %b"). Read the month on
    # its own first: parsing the whole year-less string makes strptime assume
    # the year 1900, which rejects "29 Feb" although 2024 is a leap year.
    match_date = " ".join(match_date.split())
    match_month = datetime.datetime.strptime(match_date.split(" ")[-1], "%b").month
    # August to December belong to 2023, the remaining months to 2024.
    if match_month in range(8,13):
        match_date += " 2023"
    else:
        match_date += " 2024"
    match_date = datetime.datetime.strptime(match_date, "%a %d %b %Y")
    match_date = match_date.strftime("%Y-%m-%d")

    # The Notion page id of each team is stored in an environment variable
    # named after the team: lower-case ASCII without spaces or dots.
    home_team_key = os.getenv(unidecode.unidecode(home_team.replace(" ", "").replace(".","").lower()))
    away_team_key = os.getenv(unidecode.unidecode(away_team.replace(" ", "").replace(".","").lower()))
    create_match(matchday, home_team, home_team_key, away_team, away_team_key, match_date)
    # print(match_date)

In [1]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests, os, re, string, datetime, unidecode
from dotenv import load_dotenv


# Load variables from the .env file, then read the Notion credentials and the
# ids of the databases/pages this code writes to.
load_dotenv()
NOTION_API_KEY = os.getenv("NOTION_API_KEY")
PLAYERS_DB_ID = os.getenv("PLAYERS_DB_ID")
TEAMS_DB_ID = os.getenv("TEAMS_DB_ID")
MATCHES_DB_ID = os.getenv("MATCHES_DB_ID")
LA_LIGA_PAGE_ID = os.getenv("LA_LIGA_PAGE_ID")
NOTION_ENDPOINT = "https://api.notion.com/v1/"
BARCA_CALENDAR_URL = "https://www.fcbarcelona.com/en/football/first-team/schedule"

# Fail early with a readable message: without this check a missing key only
# shows up as "TypeError: can only concatenate str (not NoneType) to str".
if not NOTION_API_KEY:
    raise RuntimeError("NOTION_API_KEY is not set; define it in the environment or in the .env file.")

# HTTP headers sent with every Notion API request.
headers = {
    "accept": "application/json",
    "Notion-Version": "2022-06-28",
    "content-type": "application/json",
    "Authorization": "Bearer " + NOTION_API_KEY
}

def get_soup(url, timeout=30):
    """Download ``url`` and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never parsed as if it were the requested content.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Name the parser explicitly; "html" makes BeautifulSoup guess one and
    # emit GuessedAtParserWarning.
    return BeautifulSoup(response.text, "html.parser")

def create_match(matchday, home_team, home_team_key, away_team, away_team_key, match_date, timeout=30):
    """Create one match page in the Notion matches database.

    The page is linked to both teams and to the La Liga page.

    Args:
        matchday: Label placed at the start of the page title.
        home_team, away_team: Team names shown in the page title.
        home_team_key, away_team_key: Notion page ids used for the
            "Home Team" and "Away Team" relations.
        match_date: Start value for the "Date" property; the caller in this
            file passes a "YYYY-MM-DD" string.
        timeout: Seconds to wait for the Notion API before giving up.

    Raises:
        requests.HTTPError: If Notion answers with an error status.
    """
    title = "{} : {} vs {}".format(matchday, home_team, away_team)
    properties = {
        "Name": {
            "title": [{
                "text": {"content": title}
            }]
        },
        "Home Team": {"relation": [{"id": home_team_key}]},
        "Away Team": {"relation": [{"id": away_team_key}]},
        "Date": {"date": {"start": match_date}},
        "League": {"relation": [{"id": LA_LIGA_PAGE_ID}]}
    }
    payload = {
        "parent": {
            "type": "database_id",
            "database_id": MATCHES_DB_ID
        },
        "properties": properties
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(NOTION_ENDPOINT+"pages/", headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()

def create_barca_calendar(season_start_year=2023, max_matches=1):
    """Create Notion match pages from the FC Barcelona schedule page.

    For each processed fixture link the match page is downloaded, the team
    names, matchday label and date are read from it, and create_match() is
    called.

    Args:
        season_start_year: Year assigned to matches played from August to
            December; matches in the other months get the following year.
            The default reproduces the previously hard-coded 2023/2024.
        max_matches: Number of fixture links to process, counted from the top
            of the list. The default of 1 matches the previous hard-coded
            slice; pass None to process every fixture.
    """
    load_dotenv()

    soup = get_soup(BARCA_CALENDAR_URL)
    soup = soup.find(attrs={"class":"body-content"})
    fixture_links = soup.find_all(name="a", attrs={"class":"fixture-result-list__link"})
    for link in fixture_links[:max_matches]:
        match_link = "https://www.fcbarcelona.com" + link.attrs["href"]
        match_soup = get_soup(match_link)
        home_team = match_soup.find(attrs={"class":"fixture-info__name--home"}).get_text()
        away_team = match_soup.find(attrs={"class":"fixture-info__name--away"}).get_text()
        matchday = match_soup.find(attrs={"class":"match-details__value"}).get_text()
        date_text = match_soup.find(attrs={"class":"match-hero__date"}).get_text()

        # The page shows the date without a year ("%a %d %b"). Read the month
        # on its own first: parsing the whole year-less string makes strptime
        # assume the year 1900, which rejects "29 Feb" in a leap-year season.
        date_text = " ".join(date_text.split())
        match_month = datetime.datetime.strptime(date_text.split(" ")[-1], "%b").month
        if match_month in range(8,13):
            match_year = season_start_year
        else:
            match_year = season_start_year + 1
        match_date = datetime.datetime.strptime("{} {}".format(date_text, match_year), "%a %d %b %Y")
        match_date = match_date.strftime("%Y-%m-%d")

        # The Notion page id of each team is stored in an environment variable
        # named after the team: lower-case ASCII without spaces or dots.
        home_team_key = os.getenv(unidecode.unidecode(home_team.replace(" ", "").replace(".","").lower()))
        away_team_key = os.getenv(unidecode.unidecode(away_team.replace(" ", "").replace(".","").lower()))
        create_match(matchday, home_team, home_team_key, away_team, away_team_key, match_date)

## La Liga Calendar

In [52]:
from utils import get_soup
from match import create_match

In [53]:
# URL prefix of the La Liga results pages; create_la_liga_calendar() appends the gameweek number to it.
LA_LIGA_CALENDAR_URL = "https://www.laliga.com/en-GB/laliga-santander/results/2023-24/gameweek-"




  soup = BeautifulSoup(html, "html")


In [79]:
def create_la_liga_calendar(first_gameweek=1, last_gameweek=38):
    """Create a Notion match page for every fixture listed on the La Liga results pages.

    One results page is fetched per gameweek. Each table row with more than
    one cell is read as a match: the first kept cell ends with the date as
    "DD.MM.YYYY" and the second holds the two team names separated by "VS".

    Args:
        first_gameweek: First gameweek to fetch (inclusive).
        last_gameweek: Last gameweek to fetch (inclusive). The defaults cover
            gameweeks 1 to 38, as before.
    """
    # The team-id lookups below only need the .env file loaded once, not once
    # per match.
    load_dotenv()

    for gameweek in range(first_gameweek, last_gameweek + 1):
        soup = get_soup(LA_LIGA_CALENDAR_URL+str(gameweek))

        matchday = "Matchday {}".format(gameweek)
        for row in soup.find(name="table").find_all(name="tr"):
            cells = row.find_all(name="td")
            if len(cells) <= 1:
                # Rows with at most one cell do not describe a match.
                continue

            # Keep only the cells whose text starts with a word character.
            data = [cell.get_text() for cell in cells if re.match(r"\w+", cell.get_text())]

            match_date = data[0].split(" ")[-1]
            match_date = datetime.datetime.strptime(match_date, "%d.%m.%Y")
            match_date = match_date.strftime("%Y-%m-%d")
            teams = data[1].split("VS")
            home_team = teams[0].strip()
            away_team = teams[1].strip()

            # The Notion page id of each team is stored in an environment
            # variable named after the team: lower-case ASCII without spaces.
            home_team_key = os.getenv(unidecode.unidecode(home_team.replace(" ", "").lower()))
            away_team_key = os.getenv(unidecode.unidecode(away_team.replace(" ", "").lower()))

            create_match(LA_LIGA_PAGE_ID,matchday, home_team, home_team_key, away_team, away_team_key, match_date)
            print("Fetch {} : {} vs {}".format(matchday, home_team, away_team))


c339f2a59abf4600b1a34eac4cb420a9 57876228bc104ab4bfabdb46509eb466
UD Almería Rayo Vallecano
Fetch Matchday 1 : UD Almería vs Rayo Vallecano
531cce3bcf9649fd8e8c027ff22cd4e7 02d54564671c4700844df6bc4235a476
Athletic Club Real Madrid
Fetch Matchday 1 : Athletic Club vs Real Madrid
05ed6e747bc74017a1da4691797217c0 f454536995a148358bbd3c55f7cce7f0
Atlético de Madrid Granada CF
Fetch Matchday 1 : Atlético de Madrid vs Granada CF
f5f2009158c343ddbda4546de2feae35 130adae17e1044b9a8171fc90ea062cd
RC Celta CA Osasuna
Fetch Matchday 1 : RC Celta vs CA Osasuna
978dca6591164c2d899141e5942f2b41 d90b5cfd4c5b4fc1917b094fc7d51849
Sevilla FC Valencia CF
Fetch Matchday 1 : Sevilla FC vs Valencia CF
6d7a351fef4c49f4a35beb989f071c42 71cca6a93e5848c9aaac5aa824f3b08c
UD Las Palmas RCD Mallorca
Fetch Matchday 1 : UD Las Palmas vs RCD Mallorca
524ea684beae430697c7a9457c13f701 32b667bf4e4d4704858d63321cecbfba
Getafe CF FC Barcelona
Fetch Matchday 1 : Getafe CF vs FC Barcelona
901411de67954c85af7bf15f16a081cc 1