In [None]:
import requests
from requests.auth import HTTPBasicAuth
import os
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import pandas as pd
import re
pd.set_option('display.max_rows', 900)

bets = BETS()
criteria = ['U19 Gold','U17 National Championships','U19 National Championships']
roundRequired = ['Winner','Winner','Winner','Winner']
yongestTournament = 'U16'
matchData,linksDf = get_tournament_results(bets,criteria,yongestTournament,roundRequired,'16/09/2024')
matchData
# R128 ... QF SF Final


### H2H From all tournaments - no county - In Progress
#   Display tournament results in date order.
#   Extra tab for scores
#   Think of ways to tell the story. 
#   Flag unfinished matches
## Make into a Shiny App

In [1]:
class BETS():
    def __init__(self):
        load_dotenv()
        self.clientId = os.getenv('CLIENT_ID')
        self.clientPw = os.getenv('CLIENT_PW')
        self.baseUrl = "https://api.tournamentsoftware.com/1.0"
    
    def make_call(self,url):
        response = requests.get(url, auth=HTTPBasicAuth(self.clientId, self.clientPw))
        return response

    #   USE SEARCH QUERY '?q=U11 Gold'
    #   Filter by dates
    #   Create lists of cool tourney codes.
    def search_tournaments(self,query):
        outputQuery = query.replace(' ','+')
        if query:
            url = self.baseUrl+f"/Tournament?q={outputQuery}"
        else:
            yearAgo = datetime.strftime(datetime.today() - timedelta(days=365),'%Y-%m-%d')
            url = self.baseUrl+f"/Tournament?list=1&refdate={yearAgo}&pagesize=1000" 
        return self.make_call(url)
    
    def get_tournament_details(self,tCode):
        url = self.baseUrl+f"/Tournament/{tCode}"
        return self.make_call(url)

    def get_matches(self,tCode,date):
        dateQuery = date.strftime("%Y%m%d")
        url = self.baseUrl+f"/Tournament/{tCode}/Match/{dateQuery}"
        return self.make_call(url)
    
    def wanted_rounds(self,roundCaredAbout):
        allRounds = ['R128', 'R64', 'R32', 'R16', 'QF', 'SF', 'Final']
        if roundCaredAbout == 'Winner':
            return ['Final']
        elif roundCaredAbout in allRounds:
            index = allRounds.index(roundCaredAbout)
            return allRounds[index:]
        else:
            return ['','','','','','','']


In [2]:
def processXMLList(list):
    out = []
    for xml in list:
        out.append(xml.text)
    return out

def exclude_list(inputs,youngestTournament):
    output = ['invite','(1000ie)','masters','senior county','para','yonex','league','regional','schools','graded','rising']
    startingAge = int(youngestTournament[1:])
    if startingAge > 13:
        output.append('futures')        
    for i in range(startingAge-1,10,-1):
        output.append(f'u{i}')
        
    return output
   
#Both inputs as strings, searchStart = d/m/y, searchText = 'U11 Gold'
def get_tournament_results(bets,searchText,youngestTournament,roundsWanted,searchStart):
    tabuList = exclude_list(searchText,youngestTournament)
    searchStart = datetime.strptime(searchStart,"%d/%m/%Y")
    tourneysDf = pd.DataFrame(columns=['Name','Code','StartDate','EndDate'])
    tourneysToAdd = []
    searchText.append('')
    roundsWanted.append('')
    for run,search in enumerate(searchText):

        if not search: #tournaments are for h2h only, not for selection
            tournamentPrio = 'secondary'
        else:
            tournamentPrio = 'primary'

        out = bets.search_tournaments(search).content
        soup = BeautifulSoup(out,'xml')
        tournaments = soup.find_all('Tournament')

        for i in range(0,len(tournaments)):  
            name = tournaments[i].find('Name').text
            code = tournaments[i].find('Code').text
            startDate = datetime.fromisoformat(tournaments[i].find('StartDate').text)
            endDate = datetime.fromisoformat(tournaments[i].find('EndDate').text)

            tType = tournaments[i].find('TypeID').text
            
            goodTourney = True

            if not tType == '0':
                goodTourney = False
            
            for t in tabuList:
                if t in name.lower():
                    goodTourney = False

            if not any(existing_tourney['Name'] == name for existing_tourney in tourneysToAdd) and goodTourney:
                tourney = {
                    "Name": name,
                    "Code": code,
                    "StartDate": startDate,
                    "EndDate": endDate,
                    "Min_Round": roundsWanted[run],
                    "Prio": tournamentPrio,
                    "Search": search
                }
    
                tourneysToAdd.append(tourney)

    tourneysDf = pd.concat([tourneysDf, pd.DataFrame(tourneysToAdd)], ignore_index=True)
    if searchStart < datetime.today():
        searchDate = searchStart
    else:
        searchDate = datetime.today() - timedelta(days = 2)

    # Define searchDate and dtYearBefore
    dtYearBefore = searchDate - timedelta(days=365)
    
    # Apply condition and filter rows
    tourneysInYearDf = tourneysDf[(tourneysDf['EndDate'] >= dtYearBefore) & (tourneysDf['StartDate'] <= searchDate) & (tourneysDf['EndDate'] < searchDate)].copy()

    linksDf = tourneysInYearDf[['Name','Code','StartDate','Prio','Search']]

    def code_to_link(code):
        return f'https://be.tournamentsoftware.com/tournament/{code}'
    
    linksDf['Code'] = linksDf['Code'].apply(code_to_link)


    #Itterate through Tournaments
    match_data = []
    for index, row in tourneysInYearDf.iterrows():
        roundWanted = row['Min_Round']
        dtCurrentDay = row['StartDate']
        dtEndDate = row['EndDate']
        code = row['Code']
        tName = row['Name']
        tPrio = row['Prio']
        
        #Itterate through Days With Matches.
        allMatchesXML = [] 
        while dtCurrentDay <= dtEndDate:
            tDetails = bets.get_matches(row['Code'],dtCurrentDay)
            matchesXML = tDetails.content
            allMatchesXML.append(matchesXML)
            dtCurrentDay = dtCurrentDay + timedelta(days=1)

        # Iterate over each Match element
        for matchesXML in allMatchesXML:
            soup = BeautifulSoup(matchesXML,'xml')
            for match in soup.find_all('Match'):
                #check if match went ahead:
                if not match.find('ScoreStatus').text == '0':
                    pass
                else:
                    match_info = {
                        'Tournament': tName,
                        'Code': match.find('Code').text,
                        'Winner': match.find('Winner').text,
                        'ScoreStatus': match.find('ScoreStatus').text,
                        'RoundName': match.find('RoundName').text,
                        'EventName': match.find('EventName').text,
                    }
                    # Extract team and player details
                    teams = []
                    for team in ['Team1', 'Team2']:
                        teamXML = match.find(team)
                        team_info = {}
                        #find player id's and if singles
                        team_info[f'{team}_MemberID'] = processXMLList(teamXML.find_all('MemberID'))
                        team_info[f'{team}_Firstname'] = processXMLList(teamXML.find_all('Firstname'))
                        team_info[f'{team}_Lastname'] = processXMLList(teamXML.find_all('Lastname'))
                        team_info[f'{team}_GenderID'] = processXMLList(teamXML.find_all('GenderID'))

                        teams.append(team_info)

                    match_info.update(teams[0])
                    match_info.update(teams[1])

                    # Extract sets scores
                    sets = match.find('Sets')
                    set_scores = []
                    if sets:
                        for set_ in sets.find_all('Set'):
                            set_scores.append({
                                'Set_Team1': set_['Team1'],
                                'Set_Team2': set_['Team2']
                            })
    
                        match_info['Set_Scores'] = set_scores
                    
                    match_info['Min_Round'] = roundWanted

                    match_info['Prio'] = tPrio

                    # Append the structured match information to the list
                    match_data.append(match_info)

            # Convert the list to a DataFrame
    df = pd.DataFrame(match_data)
    return df,linksDf

def next_func():
    pass

In [4]:
def clean_event(event):
    match = re.search(r'U(1[0-9])', event)
    if match:
        e = event.split(' ')
        out = ''
        if len(e) > 1:
            for s in e:
                if not(('U' in s) or ('&' in s)):
                    out = out + s.strip()
            event = out
        else:
            event = event.replace(match.group(0),'').strip()

    event = re.sub(r'\d', '', event)

    if 'open' in event.lower():
        if 'd' in event.lower():
            event = 'md'
        else:
            event = 'ms'


    elif 'ladi' in event.lower():
        if ' ' in event:
            if 'd' in event.split(' ')[1].lower():
                event = 'wd'
            else:
                event = 'ws'
        else:
            if 'singles' in event.lower():
                event = 'ws'
            else:
                event = 'wd'

    elif 'men' in event.lower():
        if 'd' in event.lower():
            event = 'md'
        else:
            event = 'ms'

    elif 'mixe' in event.lower():
        event = 'xd'
    
    elif 'boy' in event.lower():
        if 'd' in event.lower():
            event = 'md'
        else:
            event = 'ms'
    
    elif 'girl' in event.lower():
        if 'd' in event.lower():
            event = 'wd'
        else:
            event = 'ws'

    elif 'mx' in event.lower():
        event = 'xd'

    else:
        event = event.lower().replace('o','m').replace('g','w').replace('b','m').replace('*','').replace('/','').split(' ')[0].split('u')[0]
    
    return event

def check_for_ids(event,t1ID,t2ID):
    if 'd' in event:
        if (not len(t1ID) > 1) or (not len(t2ID) > 1): 
            return False
        for id_ in t1ID:
            if not id_:
                return False
        for id_ in t2ID:
            if not id_:
                return False
        return True
    else:
        if t1ID and t2ID:
            return True
    return False
        
def add_match(matchList,tournament,event,winningTeam,t1Name,t1ID,t2Name,t2ID,scores,code):
    #Add match as a row to the df. append a dict to matchList
    for i,e in enumerate(['ms','ws','md','wd','xd']):
        if event == e:
            matches = matchList[i]
    


likelyPlayersDf = pd.read_csv('likely_players.csv')
likelyPlayersDf['Beat'] = [[] for _ in range(len(likelyPlayersDf))]
likelyPlayersDf['Temp'] = ''
primaryTournaments = matchData[matchData['Prio'] == 'primary']
uniqueTourneys = primaryTournaments['Tournament'].unique()
for tName in uniqueTourneys:
    for index, row in likelyPlayersDf.iterrows():
        if 's' in likelyPlayersDf.at[index, 'Event']:
            likelyPlayersDf[tName] = ''
        else:
            likelyPlayersDf[tName] = ' - '

msDf = likelyPlayersDf[likelyPlayersDf['Event'] == 'ms']  # Men's Singles
wsDf = likelyPlayersDf[likelyPlayersDf['Event'] == 'ws']  # Women's Singles
mdDf = likelyPlayersDf[likelyPlayersDf['Event'] == 'md']  # Men's Doubles
wdDf = likelyPlayersDf[likelyPlayersDf['Event'] == 'wd']  # Women's Doubles

xdDf = likelyPlayersDf[likelyPlayersDf['Event'] == 'xd']  # Mixed Doubles
mxdDf = xdDf.iloc[:len(xdDf)//2]
wxdDf = xdDf = xdDf.iloc[len(xdDf)//2:]

eventDfList = [msDf,wsDf,mdDf,wdDf,mxdDf,wxdDf]

matchList = [[],[],[],[],[]]

def get_event_df(event,eventDfList,gender = 0):
    if event == 'ms':
        return(eventDfList[0])
    if event == 'ws':
        return(eventDfList[1])
    if event == 'md':
        return(eventDfList[2])
    if event == 'wd':
        return(eventDfList[3])
    if event == 'mxd':
        return(eventDfList[4])
    if event == 'wxd':
        return(eventDfList[5])


bets = BETS()
for index, row in matchData.iterrows():
    round = row['RoundName']
    roundsWanted = bets.wanted_rounds(row['Min_Round'])
    tournament = row['Tournament']
    event = clean_event(row['EventName'])
    prio = row['Prio']        
    winningTeam = row['Winner']
    t1ID = [num.replace("BE", "") for num in row['Team1_MemberID']]
    t2ID = [num.replace("BE", "") for num in row['Team2_MemberID']]
    idsExist = check_for_ids(event,t1ID,t2ID)
    t1Gender = row['Team1_GenderID']
    t2Gender = row['Team2_GenderID']
    code = row['Code']

    if 's' in event:
        try:
            t1Name = (str(row['Team1_Firstname']) + ' ' + str(row['Team1_Lastname'])).replace("'",'').replace('[','').replace(']','')
            t2Name = (str(row['Team2_Firstname']) + ' ' + str(row['Team2_Lastname'])).replace("'",'').replace('[','').replace(']','')
        except IndexError as e:
            print(f'{tournament} - {event} - {round} has an error with player names')
            idsExist = False
    else:
        try:
            teamNames = []
            for team in ['Team1','Team2']:
                names = []
                for i in range(0,2):
                    name = str((row[f'{team}_Firstname'][i]) + ' ' + str(row[f'{team}_Lastname'][i])).replace("'",'').replace('[','').replace(']','')
                    names.append(name)
                teamNames.append(names)
            t1Name = teamNames[0]
            t2Name = teamNames[1]
        except IndexError as e:
            print(f'{tournament} - {event} - {round} has an error with player names')
            idsExist = False

    scores = row['Set_Scores']

    eventDf = get_event_df(event,eventDfList)
    
    if idsExist:
        add_match(matchList,tournament,event,winningTeam,t1Name,t1ID,t2Name,t2ID,scores,code)
        if 's' in event: # Singles
            if str(winningTeam) == '1':
                winnerID = str(t1ID)[2:-2]
                winnerName = t1Name
                looserID = str(t2ID)[2:-2]
                looserName = t2Name
            else:
                winnerID = str(t2ID)[2:-2]
                winnerName = t2Name
                looserID = str(t1ID)[2:-2]  
                looserName = t1Name    

            ids = [winnerID,looserID]
            for i,id in enumerate(ids):
                try:
                    matching_row = eventDf[(eventDf['ID'] == int(id)) & (eventDf['Event'] == event)] #Find Winner
                except:
                    print(f"{id} found in id column...")
                if not matching_row.empty:
                    # Get the index of the matching row
                    idx = matching_row.index[0]
                    # Append the t2ID to the 'Beat' list in the matching row
                    if i == 0 and not (eventDf[eventDf['ID'] == int(ids[1])].empty):
                        eventDf.at[idx, 'Beat'].append(ids[1])
                    #Find best round
                    if (row['RoundName'] in str(roundsWanted)) and prio == 'primary':
                        currentBest = eventDf.at[idx,tournament]
                        if len(bets.wanted_rounds(currentBest)) >= len(bets.wanted_rounds(round)):
                            if i == 0:
                                eventDf.at[idx, 'Temp'] = 'win'
                            else:
                                eventDf.at[idx, 'Temp'] = 'loose'   

                            if currentBest == 'Winner':
                                pass
                            else:
                                if round == 'Final' and i == 0:
                                    eventDf.at[idx,tournament] = 'Winner'
                                else:
                                    eventDf.at[idx,tournament] = round                        
                    
        elif event == 'xd':   #Mixed Dubs
            runs = ['mxd','wxd']
            for run in range(0,2):
                eventDf = get_event_df(runs[run],eventDfList)
                if str(winningTeam) == '1':
                    winnerID = str(t1ID[run])
                    winnerName = t1Name[run]
                    looserID = str(t2ID[run])
                    looserName = t2Name[run]
                else:
                    winnerID = str(t2ID[run])
                    winnerName = t2Name[run]
                    looserID = str(t1ID[run])
                    looserName = t1Name[run]      

                ids = [winnerID,looserID]
                for i,id in enumerate(ids):
                    try:
                        matching_row = eventDf[(eventDf['ID'] == int(id)) & (eventDf['Event'] == event)] #Find Winner
                    except:
                        pass
                    if not matching_row.empty:
                        # Get the index of the matching row
                        idx = matching_row.index[0]
                        # Append the t2ID to the 'Beat' list in the matching row
                        if i == 0 and not (eventDf[eventDf['ID'] == int(ids[1])].empty):
                            eventDf.at[idx, 'Beat'].append(ids[1])
                        #Find best round
                        if (row['RoundName'] in str(roundsWanted)) and prio == 'primary':
                            currentBest = eventDf.at[idx,tournament].split(' - ')[0]
                            if len(bets.wanted_rounds(currentBest)) >= len(bets.wanted_rounds(round)):      
                                if run == 0 and winningTeam == '1':
                                    partner = f'{t1Name[1]} ({t1ID[1]})'
                                elif run == 0 and winningTeam == '2':
                                    partner = f'{t2Name[1]} ({t2ID[1]})'
                                elif run == 1 and winningTeam == '1':
                                    partner = f'{t1Name[0]} ({t1ID[0]})'
                                elif run == 1 and winningTeam == '2':
                                    partner = f'{t2Name[0]} ({t2ID[0]})'

                                if i == 0:
                                    eventDf.at[idx, 'Temp'] = 'win'
                                else:
                                    eventDf.at[idx, 'Temp'] = 'loose'   

                                if currentBest == 'Winner':
                                    pass
                                else:
                                    if round == 'Final' and i == 0:
                                        eventDf.at[idx,tournament] = 'Winner - ' + partner
                                    else:
                                        eventDf.at[idx,tournament] = round + ' - ' + partner

        else:
            if str(winningTeam) == '1':
                winnerIDs = t1ID
                winnerNames = t1Name
                looserIDs = t2ID
                looserNames = t2Name
            else:
                winnerIDs = t2ID
                winnerNames = t2Name
                looserIDs = t1ID
                looserNames = t1Name

            ids = [winnerIDs,looserIDs]
            names = [winnerNames,looserNames]
            for i,id in enumerate(ids):
                for memberNum,memberID in enumerate(id):
                    try:
                        matching_row = eventDf[(eventDf['ID'] == int(memberID)) & (eventDf['Event'] == event)] #Find Winner
                    except:
                        pass
                    if not matching_row.empty:
                        # Get the index of the matching row
                        idx = matching_row.index[0]
                        # Append the t2ID to the 'Beat' list in the matching row
                        if i == 0 and not (eventDf[eventDf['ID'] == int(ids[1][0])].empty):
                            eventDf.at[idx, 'Beat'].append(ids[1][0])
                        if i == 0 and not (eventDf[eventDf['ID'] == int(ids[1][1])].empty):
                            eventDf.at[idx, 'Beat'].append(ids[1][1])

                        #Find best round
                        if (row['RoundName'] in str(roundsWanted)) and prio == 'primary':
                            currentBest = eventDf.at[idx,tournament].split(' - ')[0]
                            if len(bets.wanted_rounds(currentBest)) >= len(bets.wanted_rounds(round)):
                                if memberNum == 1:
                                    partner = f'{names[i][0]} ({id[0]})'
                                else:
                                    partner = f'{names[i][1]} ({id[1]})'

                                if i == 0:
                                    eventDf.at[idx, 'Temp'] = 'win'
                                else:
                                    eventDf.at[idx, 'Temp'] = 'loose' 

                                if currentBest.split(' - ') == 'Winner':
                                    pass
                                else:
                                    if round == 'Final' and i == 0:
                                        eventDf.at[idx,tournament] = 'Winner - ' + partner
                                    else:
                                        eventDf.at[idx,tournament] = round + ' - ' + partner

for index, row in eventDf.iterrows():
    if prio == 'primary':
        try:
            places = row[tournament].split(' - ')[0]
            partner = row[tournament].split(' - ')[1]
        except:
            places = row[tournament]
        if not (places == 'Final' or places == 'Winner') and places:
            if row['Temp'] == 'win':
                if partner:
                    eventDf.at[index, tournament] = str(bets.wanted_rounds(places)[1]) + ' - ' + partner
                else:
                    eventDf.at[index, tournament] = bets.wanted_rounds(places)[1]

        
def cull_loosers(string):
    if string:
        if not ('winner' in string.lower()):
            return '-'
        else:
            return 'Winner'

for df in eventDfList:
    i = 0
    for col_name, col_data in df.items():
        if i > 4:
            matching_row = linksDf[(linksDf['Name'] == col_name)]
            search = matching_row['Search']
            idx = search.index[0]
            round_ = linksDf.at[idx, 'Search']
            searchIndex = criteria.index(round_)
            if roundRequired[searchIndex] == 'Winner':
               df[col_name] = col_data.apply(cull_loosers)
        i += 1

     

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col_name] = col_data.apply(cull_loosers)


In [None]:
print(eventDfList[0])

In [5]:
targetPlayersDfs = []
for df in eventDfList:
    cols_after_5 = df.iloc[:, 5:]
    # Create a mask to check if any value in the row is not equal to "-"
    mask = (cols_after_5 != " - ").any(axis=1)
    # Filter the original DataFrame based on the mask
    PlayersToTrack = df[mask]
    targetPlayersDfs.append(PlayersToTrack)

targetPlayersDfs[0]

Unnamed: 0,Name,Event,ID,Beat,Temp,Kent U19 Gold 2024,Derby University U19 Gold,U17 English National Championships 2023,U19 English National Championships 2023
0,Jason Ou,ms,1311945,"[1284382, 1284382, 1268282, 1255662, 1302102, ...",win,Winner,Winner,-,-
1,Oliver Nicolson,ms,1255662,[1317749],loose,-,-,-,-
2,Oliver Wu,ms,1268282,"[1342118, 1281105, 1309842, 1307845, 1302102, ...",loose,-,-,-,-
3,James Song,ms,1309842,"[1345941, 1283214, 1281105, 1268282, 1290940, ...",win,-,-,Winner,-
4,David Ng,ms,1302102,"[1359001, 1284382, 1255662, 1284382]",,-,-,-,-
5,Daniel Wykes,ms,1284382,"[1345941, 1342118, 1359001, 1309842, 1345941]",,-,-,-,-
6,Kalyan Manoj,ms,1345941,"[1317378, 1302102, 1297707]",,-,-,-,-
7,Dillon Chong,ms,1248118,[1359001],,-,-,-,-
8,Leon Crayford,ms,1307845,"[1290940, 1255662]",,-,-,-,-
9,Aaren SUM,ms,1365809,"[1342118, 1269924, 1306977]",,-,-,-,-


In [20]:
h2hMatrix = []
id_to_name_list = []
name_to_id_list = []

for df in targetPlayersDfs:# Get all unique player IDs
    unique_ids = df['ID'].unique()  
    head_to_head_matrix = pd.DataFrame('-', index=unique_ids, columns=unique_ids)
    # Create a dictionary to store the counts for both wins and losses
    head_to_head_counts = {id_: {id2: [0, 0] for id2 in unique_ids} for id_ in unique_ids}

    playerIds = df['ID'].tolist()

    # Fill in the counts based on the Beat column
    for idx, row in df.iterrows():
        winner_id = row['ID']  # The player who won
        beaten_ids = row['Beat']  # The players they beat
        for beaten_id in beaten_ids:
            if beaten_id in str(playerIds):
                head_to_head_counts[int(winner_id)][int(beaten_id)][0] += 1  # Increment winner's wins
                head_to_head_counts[int(beaten_id)][int(winner_id)][1] += 1  # Increment beaten player's losses

    # Convert the counts into the "Wins-Losses" format
    for winner_id in unique_ids:
        for opponent_id in unique_ids:
            wins, losses = head_to_head_counts[winner_id][opponent_id]
            if wins > 0 or losses > 0:
                head_to_head_matrix.loc[winner_id, opponent_id] = f"{wins}-{losses}"

    # Create a mapping from IDs to names
    id_to_name = dict(zip(df['ID'], df['Name']))
    name_to_id = dict(zip(df['Name'], df['ID']))
    id_to_name_list.append(id_to_name)
    name_to_id_list.append(name_to_id)
    
    # Replace the index and columns of the head-to-head matrix with player names
    head_to_head_matrix = head_to_head_matrix.rename(index=id_to_name, columns=id_to_name)

    # Display the head-to-head matrix with names instead of IDs
    head_to_head_matrix.columns.name = 'Beaten Player'
    head_to_head_matrix.index.name = 'Winning Player'
    h2hMatrix.append(head_to_head_matrix)
    
    # Make a copy of h2h matrix so id_matrix has the urls eventually
    
    id_matrix = list(h2hMatrix)
    
    # Rename id_matrix so that it has the ids so the h2h links can be generated 
    
    for i, index in enumerate(id_matrix):
        id_matrix[i] = id_matrix[i].rename(index=name_to_id_list[i], columns=name_to_id_list[i])

In [8]:
def head_url(t2p1_member_id, t1p1_member_id):
    base_url = "https://be.tournamentsoftware.com/head-2-head"
    organization_code = "F3676CE8-5988-4343-B9CF-52CE5F0B73CC"
    
    # Construct the full URL with injected member IDs
    full_url = f"{base_url}?OrganizationCode={organization_code}&T2P1MemberID={t2p1_member_id}&T1P1MemberID={t1p1_member_id}"
    return full_url

def ifPlayed(matrix, row, column):
    if (matrix.iloc[row, column] != "-"):
        return 1

In [21]:
# Get url to h2h link by using both player ids, then place it into id_matrix

for l,df in enumerate(id_matrix):
    for i in range(len(df)):  # Get the length of the DataFrame
        for j in range(len(df.columns)):  # Get the number of columns
            if ifPlayed(df, i, j) == 1:
            # Correctly accessing the row and column names using .iloc[] and .index[]
                row_label = df.index[i]
                col_label = df.columns[j]
                df.iloc[i, j] = head_url(col_label, row_label)      


In [10]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait  # Import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC  # Import expected_conditions
import time

def deal_with_cookies(driver):
    try:
        accept_button = driver.find_element(by=By.XPATH, value='/html/body/div/div/div/main/form/div[1]/button[1]/span')
        time.sleep(2)
        accept_button.click()     
    except:
        pass
    try:
        accept_button = driver.find_element(by=By.XPATH, value='/html/body/div[3]/div[2]/div[1]/div[2]/div[2]/button[2]')
        time.sleep(2)
        accept_button.click()     
    except:
        pass

# Set up Selenium with Safari (SafariDriver)

def get_url_from_search(search_term, driver):
    
    # Open the webpage
    driver.get('https://be.tournamentsoftware.com')
    deal_with_cookies(driver)

    search_box = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, 'MastheadSearchInput'))
)

    # Enter the search term into the search box
    search_box.send_keys(search_term)

    # Submit the search
    search_box.send_keys(Keys.RETURN)

    # Wait for results to load
    time.sleep(3)

    link = driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[2]/div[3]/div/ul/li[3]/div/div/ul/div/div/div/h5/a')
    url = link.get_attribute('href')
    
    return url

In [11]:
#Using the scraping function that creates a dictionary of ids to player profile urls

driver = webdriver.Safari()

id_dicts = []

for df_index, df in enumerate(id_matrix):
    id_dict = {}
    df: pd.DataFrame = id_matrix[df_index]
    for col, player_id in enumerate(df.columns):  # Start from the second column
        id_dict[player_id] = get_url_from_search(player_id, driver)
    
    id_dicts.append(id_dict)
    
driver.quit()

# Then can rename the header and index names to the url link to player profiles

for i, index in enumerate(id_matrix):
    id_matrix[i] = id_matrix[i].rename(index=id_dicts[i], columns=id_dicts[i])

In [23]:
import pandas as pd
from openpyxl import load_workbook

outputDfs = []
for df in targetPlayersDfs:
    df = df.drop(['Beat', 'Temp', 'Event'], axis=1)
    outputDfs.append(df)

dfHeaddigns = ['MS','WS','MD','WD','MXD','WXD']

#    linksDf['Code'] = linksDf['Code'].apply(code_to_link)

tourneyDeetsDf = pd.DataFrame()
def format_time(time):
    date = str(time).split(' ')[0]
    date_obj = datetime.strptime(date, '%Y-%m-%d')
    return(date_obj.strftime('%d/%m/%Y'))

tourneyDeetsDf['Tournament'] = linksDf['Name']
tourneyDeetsDf['Link'] = linksDf['Code']
tourneyDeetsDf['Start Date'] = linksDf['StartDate'].apply(format_time)

numPrios = linksDf['Prio'].tolist().count('primary')
first_part = tourneyDeetsDf.iloc[:numPrios]
second_part = tourneyDeetsDf.iloc[numPrios:]
empty_row = pd.DataFrame([["", "", ""]], columns=tourneyDeetsDf.columns)
tourneyDeetsDf = pd.concat([first_part, empty_row, second_part], ignore_index=True)

h2hLengths = []
h2hLongestNames = []
for i,df in enumerate(h2hMatrix):
    colList = df.columns.tolist()
    for j,name in enumerate(colList):
        if j < len(h2hLengths):
            currentLongest = h2hLengths[j]
            if len(name) > currentLongest:
                h2hLengths[j] = len(name)
                h2hLongestNames[j] = name
        else:
            h2hLengths.append(len(name))
            h2hLongestNames.append(name)
h2hLengths.insert(0,max(h2hLengths))


with pd.ExcelWriter('script-results.xlsx', engine='xlsxwriter') as writer:

    outputDfs[0].to_excel(writer, sheet_name='Tournaments', startrow=1, index=False)  
    worksheet = writer.sheets['Tournaments']

    tourneyDeetsDf.to_excel(writer,sheet_name='Tournaments',startrow=1,startcol=15,index=False)
    for i, col in enumerate(tourneyDeetsDf.columns):
        colString = tourneyDeetsDf[col].astype(str)
        max_length = max(colString.map(len).max(), len(col)) + 2  # Adding some padding
        worksheet.set_column(i+15, i+15, max_length)

    worksheet.write(0, 0, 'MS')  # Write heading
    for i, col in enumerate(outputDfs[0].columns):
        max_length = max(outputDfs[0][col].astype(str).map(len).max(), len(col)) + 2  # Adding some padding
        worksheet.set_column(i, i, max_length)

    # Write first dataframe with heading
    workbook = writer.book
    worksheet = writer.sheets['Tournaments']
    row_position = len(outputDfs[0]) + 3
    for i in range(1,len(outputDfs)):
        worksheet.write(row_position, 0, dfHeaddigns[i])  # Add a heading for the first DataFrame
        outputDfs[i].to_excel(writer, sheet_name='Tournaments', startrow=row_position + 1, index=False)
        row_position += len(outputDfs[i]) + 3


# Create an Excel writer
    for index in range(len(h2hMatrix)):
        df1 = h2hMatrix[index]
        df2 = id_matrix[index]

        # Create a new DataFrame to hold hyperlinks and display names
        hyperlinked_df = pd.DataFrame(index=df1.index, columns=df1.columns)

        for i in range(hyperlinked_df.shape[0]):
            for j in range(hyperlinked_df.shape[1]):
                display_value = df1.iat[i, j]
                url = df2.iat[i, j]
                if display_value != '-':
                    hyperlinked_df.iat[i, j] = f'=HYPERLINK("{url}", "{display_value}")'
                else:
                    hyperlinked_df.iat[i, j] = display_value

        # Define sheet name for each DataFrame
        sheet_name = f'H2H_{dfHeaddigns[index]}'

        # Write to the corresponding sheet
        hyperlinked_df.to_excel(writer, sheet_name=sheet_name, startrow=1, startcol=1, header=False)
        workbook = writer.book
        worksheet = writer.sheets[sheet_name]

        # Writing the header row with hyperlinks
        for col_num, value in enumerate(hyperlinked_df.columns):
            url = df2.columns[col_num]  # Assuming each column name corresponds to a URL
            worksheet.write_url(0, col_num + 2, f"{url}", string=value)  # Adjust the URL as needed

        # Writing the index column with hyperlinks
        for row_num, value in enumerate(hyperlinked_df.index):
            url = df2.index[row_num]  # Assuming each index name corresponds to a URL
            worksheet.write_url(row_num + 1, 1, f"{url}", string=value)  # Adjust the URL as needed
            
        for col_num in range(hyperlinked_df.shape[1]):
            worksheet.set_column(col_num + 1, col_num + 1, 10)
