In [5]:
import json
import os
import random
import re
import time
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tabula
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from sklearn.neighbors import KernelDensity

# Scraping

In [69]:
def _athletes_marked(frame, column):
    """Return the ATHLETE values of the rows whose ``column`` cell equals 'x'."""
    return frame.loc[frame[column] == 'x', 'ATHLETE'].tolist()


def _dump_json(path, payload):
    """Serialise ``payload`` as JSON into ``path`` (overwriting any existing file)."""
    with open(path, 'w') as f:
        json.dump(payload, f)


def _reorder_name(name):
    """Return ``name`` with its family-name tokens moved to the front.

    A token is treated as a family name when its second character is
    upper-case (e.g. "LEBRUN"); this is the same heuristic the notebook has
    always used. Family tokens keep their relative order, followed by the
    remaining tokens in their original order. When no token qualifies, the
    last token is moved to the front.

    Unlike the previous hand-written 2/3-token branches, this works for any
    number of tokens and never indexes past the end of a short token.
    """
    tokens = name.split()  # split() also swallows repeated/leading/trailing spaces

    def is_family(token):
        # Slicing instead of token[1] so one-character tokens yield '' (False)
        # rather than raising IndexError.
        return token[1:2].isupper()

    family = [tok for tok in tokens if is_family(tok)]
    if not family:
        return " ".join(tokens[-1:] + tokens[:-1])
    given = [tok for tok in tokens if not is_family(tok)]
    return " ".join(family + given)


def scraping_names_from_pdf():
    """Build the JSON name lists from the men's and women's Excel entry sheets.

    Despite its (historical) name, this function reads two Excel workbooks,
    "Men26Jun.xlsx" and "Women26Jun.xlsx", from the working directory. Each
    sheet must provide the columns ATHLETE, SINGLES, TEAMS and MIXED DOUBLES.

    Files written to the working directory:
      * singleMen.json / singleWomen.json - athletes with an 'x' in SINGLES
      * teamsMen.json / teamsWomen.json   - athletes with an 'x' in TEAMS
      * mixed.json                        - men then women with an 'x' in MIXED DOUBLES
      * clasificated.json                 - every athlete name longer than four
        characters (women first, then men), rewritten family-name-first

    Returns:
        None.
    """
    dfM = pd.read_excel("Men26Jun.xlsx")
    dfW = pd.read_excel("Women26Jun.xlsx")

    # Per-event entry lists: a row is entered in an event when the event column holds 'x'.
    _dump_json('singleMen.json', _athletes_marked(dfM, 'SINGLES'))
    _dump_json('teamsMen.json', _athletes_marked(dfM, 'TEAMS'))
    _dump_json('singleWomen.json', _athletes_marked(dfW, 'SINGLES'))
    _dump_json('teamsWomen.json', _athletes_marked(dfW, 'TEAMS'))

    mixed = _athletes_marked(dfM, 'MIXED DOUBLES') + _athletes_marked(dfW, 'MIXED DOUBLES')
    _dump_json('mixed.json', mixed)

    # Keep only cells that look like real names; the length comparison is False
    # for missing values, so empty cells are dropped as well.
    selectedW = dfW.loc[dfW['ATHLETE'].str.len() > 4, 'ATHLETE'].tolist()
    selectedM = dfM.loc[dfM['ATHLETE'].str.len() > 4, 'ATHLETE'].tolist()
    selectedAll = selectedW + selectedM

    reordered_list = [_reorder_name(name) for name in selectedAll]
    _dump_json('clasificated.json', reordered_list)

In [70]:
# Generate the JSON name lists (written to the working directory) from the Excel entry sheets.
scraping_names_from_pdf()

In [53]:
def results(name1, driver):
    """Search the current list page for ``name1`` and return the result table as text.

    Args:
        name1: Text typed into the list's search box.
        driver: Selenium WebDriver already positioned on the page that holds
            the search box and the results table.

    Returns:
        A list of lists of strings. The first entry holds the header texts
        (taken from the <span> inside each <th>); each following entry holds
        the cell texts of one <tbody> row. The first <tbody> row is not
        included.
    """
    waiter = WebDriverWait(driver, 15)    # explicit waits give up after 15 seconds

    search_locator = (By.CSS_SELECTOR, 'input[type="search"][name="fabrik_list_filter_all_29_com_fabrik_29"]')
    search_box = waiter.until(EC.element_to_be_clickable(search_locator))
    search_box.clear()
    search_box.send_keys(name1)
    search_box.send_keys(Keys.RETURN)

    # Fixed pause after submitting the search -- presumably to let the table
    # reload before it is read; TODO confirm a shorter/explicit wait is enough.
    time.sleep(5)

    table_locator = (By.CSS_SELECTOR, "table.table.table-striped.table-condensed.table-sm")
    table = waiter.until(EC.element_to_be_clickable(table_locator))
    body_rows = table.find_elements(By.XPATH, ".//tbody/tr")

    # Header row: the visible caption lives in a <span> inside each <th>.
    header_cells = table.find_elements(By.XPATH, ".//thead/tr/th")
    header_texts = [th.find_element(By.XPATH, ".//span").text for th in header_cells]

    table_data = [header_texts]
    # The first body row is skipped on purpose, exactly as before
    # (NOTE(review): presumably a non-data row -- verify against the page).
    for row in body_rows[1:]:
        table_data.append([td.text for td in row.find_elements(By.XPATH, ".//td")])

    return table_data


In [54]:
def scraping():    # Scrape the data
    """Log in to results.ittf.link and download the match table of every listed player.

    Reads the player names from 'clasificated.json', drives a Chrome browser
    through the login and navigation steps, calls ``results`` once per name
    and finally writes ``{name: table}`` to 'data.json'.

    The login is taken from the environment variables ITTF_USERNAME and
    ITTF_PASSWORD when they are set; otherwise the account previously
    embedded in this notebook is used.

    The browser is always closed, even when a step fails, so a crashed run
    no longer leaves a Chrome window/driver process behind.

    Returns:
        None.
    """
    # Load the names first so a missing or malformed file fails before a browser is started.
    with open('clasificated.json', 'r') as f:
        names = json.load(f)

    # NOTE(review): the fallback values keep the notebook working as before, but
    # credentials should not live in source -- move them to the environment.
    login_user = os.environ.get('ITTF_USERNAME', 'Selenium1')
    login_password = os.environ.get('ITTF_PASSWORD', 'Selenium1**')

    driver = webdriver.Chrome()    # Start the browser
    try:
        wait = WebDriverWait(driver, 10)    # Explicit waits give up after 10 seconds

        driver.get('https://results.ittf.link/index.php')    # Open the results site
        driver.maximize_window()

        accept_cookies = wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'jb-accept')))
        accept_cookies.click()

        username = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[type="text"][name="username"]')))
        username.send_keys(login_user)

        password = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[type="password"][name="password"]')))
        password.send_keys(login_password)

        bottom_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div.gotobottom")))
        bottom_button.click()

        time.sleep(2)

        continue_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[name='Submit']")))
        continue_button.click()    # Click the "Log in" button

        arrow = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Matches']")))
        arrow.click()

        link = wait.until(EC.element_to_be_clickable((By.XPATH, '//a[@href="/index.php/matches/players-matches/list/29"]')))
        link.click()

        select_element = wait.until(EC.element_to_be_clickable((By.ID, "limit29")))
        select_element.click()

        # Pick the last option of the page-size selector
        # (presumably the largest page size -- verify on the site).
        select = Select(select_element)
        select.select_by_index(len(select.options)-1)

        diccionario = {}
        for name in names:
            diccionario[name] = results(name, driver)
    finally:
        driver.quit()    # Close the browser, whether or not the scrape succeeded

    # Persist the collected tables as JSON
    with open("data.json", "w") as archivo_json:
        json.dump(diccionario, archivo_json, indent=4)

    return

# Proceso Estadístico

In [69]:
def enfrentamiento(team1, team2):
    """Simulate one match: return either ``team1`` or ``team2``, chosen at random."""
    contenders = [team1, team2]
    winner = random.choice(contenders)
    return winner

In [117]:
def FirstRounds(homeclubs, aways):
    """Play one preliminary round and return the list of players that advance.

    Up to 16 matches are drawn. For each match a random entry of ``homeclubs``
    is paired with a random entry of ``aways`` and ``enfrentamiento`` decides
    who advances; once ``aways`` runs out, the remaining home entries advance
    without playing.

    Args:
        homeclubs: Sequence of home entries. Only 16 of them are drawn when
            more are supplied; fewer than 16 simply yields fewer matches
            (previously this raised IndexError).
        aways: Sequence of challengers; may be empty or shorter than
            ``homeclubs``.

    Returns:
        List of advancing entries, one per match drawn.

    Neither input sequence is modified: the draw works on private copies, so
    callers can keep using their lists after the call.
    """
    max_matches = 16
    home_pool = list(homeclubs)
    away_pool = list(aways)

    winners = []
    for _ in range(min(max_matches, len(home_pool))):
        team1 = random.choice(home_pool)
        home_pool.remove(team1)
        if away_pool:
            team2 = random.choice(away_pool)
            away_pool.remove(team2)
            winners.append(enfrentamiento(team1, team2))
        else:
            # No challenger left: the home entry advances directly.
            winners.append(team1)
    return winners

In [71]:
def Bracket(homes, aways):
    """Simulate the 32-player knockout bracket and return the champion.

    Args:
        homes: Seeded players in seeding order. ``homes[0]`` and ``homes[1]``
            are placed at the two ends of the draw; seeds 3-4 (``homes[2:4]``),
            5-8 (``homes[4:8]``) and 9-16 (``homes[8:]``) are drawn at random
            into their reserved slots. At least 16 entries are required.
        aways: Unseeded players; one is drawn at random as the first-round
            opponent of every seed, so at least 16 entries are required.

    Returns:
        The entry that wins the final.

    Raises:
        IndexError: if ``homes`` or ``aways`` holds too few entries.

    Every match is decided by ``enfrentamiento``. Neither input sequence is
    modified: the draw works on private copies.
    """
    fixed_seeds = {'1': homes[0], '2': homes[1]}
    seed_pools = {
        '3-4': list(homes[2:4]),
        # Was homes[4:9], which overlapped the 9-16 group: seed 9 could be
        # drawn twice while one of seeds 5-8 was left out of the bracket.
        '5-8': list(homes[4:8]),
        '9-16': list(homes[8:]),
    }
    away_pool = list(aways)

    # Seed group that occupies each first-round slot, top of the draw to bottom.
    slot_layout = (
        '1', '9-16', '9-16', '5-8',
        '5-8', '9-16', '9-16', '3-4',
        '3-4', '9-16', '9-16', '5-8',
        '5-8', '9-16', '9-16', '2',
    )

    # Round of 32: every slot's seed meets a randomly drawn unseeded player.
    survivors = []
    for slot in slot_layout:
        if slot in fixed_seeds:
            team1 = fixed_seeds[slot]
        else:
            team1 = random.choice(seed_pools[slot])
            seed_pools[slot].remove(team1)
        team2 = random.choice(away_pool)
        away_pool.remove(team2)
        survivors.append(enfrentamiento(team1, team2))

    # Round of 16, quarterfinals, semifinals and final: neighbours in the
    # draw meet each other until a single player is left.
    while len(survivors) > 1:
        survivors = [
            enfrentamiento(survivors[i], survivors[i + 1])
            for i in range(0, len(survivors), 2)
        ]

    FinalWinner = survivors[0]
    return FinalWinner

In [128]:
# Placeholder data: one (name, player id) tuple per qualified player.
datos = [("Jugador" + str(i), i) for i in range(1, 68)]  # list of qualified players
# World ranking, best first. Assumed to list the player ids stored in the
# second field of each `datos` tuple -- replace with the real ranking order.
orden = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
        21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
        31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
        51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
        61, 62, 63, 64, 65, 66, 67]

# Sort the players by their position in the ranking. The key must look up the
# player's id: passing the whole tuple to `orden.index` (as before) raises
# ValueError because `orden` holds ids, not tuples.
datos = sorted(datos, key=lambda jugador: orden.index(jugador[1]))

# Preliminary rounds, played from the lowest-ranked group upwards: the winners
# of each round become the challengers of the next higher-ranked group of 16.
aways16 = FirstRounds(datos[16:32], FirstRounds(datos[32:48], FirstRounds(datos[48:64], datos[64:])))

# Main draw: the top 16 are seeded, the preliminary survivors are drawn against them.
Bracket(datos[:16], aways16)




('Jugador12', 12)