### **Librerías necesarias para el Web Scraping**

In [2]:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
import time
import os
from dotenv import load_dotenv
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### **Funciones**

In [19]:
def close_privacy_iframe(wait, driver):
    """Accept the privacy dialog that is rendered inside an iframe, if any.

    Waits for an iframe whose id starts with ``sp_message_iframe``, switches
    into it and clicks the button whose text contains 'Aceptar'. The call is
    best-effort: any failure is printed and swallowed so the caller can carry
    on without the dialog having been closed.

    Args:
        wait: Object exposing ``until(condition)`` (a ``WebDriverWait`` in the
            caller) used to wait for the iframe and the button.
        driver: WebDriver whose frame context is switched.
    """
    try:
        iframe = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "iframe[id^='sp_message_iframe']")))
        driver.switch_to.frame(iframe)
        try:
            close_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Aceptar')]")))
            close_button.click()
        finally:
            # Always leave the iframe context, even when the button was not
            # found or could not be clicked; otherwise every later lookup
            # would be resolved inside the iframe instead of the main page.
            driver.switch_to.default_content()
        # Pause after accepting, before the caller touches the page.
        time.sleep(5)
    except Exception as e:
        print(f"No se encontró o no se pudo cerrar el iframe de privacidad: {e}")
        
def add_values_to_list(static_list, dynamic_list, value_list):
    """Pad *dynamic_list* and *value_list* in place so they follow *static_list*.

    Walks *static_list* position by position:

    * When *dynamic_list* has no entry at that position, or its entry differs
      from the expected label, the expected label is inserted into
      *dynamic_list* and an empty string into *value_list* at that position.
    * When the labels match and the label is "Agente", "Club actual" or
      "Redes sociales", only an empty string is inserted into *value_list*.
    * Otherwise nothing is changed for that position.

    Both lists are mutated; nothing is returned.
    """
    placeholder_labels = ("Agente", "Club actual", "Redes sociales")

    for position, expected_label in enumerate(static_list):
        label_is_missing = (
            position >= len(dynamic_list)
            or expected_label != dynamic_list[position]
        )
        if label_is_missing:
            dynamic_list.insert(position, expected_label)
            value_list.insert(position, "")
            continue

        if dynamic_list[position] in placeholder_labels:
            value_list.insert(position, "")

def _span_after_label(spans, label_text):
    """Return the span that follows the one whose text equals *label_text*.

    If several spans carry the label, the last one wins. Returns ``None``
    when the label is absent or is the final span (nothing follows it).
    """
    next_position = None
    for index, span in enumerate(spans):
        if span.text == label_text:
            next_position = index + 1
    if next_position is None or next_position >= len(spans):
        return None
    return spans[next_position]


def _scrape_player(driver, wait, player, content_values_key):
    """Search *player* from the current page and return the profile as a dict.

    Raises whatever the underlying waits/lookups raise; the caller decides
    how to handle a failed player.
    """
    # Run the search from the header search box.
    input_element = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "tm-header__input--search-field")))
    button_element = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "tm-header__input--search-send")))
    input_element.clear()
    input_element.send_keys(player)
    button_element.click()
    time.sleep(2)

    # Open the first row of the result table.
    first_result = wait.until(EC.visibility_of_element_located((By.XPATH, "//tbody/tr[1]/td/table/tbody/tr[1]/td[2]/a")))
    first_result.click()
    time.sleep(2)

    info_table = wait.until(EC.presence_of_element_located((By.XPATH, "//div[@class='info-table info-table--right-space ']")))

    player_info = {}
    content_labels = [element.text.strip().replace(':', '') for element in info_table.find_elements(By.XPATH, ".//span[@class='info-table__content info-table__content--regular']")]
    content_values = [element.text.strip() for element in info_table.find_elements(By.XPATH, ".//span[@class='info-table__content info-table__content--bold']")]
    content_values = [item for item in content_values if item.strip() != ""]

    # Pad labels/values in place so both lists follow content_values_key.
    add_values_to_list(static_list=content_values_key, dynamic_list=content_labels, value_list=content_values)

    for label, value in zip(content_labels, content_values):
        player_info[label] = value

    info_image_of_profile_player = wait.until(EC.presence_of_element_located((By.XPATH, "//div[@class='modal-trigger']/img[@class='data-header__profile-image']")))
    player_info["Image URL"] = info_image_of_profile_player.get_attribute("src")

    # The market-value box gets a longer timeout than the shared wait.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "data-header__box--small"))
    )
    info_market_box = driver.find_element(By.CLASS_NAME, "data-header__box--small")
    value_market = info_market_box.find_element(By.CLASS_NAME, "data-header__market-value-wrapper")
    last_value_market = info_market_box.find_element(By.CLASS_NAME, "data-header__last-update")

    # Only the first line of the wrapper text is kept as the market value.
    player_info["Valor de mercado actual"] = value_market.text.split("\n")[0]
    player_info["Último revisión"] = last_value_market.text

    # Agent and club are read from the span that directly follows their
    # label among the direct child spans of the info table.
    direct_spans = info_table.find_elements(By.XPATH, "./span")

    span_agente = _span_after_label(direct_spans, "Agente:")
    if span_agente is None:
        player_info["Agente 2"] = ""
    else:
        span_sub_agente = span_agente.find_elements(By.XPATH, "./span")
        if span_sub_agente:
            # Agent name is nested inside the first child span.
            inner_span = span_sub_agente[0].find_element(By.XPATH, ".//span")
            player_info["Agente 2"] = inner_span.text
        else:
            player_info["Agente 2"] = span_agente.find_element(By.XPATH, "./a").text.strip()

    span_club = _span_after_label(direct_spans, "Club actual:")
    if span_club is None:
        # Mirror the agent handling: a missing label yields an empty value
        # instead of discarding the whole player.
        player_info["Club 2"] = ""
    else:
        player_info["Club 2"] = span_club.find_element(By.XPATH, "./a[position()=2]").text.strip()

    return player_info


def get_data_from_web_scraping(players, web_path, content_values_key):
    """Scrape the profile of every player in *players* starting at *web_path*.

    A headless Chrome session is started with the driver located by the
    ``LOCAL_PATH`` environment variable (loaded through ``load_dotenv``).
    Each player is searched from the home page; a player whose scraping
    fails is reported with ``print`` and skipped.

    Args:
        players: Iterable of search strings, one per player.
        web_path: URL opened before each search.
        content_values_key: Ordered list of expected profile labels, used to
            align the scraped labels and values.

    Returns:
        list[dict]: One dict per successfully scraped player, holding the
        profile labels plus "Image URL", "Valor de mercado actual",
        "Último revisión", "Agente 2" and "Club 2".
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")

    load_dotenv()

    service = Service(os.getenv("LOCAL_PATH"))

    driver = webdriver.Chrome(service=service, options=chrome_options)

    players_info = []

    try:
        driver.get(web_path)

        wait = WebDriverWait(driver, 5)

        close_privacy_iframe(wait=wait, driver=driver)

        for index, player in enumerate(players):
            try:
                if index > 0:
                    # Start every search from the home page, also after a
                    # failed player left the browser on an arbitrary page.
                    driver.get(web_path)
                players_info.append(
                    _scrape_player(driver, wait, player, content_values_key)
                )
            except Exception as e:
                print(f"Hubo un problema con '{player}': {e}")
    finally:
        # Release the browser even when navigation or setup raised.
        driver.quit()

    return players_info


### **Declaración de constantes**

In [17]:
# Start URL handed to get_data_from_web_scraping, which opens it before searching.
web_path = "https://www.transfermarkt.pe/"

In [18]:
# Expected profile labels, in order. get_data_from_web_scraping passes this
# list to add_values_to_list as the reference against which the scraped
# labels and values are padded.
content_values_key = [
    "Nombre en país de origen",
    "F. Nacim./Edad",
    "Lugar de nacimiento",
    "Altura",
    "Nacionalidad",
    "Posición",
    "Pie",
    "Agente",
    "Club actual",
    "Fichado",
    "Contrato hasta",
    "Opción de contrato",
    "Última renovación",
    "Proveedor",
    "Redes sociales"
]

### **Nombre de los jugadores de la selección peruana de los cuales se extraerán los datos de Transfermarkt**

In [20]:
# Search strings for the Peru squad; each one is typed into the site's
# search box by get_data_from_web_scraping.
players_peru = ["pedro gallese", "carlos zambrano", "miguel araujo", "alexander callens", "andy polo", "marcos lópez", "wilder cartagena",
           "piero quispe", "sergio peña", "edison flores", "gianluca lapadula"]

In [21]:
# Scrape the Peru squad and build the final table in a single chain.
players_info_peru = get_data_from_web_scraping(
    players=players_peru,
    web_path=web_path,
    content_values_key=content_values_key,
)
df_peru = (
    pd.DataFrame(players_info_peru)
    .fillna("")
    # Drop the placeholder columns; "Agente 2" / "Club 2" hold the values
    # that are kept and take over the plain names below.
    .drop(columns=["Agente", "Club actual", "Redes sociales"])
    .rename(columns={"Agente 2": "Agente", "Club 2": "Club"})
)
df_peru

Unnamed: 0,Nombre en país de origen,F. Nacim./Edad,Lugar de nacimiento,Altura,Nacionalidad,Posición,Pie,Fichado,Contrato hasta,Opción de contrato,Última renovación,Proveedor,Image URL,Valor de mercado actual,Último revisión,Agente,Club,Prestado de,Contrato allí hasta
0,Pedro David Gallese Quiroz,23/02/1990 (34),Lima,"1,89 m",Perú,Portero,izquierdo,17/01/2020,31/12/2024,Un año más por parte del club,27/12/2022,adidas,https://img.a.transfermarkt.technology/portrai...,"1,20 mill. €",Última revisión: 13/06/2024,Adequatesports,Orlando City SC,,
1,Carlos Augusto Zambrano Ochandarte,10/07/1989 (34),Callao,"1,85 m",Perú,Defensa - Defensa central,derecho,08/01/2023,31/12/2024,,,Nike,https://img.a.transfermarkt.technology/portrai...,375 mil €,Última revisión: 28/05/2024,Miembro de su ...,Club Alianza Lima,,
2,Miguel Gianpierre Araujo Blanco,24/10/1994 (29),Lima,"1,81 m",Perú,Defensa - Defensa central,derecho,05/07/2023,31/12/2025,Un año más por parte del club,,Nike,https://img.a.transfermarkt.technology/portrai...,850 mil €,Última revisión: 13/06/2024,AGREF,Portland Timbers,,
3,Alexander Martín Marquinho Callens Asín,04/05/1992 (32),Callao,"1,86 m",Perú\n España,Defensa - Defensa central,izquierdo,08/09/2023,30/06/2024,Opción de compra,,Nike,https://img.a.transfermarkt.technology/portrai...,"1,20 mill. €",Última revisión: 30/05/2024,DV7 Management,AEK Atenas FC,Girona FC,30/06/2025
4,Andy Jorman Polo Andrade,29/09/1994 (29),Lima,"1,74 m",Perú,Delantero - Extremo derecho,derecho,21/03/2022,31/12/2025,,09/12/2023,Nike,https://img.a.transfermarkt.technology/portrai...,"1,00 mill. €",Última revisión: 28/05/2024,Promoesport,Universitario de Deportes,,
5,Marcos Johan López Lanfranco,20/11/1999 (24),Lima,"1,76 m",Perú,Defensa - Lateral izquierdo,izquierdo,08/08/2022,30/06/2026,,,Nike,https://img.a.transfermarkt.technology/portrai...,"3,00 mill. €",Última revisión: 31/05/2024,Vibra Futbol,Feyenoord Rotterdam,,
6,Wilder José Cartagena Mendoza,23/09/1994 (29),Lima,"1,80 m",Perú,Medio campo - Pivote,derecho,31/01/2024,31/12/2025,,,Nike,https://img.a.transfermarkt.technology/portrai...,"2,50 mill. €",Última revisión: 13/06/2024,FFOSA Sports,Orlando City SC,,
7,Piero Aldair Quispe Córdova,14/08/2001 (22),Lima,"1,68 m",Perú,Medio campo - Mediocentro ofensivo,derecho,01/01/2024,31/12/2026,,,Nike,https://img.a.transfermarkt.technology/portrai...,"1,80 mill. €",Última revisión: 14/06/2024,Teamtalentsagency,Pumas UNAM,,
8,Sergio Fernando Peña Flores,28/09/1995 (28),Lima,"1,78 m",Perú\n España,Medio campo - Mediocentro,derecho,04/08/2021,31/12/2024,,,Nike,https://img.a.transfermarkt.technology/portrai...,"2,80 mill. €",Última revisión: 13/06/2024,AGREF,Malmoe FF,,
9,Edison Michael Flores Peralta,14/05/1994 (30),Lima,"1,70 m",Perú,Delantero - Extremo izquierdo,izquierdo,01/07/2023,31/12/2024,,,adidas,https://img.a.transfermarkt.technology/portrai...,"1,20 mill. €",Última revisión: 28/05/2024,AGREF,Universitario de Deportes,Atlas Guadalajara,-


### **Nombre de los jugadores de la selección argentina de los cuales se extraerán los datos de Transfermarkt**

In [23]:
# Search strings for the Argentina squad; each one is typed into the site's
# search box by get_data_from_web_scraping.
players_argentina = ["emiliano martínez", "nahuel molina", "cristian romero", "lisandro martínez", "nicolás tagliafico", "enzo fernández",
           "alexis mac allister", "rodrigo de paul", "nicolás gonzález", "lionel messi", "julián álvarez"]

In [25]:
# Scrape the Argentina squad and build the table.
players_info_argentina = get_data_from_web_scraping(
    players=players_argentina,
    web_path=web_path,
    content_values_key=content_values_key,
)
df_argentina = (
    pd.DataFrame(players_info_argentina)
    .fillna("")
    .drop(columns=["Agente", "Club actual", "Redes sociales"])
)
# Manual name overrides addressed by row label.
# NOTE(review): these labels match the order of players_argentina only if
# every player was scraped successfully; a skipped player shifts the rows.
df_argentina.loc[3, "Nombre en país de origen"] = "Lisandro Martínez"
df_argentina.loc[6, "Nombre en país de origen"] = "Alexis Mac Allister"
df_argentina.loc[10, "Nombre en país de origen"] = "Julián Álvarez"
df_argentina = df_argentina.rename(columns={"Agente 2": "Agente", "Club 2": "Club"})
df_argentina

Unnamed: 0,Nombre en país de origen,F. Nacim./Edad,Lugar de nacimiento,Altura,Nacionalidad,Posición,Pie,Fichado,Contrato hasta,Opción de contrato,Última renovación,Proveedor,Image URL,Valor de mercado actual,Último revisión,Agente,Club
0,Damián Emiliano Martínez Romero,02/09/1992 (31),Mar del Plata,"1,95 m",Argentina\n España,Portero,derecho,16/09/2020,30/06/2027,,21/01/2022,adidas,https://img.a.transfermarkt.technology/portrai...,"28,00 mill. €",Última revisión: 27/05/2024,Universal Twenty Two,Aston Villa
1,Nahuel Molina Lucero,06/04/1998 (26),Embalse,"1,75 m",Argentina,Defensa - Lateral derecho,derecho,28/07/2022,30/06/2027,,,,https://img.a.transfermarkt.technology/portrai...,"28,00 mill. €",Última revisión: 07/06/2024,,Atlético de Madrid
2,Cristian Gabriel Romero,27/04/1998 (26),Córdoba,"1,85 m",Argentina,Defensa - Defensa central,derecho,30/08/2022,30/06/2027,,,,https://img.a.transfermarkt.technology/portrai...,"60,00 mill. €",Última revisión: 27/05/2024,Ciro Palermo,Tottenham Hotspur
3,Lisandro Martínez,18/01/1998 (26),Gualeguay,"1,75 m",Argentina,Defensa - Defensa central,izquierdo,27/07/2022,30/06/2027,Un año más,,,https://img.a.transfermarkt.technology/portrai...,"45,00 mill. €",Última revisión: 27/05/2024,Score Futbol,Manchester United
4,Nicolás Alejandro Tagliafico,31/08/1992 (31),Rafael Calzada,"1,72 m",Argentina\n Italia,Defensa - Lateral izquierdo,izquierdo,23/07/2022,30/06/2025,,,Nike,https://img.a.transfermarkt.technology/portrai...,"8,00 mill. €",Última revisión: 03/06/2024,CAA Stellar,Olympique de Lyon
5,Enzo Jeremías Fernández,17/01/2001 (23),San Martín,"1,78 m",Argentina,Medio campo - Mediocentro,derecho,31/01/2023,30/06/2032,,,,https://img.a.transfermarkt.technology/portrai...,"75,00 mill. €",Última revisión: 27/05/2024,DE 9 FÚTBOL,Chelsea FC
6,Alexis Mac Allister,24/12/1998 (25),Santa Rosa,"1,76 m",Argentina\n Italia,Medio campo - Mediocentro,derecho,01/07/2023,30/06/2028,,,adidas,https://img.a.transfermarkt.technology/portrai...,"75,00 mill. €",Última revisión: 27/05/2024,,Liverpool FC
7,Rodrigo Javier de Paul,24/05/1994 (30),Sarandí,"1,80 m",Argentina\n Italia,Medio campo - Mediocentro,derecho,12/07/2021,30/06/2026,,,adidas,https://img.a.transfermarkt.technology/portrai...,"30,00 mill. €",Última revisión: 07/06/2024,Augustin Jimenez,Atlético de Madrid
8,Nicolás Iván González,06/04/1998 (26),Belén de Escobar,"1,80 m",Argentina\n Italia,Delantero - Extremo derecho,izquierdo,01/07/2021,30/06/2028,,26/09/2023,,https://img.a.transfermarkt.technology/portrai...,"35,00 mill. €",Última revisión: 05/06/2024,Gea World,Fiorentina
9,Lionel Andrés Messi Cuccitini,24/06/1987 (37),Rosario,"1,70 m",Argentina\n España,Delantero - Extremo derecho,izquierdo,15/07/2023,31/12/2025,,,adidas,https://img.a.transfermarkt.technology/portrai...,"30,00 mill. €",Última revisión: 13/06/2024,Miembro de su ...,Inter Miami CF
