In [1]:
import pandas as pd
import glob
from collections import defaultdict
import altair as alt
from pyvis.network import Network

In [2]:
# Collect the paths of every ATP match CSV. glob returns paths in arbitrary,
# platform-dependent order, so sort them to make the load order reproducible.
file_list = sorted(glob.glob('data/atp_matches_*.csv'))

In [3]:
# Per-player totals: number of wins and number of matches played.
MostWinsDict = defaultdict(lambda: {'Victorias': 0, 'Total_Partidos': 0})
# Flat per-player counters holding the same figures as MostWinsDict.
victorias = defaultdict(int)
TotalPartidos = defaultdict(int)
# Head-to-head record keyed by the ordered pair (player1, player2). Every match
# is recorded twice, once from each player's point of view.
H2HDict = defaultdict(lambda: {'total_matches': 0, 'wins_player1': 0, 'wins_player2': 0})

for file in file_list:
    df = pd.read_csv(file)

    # Rows missing either name cannot be attributed to a player; skip them so
    # NaN never ends up as a dictionary key.
    nombres = df[['winner_name', 'loser_name']].dropna()

    # Iterating the two columns with zip avoids the per-row Series that
    # DataFrame.iterrows() builds, which dominates the runtime on large files.
    for winner, loser in zip(nombres['winner_name'], nombres['loser_name']):
        ganador = MostWinsDict[winner]
        ganador['Victorias'] += 1
        ganador['Total_Partidos'] += 1
        MostWinsDict[loser]['Total_Partidos'] += 1

        victorias[winner] += 1
        TotalPartidos[winner] += 1
        TotalPartidos[loser] += 1

        # Winner's perspective: one more match, one more win as player1.
        directo = H2HDict[(winner, loser)]
        directo['total_matches'] += 1
        directo['wins_player1'] += 1

        # Loser's perspective: one more match, one more win for player2.
        inverso = H2HDict[(loser, winner)]
        inverso['total_matches'] += 1
        inverso['wins_player2'] += 1



In [4]:
import pandas as pd
import altair as alt

def BuscarH2HJugador(diccionario, jugador):
    """Select the head-to-head entries that belong to one player.

    Args:
        diccionario: Mapping whose keys are indexable (the first element is
            compared against ``jugador``) and whose values are the records.
        jugador: Value to match against the first element of each key.

    Returns:
        A new dict with the matching ``key -> record`` pairs, in the same
        order as ``diccionario``. The records themselves are not copied.
    """
    coincidencias = {}
    for par, registro in diccionario.items():
        if par[0] == jugador:
            coincidencias[par] = registro
    return coincidencias

def GraficaH2H(nombre_jugador, top_n=10):
    """Build a stacked bar chart of a player's head-to-head records.

    Reads the module-level ``H2HDict`` and keeps the rivals the player has
    faced most often.

    Args:
        nombre_jugador: Player name, matched against the first element of the
            ``H2HDict`` keys.
        top_n: How many rivals to show, ranked by total matches played.
            Defaults to 10.

    Returns:
        An Altair chart with one bar per rival, split into wins ('Victorias')
        and losses ('Derrotas').
    """
    H2Hjugador = BuscarH2HJugador(H2HDict, nombre_jugador)

    # Collect the three columns in a single pass over the player's records.
    rivales, ganados, perdidos = [], [], []
    for (_, rival), registro in H2Hjugador.items():
        jugados = registro["total_matches"]
        # Skip pairings with no recorded matches.
        if not jugados >= 1:
            continue
        rivales.append(rival)
        ganados.append(registro["wins_player1"])
        # Every match not won by the player counts as a loss.
        perdidos.append(jugados - registro["wins_player1"])

    data = pd.DataFrame({
        'Rival': rivales,
        'Victorias': ganados,
        'Derrotas': perdidos
    })

    # Rank rivals by total number of meetings and keep the top ones.
    data['Total'] = data['Victorias'] + data['Derrotas']
    data = data.sort_values(by='Total', ascending=False).head(top_n)

    # Fold the two count columns into (Resultado, Cantidad) pairs so the bars
    # stack wins and losses for each rival.
    chart = alt.Chart(data).transform_fold(
        ['Victorias', 'Derrotas'],
        as_=['Resultado', 'Cantidad']
    ).mark_bar().encode(
        y=alt.Y('Rival:N', sort='-x'),
        x='Cantidad:Q',
        color='Resultado:N',
        tooltip=['Rival', 'Victorias', 'Derrotas', 'Total']
    ).properties(
        width=600,
        height=400,
        title='Head-to-Head de ' + nombre_jugador
    )

    return chart

# Example usage: build the chart, then leave it as the cell's last expression
# so the notebook renders it.
chart = GraficaH2H('Roger Federer')
chart


In [5]:
# NOTE: disabled code kept for reference only. The same win ratio
# ('Rendimiento') is computed in vectorized form on df_most_wins in a later cell.
# for player, stats in MostWinsDict.items():
#     victorias = stats['Victorias']
#     total_partidos = stats['Total_Partidos']

#     # Compute the win ratio and store it in the dictionary
#     if total_partidos > 0:
#         rendimiento = victorias / total_partidos
#     else:
#         rendimiento = 0.0

#     MostWinsDict[player]['Rendimiento'] = rendimiento

In [6]:
# One row per player: the dict keys (player names) become the 'Jugador'
# column, and the per-player totals become the remaining columns.
df_most_wins = (
    pd.DataFrame.from_dict(MostWinsDict, orient='index')
    .reset_index()
    .rename(columns={'index': 'Jugador'})
)

# The 20 players with the most wins.
top_20_victorias = df_most_wins.sort_values(by='Victorias', ascending=False).head(20)

# Horizontal bars ordered by win count, one colour per player (no legend).
chart_victorias = (
    alt.Chart(top_20_victorias)
    .mark_bar()
    .encode(
        x=alt.X('Victorias:Q', title='Victorias'),
        y=alt.Y('Jugador:N', title='Jugador', sort='-x'),
        color=alt.Color('Jugador:N', legend=None),
        tooltip=['Jugador', 'Victorias'],
    )
    .properties(title='Top 20 jugadores con más victorias', width=600)
)

chart_victorias

In [10]:
# Minimum number of matches a player must have played to be ranked. The chart
# title is built from this value so the label always matches the filter.
MIN_PARTIDOS = 100

# Win ratio = wins / matches played. assign() returns a new frame, so
# re-running this cell never writes a column into the filtered slice that a
# previous run left in df_most_wins.
df_most_wins = df_most_wins.assign(
    Rendimiento=lambda tabla: tabla['Victorias'] / tabla['Total_Partidos']
)
df_most_wins = df_most_wins[df_most_wins['Total_Partidos'] >= MIN_PARTIDOS]
top_20_rendimiento = df_most_wins.sort_values(by='Rendimiento', ascending=False).head(20)

chart_rendimiento = alt.Chart(top_20_rendimiento).mark_bar().encode(
    x=alt.X('Rendimiento:Q', title='Rendimiento'),
    y=alt.Y('Jugador:N', title='Jugador', sort='-x'),
    color=alt.Color('Jugador:N',  legend=None),
    tooltip=['Jugador', 'Rendimiento']
).properties(
    # The filter is inclusive (>=), hence "al menos" rather than "más de".
    title=f'Top 20 jugadores con mejor rendimiento (al menos {MIN_PARTIDOS} partidos)',
    width=600
)

# Text layer derived from the bar chart: prints the ratio, shifted 10 px to
# the right of each bar's end.
text = chart_rendimiento.mark_text(
    align='left',
    baseline='middle',
    dx=10,
).encode(
    text=alt.Text('Rendimiento:Q', format='.3f')
)

# Layer bars and labels, and enable pan/zoom.
chart_rendimiento = (chart_rendimiento + text).interactive()

chart_rendimiento

In [1]:
# Thresholds controlling which players appear and how large their nodes are.
MIN_VICTORIAS_GRAFO = 600  # a player needs MORE than this many wins to appear
ESCALA_NODO = 20           # node size = wins / ESCALA_NODO

# Win counts per player. MostWinsDict already holds them from the initial load
# of the CSV files, so the files are not read and iterated a second time here.
victorias = defaultdict(int, {
    player: stats['Victorias']
    for player, stats in MostWinsDict.items()
    if stats['Victorias'] > 0
})

top_players = {
    player for player, stats in MostWinsDict.items()
    if stats['Victorias'] > MIN_VICTORIAS_GRAFO
}

net = Network(notebook=True, cdn_resources='in_line',select_menu=True, filter_menu=True)
net.toggle_physics(True)
# The tuning parameters live under the key of the solver that is actually
# selected ("forceAtlas2Based"). They were previously under "barnes_hut", which
# is not a vis.js option name, so they had no effect on the layout.
net.set_options("""
var options = {
  "physics": {
    "forceAtlas2Based": {
      "gravitationalConstant": -100,
      "centralGravity": 0.001,
      "springLength": 250,
      "springConstant": 0.09
    },
    "maxVelocity": 50,
    "solver": "forceAtlas2Based",
    "timestep": 0.35,
    "stabilization": {
      "enabled": true,
      "iterations": 2000,
      "updateInterval": 25
    }
  },
  "nodes": {
    "font": {
      "size": 50,
      "color": "#000000"
    },
    "scaling": {
      "label": {
        "enabled": true
      }
    }
  }
}
""")

# Sorted iteration keeps the node order stable between runs (set order is not).
for player in sorted(top_players):
    net.add_node(player, label=player, size=victorias[player] / ESCALA_NODO)

# H2HDict keys are always (player1, player2) tuples, so they unpack directly.
for (player1, player2), stats in H2HDict.items():
    if player1 not in top_players or player2 not in top_players:
        continue
    # Each pairing is stored in both directions with the same total; handle
    # it once instead of submitting the same undirected edge twice.
    if player1 > player2:
        continue
    total_matches = stats['total_matches']
    net.add_edge(player1, player2, value=10*total_matches, title=f"Total Partidos: {total_matches}", width=total_matches)


# Save the graph to an HTML file and display it
net.show('tennis_top_players.html')

NameError: name 'defaultdict' is not defined