In [17]:
import pandas as pd

df = pd.read_csv('df_fifawc2022.csv')
df = df.reset_index()
df = df.drop("index", axis=1)

df.columns = [col.replace(' ', '_') for col in df.columns]

teams = df[['team1', 'team2']].values.ravel()
teams = pd.unique(teams)

consolidated_data = []

for team in teams:
    team_data = {}
    for column in df.columns:
        # Excluir 'team1', 'team2' y 'Unnamed: 0'
        if column == 'team1' or column == 'team2' or column == 'Unnamed: 0':
            continue

        team1_values = df.loc[df['team1'] == team, column]
        team2_values = df.loc[df['team2'] == team, column]

        if '_team1' in column or '_team2' in column:
            team_data[column.replace('_team1', '').replace('_team2', '')] = (team1_values.sum() + team2_values.sum())
        else:
            all_values = pd.concat([team1_values.reset_index(drop=True),
                                    team2_values.reset_index(drop=True)])
            team_data[column] = all_values.mean()

    consolidated_data.append(team_data)

consolidated_df = pd.DataFrame(consolidated_data, index=teams)
print(consolidated_df)

                Unnamed:_0  possession  possession_in_contest  \
QATAR            16.333333        1.36               0.086667   
ECUADOR          17.000000        1.21               0.126667   
ENGLAND          32.800000        1.88               0.098000   
IRAN             17.333333        0.97               0.126667   
SENEGAL          25.750000        1.60               0.122500   
NETHERLANDS      31.400000        2.20               0.112000   
UNITED STATES    26.250000        1.78               0.112500   
WALES            17.666667        1.30               0.120000   
ARGENTINA        42.000000        3.00               0.120000   
SAUDI ARABIA     21.333333        1.36               0.143333   
DENMARK          21.000000        1.37               0.133333   
TUNISIA          20.666667        1.20               0.150000   
MEXICO           22.666667        1.22               0.153333   
POLAND           28.750000        1.97               0.120000   
FRANCE           42.71428

In [18]:
sorted_stats = {}

# Iterar sobre cada columna (estadística)
for stat in consolidated_df.columns:
    sorted_stats[stat] = consolidated_df[stat].sort_values(ascending=False).index.tolist()

# Convertir a DataFrame (opcional)
sorted_stats_df = pd.DataFrame(sorted_stats)

sorted_stats_df

Unnamed: 0,Unnamed:_0,possession,possession_in_contest,number_of_goals,total_attempts,conceded,goal_inside_the_penalty_area,goal_outside_the_penalty_area,assists,on_target_attempts,...,crosses,crosses_completed,switches_of_play_completed,corners,free_kicks,penalties_scored,goal_preventions,own_goals,forced_turnovers,defensive_pressures_applied
0,MOROCCO,MOROCCO,MEXICO,ARGENTINA,CROATIA,FRANCE,ARGENTINA,ENGLAND,SERBIA,ARGENTINA,...,CROATIA,CROATIA,MOROCCO,ARGENTINA,ARGENTINA,ARGENTINA,FRANCE,MOROCCO,ARGENTINA,ARGENTINA
1,CROATIA,CROATIA,TUNISIA,FRANCE,ARGENTINA,PORTUGAL,FRANCE,WALES,GERMANY,CROATIA,...,MOROCCO,FRANCE,CROATIA,MOROCCO,MOROCCO,FRANCE,CROATIA,CANADA,CROATIA,CROATIA
2,FRANCE,FRANCE,SAUDI ARABIA,ENGLAND,MOROCCO,ARGENTINA,GHANA,MOROCCO,FRANCE,NETHERLANDS,...,FRANCE,MOROCCO,ARGENTINA,CROATIA,CROATIA,ECUADOR,ARGENTINA,ECUADOR,FRANCE,FRANCE
3,ARGENTINA,ARGENTINA,AUSTRALIA,GHANA,FRANCE,CROATIA,SENEGAL,MEXICO,ENGLAND,BRAZIL,...,ARGENTINA,ARGENTINA,FRANCE,BRAZIL,NETHERLANDS,QATAR,MOROCCO,BRAZIL,MOROCCO,MOROCCO
4,PORTUGAL,PORTUGAL,URUGUAY,SENEGAL,NETHERLANDS,ENGLAND,GERMANY,BRAZIL,GHANA,POLAND,...,NETHERLANDS,SENEGAL,KOREA REPUBLIC,NETHERLANDS,FRANCE,ENGLAND,BRAZIL,GHANA,BRAZIL,BRAZIL
5,BRAZIL,NETHERLANDS,DENMARK,GERMANY,POLAND,SWITZERLAND,NETHERLANDS,KOREA REPUBLIC,ARGENTINA,FRANCE,...,PORTUGAL,NETHERLANDS,POLAND,GERMANY,PORTUGAL,IRAN,PORTUGAL,PORTUGAL,NETHERLANDS,ENGLAND
6,SWITZERLAND,BRAZIL,SERBIA,SERBIA,BRAZIL,SPAIN,QATAR,CAMEROON,COSTA RICA,ENGLAND,...,BRAZIL,PORTUGAL,BRAZIL,PORTUGAL,BRAZIL,SENEGAL,KOREA REPUBLIC,KOREA REPUBLIC,ENGLAND,NETHERLANDS
7,KOREA REPUBLIC,POLAND,GHANA,NETHERLANDS,ENGLAND,BRAZIL,COSTA RICA,CANADA,JAPAN,GERMANY,...,UNITED STATES,BRAZIL,QATAR,FRANCE,ENGLAND,NETHERLANDS,JAPAN,URUGUAY,PORTUGAL,SPAIN
8,SPAIN,SPAIN,IRAN,WALES,GERMANY,COSTA RICA,JAPAN,FRANCE,PORTUGAL,SAUDI ARABIA,...,AUSTRALIA,ENGLAND,SENEGAL,SENEGAL,SENEGAL,UNITED STATES,AUSTRALIA,CAMEROON,JAPAN,PORTUGAL
9,ENGLAND,JAPAN,ECUADOR,KOREA REPUBLIC,PORTUGAL,NETHERLANDS,ECUADOR,SERBIA,KOREA REPUBLIC,UNITED STATES,...,SENEGAL,BELGIUM,PORTUGAL,POLAND,SWITZERLAND,WALES,ENGLAND,SWITZERLAND,SWITZERLAND,JAPAN


In [19]:
import pandas as pd

top_teams = {}
for column in consolidated_df.columns:
  top_teams[column] = consolidated_df.sort_values(by=column, ascending=False).iloc[0].name

# Contar las veces que cada equipo aparece como "top"
from collections import Counter
top_team_counts = Counter(top_teams.values())

# Obtener los 3 equipos que más aparecen como "top"
top_4_teams = top_team_counts.most_common(4)  # Obtener los 3 más comunes

print("Los 4 mejores equipos que más veces lideran las estadísticas son:")
for team, count in top_4_teams:
    print(f"{team}: {count} veces")

Los 4 mejores equipos que más veces lideran las estadísticas son:
CROATIA: 12 veces
MOROCCO: 11 veces
ARGENTINA: 10 veces
FRANCE: 4 veces


In [20]:
argentina_stats = []
for column in consolidated_df.columns:
  top_team = consolidated_df.sort_values(by=column, ascending=False).iloc[0].name
  if top_team == 'ARGENTINA':
    argentina_stats.append(column)

print("Estadísticas en las que Argentina es líder:")
for stat in argentina_stats:
  print(stat)

Estadísticas en las que Argentina es líder:
number_of_goals
goal_inside_the_penalty_area
on_target_attempts
yellow_cards
fouls_against
corners
free_kicks
penalties_scored
forced_turnovers
defensive_pressures_applied


In [21]:
import pandas as pd
from collections import Counter

top_4_appearances = []
for column in consolidated_df.columns:
  top_4_teams = consolidated_df.sort_values(by=column, ascending=False).head(4).index.tolist()
  if 'ARGENTINA' in top_4_teams:
    top_4_appearances.append('ARGENTINA')

argentina_count = Counter(top_4_appearances)['ARGENTINA']
total_estadisticas = consolidated_df.shape[1]

print(f"Argentina aparece entre los 4 primeros en {argentina_count} estadísticas.")
print(f"El total de estadísticas analizadas es: {total_estadisticas}")

Argentina aparece entre los 4 primeros en 34 estadísticas.
El total de estadísticas analizadas es: 42


In [22]:
argentina_stats = []
for column in consolidated_df.columns:
  top_4_teams = consolidated_df.sort_values(by=column, ascending=False).head(4).index.tolist()
  if 'ARGENTINA' in top_4_teams:
    argentina_stats.append(column)

print("Estadísticas en las que Argentina está entre los 4 primeros:")
for stat in argentina_stats:
  print(stat)

Estadísticas en las que Argentina está entre los 4 primeros:
Unnamed:_0
possession
number_of_goals
total_attempts
conceded
goal_inside_the_penalty_area
on_target_attempts
off_target_attempts
attempts_inside_the_penalty_area
attempts_inside_the_penalty_area_
attempts_outside_the_penalty_area_
central_channel
total_offers_to_receive
inbehind_offers_to_receive
inbetween_offers_to_receive
infront_offers_to_receive
receptions_between_midfield_and_defensive_lines
attempted_line_breaks
completed_line_breaks
attempted_defensive_line_breaks
completed_defensive_line_breaks
yellow_cards
fouls_against
passes
passes_completed
crosses
crosses_completed
switches_of_play_completed
corners
free_kicks
penalties_scored
goal_preventions
forced_turnovers
defensive_pressures_applied
