The dataset is organized in 3 files:

**events.csv** contains event data about each game. Text commentary was scraped from bbc.com, espn.com and onefootball.com.

**ginf.csv** contains metadata and market odds about each game. Odds were collected from oddsportal.com.

**dictionary.txt** contains a dictionary with the textual description of each categorical variable coded with integers

In [None]:
import pandas as pd
import io
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Hard-coded location of events.csv; adjust it when the file lives elsewhere.
ruta_archivo = '/content/events.csv'

# Parsing stays best-effort (malformed rows are still dropped), but "warn"
# reports each dropped line instead of discarding it silently, so a truncated
# or corrupted file is noticed before any counts are computed from it.
df = pd.read_csv(ruta_archivo, delimiter=",", on_bad_lines="warn", engine="python")
df.head()

Unnamed: 0,id_odsp,id_event,sort_order,time,text,event_type,event_type2,side,event_team,opponent,...,player_in,player_out,shot_place,shot_outcome,is_goal,location,bodypart,assist_method,situation,fast_break
0,UFot0hit/,UFot0hit1,1,2,Attempt missed. Mladen Petric (Hamburg) left f...,1,12.0,2,Hamburg SV,Borussia Dortmund,...,,,6.0,2.0,0,9.0,2.0,1,1.0,0
1,UFot0hit/,UFot0hit2,2,4,"Corner, Borussia Dortmund. Conceded by Dennis...",2,,1,Borussia Dortmund,Hamburg SV,...,,,,,0,,,0,,0
2,UFot0hit/,UFot0hit3,3,4,"Corner, Borussia Dortmund. Conceded by Heiko ...",2,,1,Borussia Dortmund,Hamburg SV,...,,,,,0,,,0,,0
3,UFot0hit/,UFot0hit4,4,7,Foul by Sven Bender (Borussia Dortmund).,3,,1,Borussia Dortmund,Hamburg SV,...,,,,,0,,,0,,0
4,UFot0hit/,UFot0hit5,5,7,Gokhan Tore (Hamburg) wins a free kick in the ...,8,,2,Hamburg SV,Borussia Dortmund,...,,,,,0,2.0,,0,,0


event_type
0	Announcement
1	Attempt
2	Corner
3	Foul
4	Yellow card
5	Second yellow card
6	Red card
7	Substitution
8	Free kick won
9	Offside
10	Hand ball
11	Penalty conceded


event_type2
12	Key Pass
13	Failed through ball
14	Sending off
15	Own goal


In [None]:
# Show the dtype pandas assigned to each column of the events frame.
df.dtypes

Unnamed: 0,0
id_odsp,object
id_event,object
sort_order,int64
time,int64
text,object
event_type,int64
event_type2,float64
side,int64
event_team,object
opponent,object


In [None]:
# List the column labels of df.
# NOTE(review): the saved output of this cell lists columns such as
# 'link_odsp', 'odd_h' and 'odd_a', which do not match the events frame shown
# by df.head() -- presumably it was produced while df held a different file.
# Re-run after a kernel restart to confirm.
df.columns

Index(['id_odsp', 'link_odsp', 'adv_stats', 'date', 'league', 'season',
       'country', 'ht', 'at', 'fthg', 'ftag', 'odd_h', 'odd_d', 'odd_a',
       'odd_over', 'odd_under', 'odd_bts', 'odd_bts_n'],
      dtype='object')

In [None]:
# Count missing values per column (inspection only; nothing is modified or cleaned here)
df.isna().sum()

Unnamed: 0,0
id_odsp,0
id_event,0
sort_order,0
time,0
text,0
event_type,0
event_type2,102228
side,0
event_team,0
opponent,0


In [None]:
# One row per (match id, event type) pair, keeping the first non-null value of
# every other column within each pair.
# The grouping is computed once for the whole frame: its result does not depend
# on any individual row, so no iterrows() loop is needed. The keys are the
# columns that actually exist in df ('id_odsp', 'event_type'); grouping by
# 'id' and 'value' raises KeyError because df has no such columns.
df2 = df.groupby(['id_odsp', 'event_type']).first().reset_index()

# Preview of the grouped frame
df2.head()

KeyError: 'id'

In [None]:

# Distinct (match id, event type) pairs present in df, ordered by the group keys.
# Only the two key columns are kept from the grouped result.
df_grouped = (
    df.groupby(['id_odsp', 'event_type'], as_index=False)
    .first()
    .loc[:, ['id_odsp', 'event_type']]
)

# Print the resulting pairs
print(df_grouped)

        id_odsp  event_type
0     00LMl81F/           1
1     00LMl81F/           2
2     00LMl81F/           3
3     00LMl81F/           4
4     00LMl81F/           6
...         ...         ...
9639  zyJsFDYG/           4
9640  zyJsFDYG/           7
9641  zyJsFDYG/           8
9642  zyJsFDYG/           9
9643  zyJsFDYG/          10

[9644 rows x 2 columns]


In [None]:
# Number of event rows per match: one output row per id_odsp.
# The 'sum' column holds a row count, not a sum.
df_grouped_event_type = (
    df.groupby('id_odsp')
    .size()
    .rename('sum')
    .reset_index()
)
print(df_grouped_event_type)

        id_odsp  sum
0     00LMl81F/  125
1     00o3l4Ui/  121
2     02zs6b5s/  128
3     06djNeRR/  121
4     08DG4Hyq/  124
...         ...  ...
1187  zslhWYgp/  101
1188  zuVOoi82/  128
1189  zwhjOjUB/   78
1190  zwnPZNYb/  116
1191  zyJsFDYG/   94

[1192 rows x 2 columns]


In [None]:
# Number of rows per event_type code across the whole frame.
# The 'sum' column holds a row count, not a sum.
df_grouped_event_type = (
    df.groupby('event_type')
    .size()
    .rename('sum')
    .reset_index()
)
print(df_grouped_event_type)

   event_type    sum
0           1  30569
1           2  11947
2           3  33441
3           4   5490
4           6    175
5           7   6741
6           8  34323
7           9   6511
8          10   1751
9          11    359


In [None]:
# List of all matches: the distinct id_odsp values of df_grouped, in the order
# in which they first appear there.
id_odsp_vector = df_grouped['id_odsp'].drop_duplicates().tolist()

# Print a header line followed by the list of ids
print("'id_odsp'", id_odsp_vector, sep="\n")


'id_odsp'
['00LMl81F/', '00o3l4Ui/', '02zs6b5s/', '06djNeRR/', '08DG4Hyq/', '0Ed6GSCi/', '0GD66DBA/', '0Kiy9x9k/', '0MASHCU8/', '0MAc7tND/', '0ONfYtGQ/', '0Oe5DC5B/', '0OeqlZA6/', '0OkHW3cO/', '0QSBhGQ1/', '0SmX0UK5/', '0UAGJvO3/', '0Wesr5x5/', '0YfXdzoH/', '0YjmP8qh/', '0d1GLu17/', '0d5W4d5q/', '0dwru1ko/', '0h7VZsmi/', '0h7aRVZA/', '0hwNLGcR/', '0jIOeGCb/', '0l1deQCF/', '0nhb4xzf/', '0npZ4azT/', '0pcxXQBj/', '0pkZmdi2/', '0tDcRMMm/', '0tm1GfLl/', '0vWe9vtS/', '21Hp48Ia/', '230aj4m4/', '255TfkNe/', '25qICCKD/', '274ygKJp/', '27MBgtNR/', '29kj1anI/', '29oJUnAq/', '2B2aU0Qe/', '2Dc5B4Xr/', '2HaT5Umm/', '2JnKxxh6/', '2L3Z2YPa/', '2LJQHuDd/', '2NAituEL/', '2NlPT3gR/', '2PqYDkvI/', '2TJp3YeD/', '2TeFpTD3/', '2VO3wEch/', '2VhabDtK/', '2X9MT0Os/', '2ZfzybRb/', '2c1hVamU/', '2cJEnzcE/', '2cRbSamh/', '2caSveIJ/', '2eANxQKg/', '2eW00Jrp/', '2iCUwxV6/', '2iMUPaQ8/', '2mIGlrIG/', '2mfCxQlt/', '2o389iFI/', '2qnBco3q/', '2sgjXB8F/', '2uSvy4k6/', '2uhLz0Ir/', '2uxRRYUn/', '2w3IIeZi/', '2yY57aiR/', '

In [None]:
# Count the rows for every (match id, event type) pair
df_grouped = (
    df.groupby(['id_odsp', 'event_type'])
    .size()
    .rename('sum')
    .reset_index()
)

# Turn the frame into a list of [id_odsp, event_type, sum] lists
vector_filtered = df_grouped.to_numpy().tolist()

# Print the list
print(vector_filtered)


[['00LMl81F/', 1, 39], ['00LMl81F/', 2, 17], ['00LMl81F/', 3, 24], ['00LMl81F/', 4, 5], ['00LMl81F/', 6, 1], ['00LMl81F/', 7, 6], ['00LMl81F/', 8, 24], ['00LMl81F/', 9, 6], ['00LMl81F/', 10, 3], ['00o3l4Ui/', 1, 31], ['00o3l4Ui/', 2, 9], ['00o3l4Ui/', 3, 35], ['00o3l4Ui/', 4, 2], ['00o3l4Ui/', 7, 6], ['00o3l4Ui/', 8, 35], ['00o3l4Ui/', 9, 3], ['02zs6b5s/', 1, 33], ['02zs6b5s/', 2, 10], ['02zs6b5s/', 3, 32], ['02zs6b5s/', 4, 4], ['02zs6b5s/', 7, 6], ['02zs6b5s/', 8, 34], ['02zs6b5s/', 9, 7], ['02zs6b5s/', 10, 2], ['06djNeRR/', 1, 26], ['06djNeRR/', 2, 10], ['06djNeRR/', 3, 33], ['06djNeRR/', 4, 3], ['06djNeRR/', 7, 6], ['06djNeRR/', 8, 33], ['06djNeRR/', 9, 9], ['06djNeRR/', 10, 1], ['08DG4Hyq/', 1, 27], ['08DG4Hyq/', 2, 13], ['08DG4Hyq/', 3, 35], ['08DG4Hyq/', 4, 2], ['08DG4Hyq/', 7, 6], ['08DG4Hyq/', 8, 35], ['08DG4Hyq/', 9, 6], ['0Ed6GSCi/', 1, 20], ['0Ed6GSCi/', 2, 5], ['0Ed6GSCi/', 3, 32], ['0Ed6GSCi/', 4, 7], ['0Ed6GSCi/', 7, 6], ['0Ed6GSCi/', 8, 34], ['0Ed6GSCi/', 9, 5], ['0Ed6GS

In [None]:


# Count the rows for every (match id, event type) pair
df_grouped = df.groupby(['id_odsp', 'event_type']).size().reset_index(name='count')

# One row per match, one column per event type. A match with no event of a
# given type has no entry after the pivot, so those gaps are filled with 0.
# The cast back to int keeps the counts integral: fillna alone leaves them as
# floats (39.0), which is also how they would be written to CSV.
pivot_table = (
    df_grouped
    .pivot(index='id_odsp', columns='event_type', values='count')
    .fillna(0)
    .astype(int)
)

# Label the columns event_type<code>.
# NOTE(review): the resulting 'event_type2' column is the count of
# event_type == 2 rows; it is unrelated to the 'event_type2' column of df.
pivot_table.columns = [f'event_type{int(col)}' for col in pivot_table.columns]

# Move id_odsp from the index back into a regular column
result = pivot_table.reset_index()

# Preview of the per-match count table
result.head()


        id_odsp  event_type1  event_type2  event_type3  event_type4  \
0     00LMl81F/         39.0         17.0         24.0          5.0   
1     00o3l4Ui/         31.0          9.0         35.0          2.0   
2     02zs6b5s/         33.0         10.0         32.0          4.0   
3     06djNeRR/         26.0         10.0         33.0          3.0   
4     08DG4Hyq/         27.0         13.0         35.0          2.0   
...         ...          ...          ...          ...          ...   
1187  zslhWYgp/         16.0         10.0         26.0          6.0   
1188  zuVOoi82/         28.0          9.0         35.0          7.0   
1189  zwhjOjUB/         29.0         10.0         14.0          3.0   
1190  zwnPZNYb/         20.0          9.0         33.0          5.0   
1191  zyJsFDYG/         20.0         11.0         24.0          1.0   

      event_type6  event_type7  event_type8  event_type9  event_type10  \
0             1.0          6.0         24.0          6.0           3.0   

In [None]:
# Write the 'result' DataFrame to resultados_eventos.csv in the working
# directory, without the row index.
result.to_csv('resultados_eventos.csv', index=False)


In [None]:
# Event types of a single match.
# Keep only the match id and the event type columns.
df_selected = df.loc[:, ['id_odsp', 'event_type']]

# Match to inspect; replace with any other id_odsp value.
id_to_filter = '004f4ING/'
df_filtered = df_selected.loc[df_selected['id_odsp'].eq(id_to_filter)]

# Print the rows of that match
print(df_filtered)

# Event types of the match as a plain list, duplicates included, in the row
# order of the filtered frame
id_eventtype_vector = df_filtered.loc[:, 'event_type'].tolist()

# Print the list
print("id_eventtype_vector:", id_eventtype_vector)



          id_odsp  event_type
580478  004f4ING/           8
580479  004f4ING/           3
580480  004f4ING/           8
580481  004f4ING/           3
580482  004f4ING/           1
...           ...         ...
580550  004f4ING/           7
580551  004f4ING/           2
580552  004f4ING/           1
580553  004f4ING/           8
580554  004f4ING/           3

[77 rows x 2 columns]
id_eventtype_vector: [8, 3, 8, 3, 1, 2, 8, 3, 1, 3, 8, 7, 1, 3, 8, 9, 1, 2, 1, 9, 2, 2, 8, 3, 2, 1, 8, 3, 4, 3, 8, 3, 8, 8, 3, 3, 8, 1, 1, 1, 1, 7, 2, 3, 8, 4, 1, 8, 3, 1, 8, 3, 1, 1, 2, 1, 3, 8, 1, 9, 7, 3, 8, 1, 1, 1, 1, 2, 3, 8, 6, 7, 7, 2, 1, 8, 3]


In [None]:
# Per-match count of event_type == 2 rows

# Count the rows for every (match id, event type) pair
df_grouped = df.groupby(['id_odsp', 'event_type']).size().reset_index(name='sum')

# Keep only the event_type == 2 rows, as [id_odsp, event_type, sum] lists.
# One boolean mask on the frame does the selection, instead of converting the
# whole frame to Python lists and filtering them twice with the same test.
vector_filtered = df_grouped[df_grouped['event_type'] == 2].values.tolist()

# Same rows under the second name this cell assigns; kept so that any code
# reading vector_filtered_1 keeps working.
vector_filtered_1 = list(vector_filtered)

# Print the filtered rows
print(vector_filtered)

# Counts only: the third element of each row
sum_column_vector = [row[2] for row in vector_filtered]

# Print the counts
print("'sum' event2:", sum_column_vector)

[['004f4ING/', 2, 9], ['00LMl81F/', 2, 17], ['00OX4xFp/', 2, 10], ['00QH2XdM/', 2, 8], ['00QL4t1L/', 2, 5], ['00WAhrVe/', 2, 16], ['00Wld37M/', 2, 9], ['00bYNeD8/', 2, 11], ['00nmICd9/', 2, 10], ['00o3l4Ui/', 2, 9], ['00z6gfu9/', 2, 8], ['02VAgmpT/', 2, 11], ['02avyHPo/', 2, 8], ['02tzvPZD/', 2, 9], ['02wGxvvg/', 2, 9], ['02yaghdh/', 2, 7], ['02zs6b5s/', 2, 10], ['0405NhGb/', 2, 11], ['04C3DgjS/', 2, 8], ['04GXLvyK/', 2, 9], ['04Oyn4RO/', 2, 9], ['04fKg3D1/', 2, 18], ['04p71qRB/', 2, 15], ['04tOFUYO/', 2, 9], ['04vrPwsg/', 2, 9], ['067cLOoG/', 2, 12], ['06DGB9wI/', 2, 4], ['06JtM43d/', 2, 15], ['06LxFA96/', 2, 6], ['06TRgxEi/', 2, 9], ['06djNeRR/', 2, 10], ['088KBueM/', 2, 13], ['08CXI0Li/', 2, 10], ['08DG4Hyq/', 2, 13], ['08IqA65l/', 2, 13], ['08QSlM6B/', 2, 9], ['08UkRKf9/', 2, 6], ['08WhMpZn/', 2, 11], ['08YWlF0J/', 2, 11], ['08bP6Pp1/', 2, 8], ['08zJIs1A/', 2, 12], ['0A39ahem/', 2, 8], ['0A3SS7DG/', 2, 13], ['0A9tJToA/', 2, 7], ['0ABxtTSC/', 2, 8], ['0AHQxedJ/', 2, 12], ['0AOdGlid/

In [None]:
# Per-match count of event_type == 1 rows

# Count the rows for every (match id, event type) pair
df_grouped = df.groupby(['id_odsp', 'event_type']).size().reset_index(name='sum')

# Keep only the event_type == 1 rows, as [id_odsp, event_type, sum] lists.
# One boolean mask on the frame does the selection, instead of converting the
# whole frame to Python lists and filtering them twice with the same test.
vector_filtered = df_grouped[df_grouped['event_type'] == 1].values.tolist()

# Same rows under the second name this cell assigns; kept so that any code
# reading vector_filtered_1 keeps working.
vector_filtered_1 = list(vector_filtered)

# Print the filtered rows
print(vector_filtered)

# Counts only: the third element of each row
sum_column_vector = [row[2] for row in vector_filtered]

# Print the counts
print("'sum' event1:", sum_column_vector)

[['004f4ING/', 1, 21], ['00LMl81F/', 1, 39], ['00OX4xFp/', 1, 18], ['00QH2XdM/', 1, 25], ['00QL4t1L/', 1, 16], ['00WAhrVe/', 1, 34], ['00Wld37M/', 1, 25], ['00bYNeD8/', 1, 25], ['00nmICd9/', 1, 29], ['00o3l4Ui/', 1, 31], ['00z6gfu9/', 1, 26], ['02VAgmpT/', 1, 21], ['02avyHPo/', 1, 22], ['02tzvPZD/', 1, 26], ['02wGxvvg/', 1, 28], ['02yaghdh/', 1, 17], ['02zs6b5s/', 1, 33], ['0405NhGb/', 1, 25], ['04C3DgjS/', 1, 37], ['04GXLvyK/', 1, 29], ['04Oyn4RO/', 1, 25], ['04fKg3D1/', 1, 36], ['04p71qRB/', 1, 29], ['04tOFUYO/', 1, 31], ['04vrPwsg/', 1, 28], ['067cLOoG/', 1, 30], ['06DGB9wI/', 1, 16], ['06JtM43d/', 1, 24], ['06LxFA96/', 1, 20], ['06TRgxEi/', 1, 30], ['06djNeRR/', 1, 26], ['088KBueM/', 1, 29], ['08CXI0Li/', 1, 20], ['08DG4Hyq/', 1, 27], ['08IqA65l/', 1, 25], ['08QSlM6B/', 1, 25], ['08UkRKf9/', 1, 23], ['08WhMpZn/', 1, 21], ['08YWlF0J/', 1, 28], ['08bP6Pp1/', 1, 18], ['08zJIs1A/', 1, 16], ['0A39ahem/', 1, 15], ['0A3SS7DG/', 1, 28], ['0A9tJToA/', 1, 28], ['0ABxtTSC/', 1, 28], ['0AHQxed

In [None]:
# Per-match count of event_type == 8 rows

# Count the rows for every (match id, event type) pair
df_grouped = df.groupby(['id_odsp', 'event_type']).size().reset_index(name='sum')

# Keep only the event_type == 8 rows, as [id_odsp, event_type, sum] lists.
# One boolean mask on the frame does the selection, instead of converting the
# whole frame to Python lists and filtering them twice with the same test.
vector_filtered = df_grouped[df_grouped['event_type'] == 8].values.tolist()

# Same rows under the second name this cell assigns; kept so that any code
# reading vector_filtered_1 keeps working.
vector_filtered_1 = list(vector_filtered)

# Print the filtered rows
print(vector_filtered)

# Counts only: the third element of each row
sum_column_vector = [row[2] for row in vector_filtered]

# Print the counts
print("'sum' event8:", sum_column_vector)

[['004f4ING/', 8, 18], ['00LMl81F/', 8, 24], ['00OX4xFp/', 8, 29], ['00QH2XdM/', 8, 22], ['00QL4t1L/', 8, 32], ['00WAhrVe/', 8, 25], ['00Wld37M/', 8, 23], ['00bYNeD8/', 8, 34], ['00nmICd9/', 8, 30], ['00o3l4Ui/', 8, 35], ['00z6gfu9/', 8, 26], ['02VAgmpT/', 8, 37], ['02avyHPo/', 8, 30], ['02tzvPZD/', 8, 22], ['02wGxvvg/', 8, 33], ['02yaghdh/', 8, 23], ['02zs6b5s/', 8, 34], ['0405NhGb/', 8, 26], ['04C3DgjS/', 8, 27], ['04GXLvyK/', 8, 25], ['04Oyn4RO/', 8, 15], ['04fKg3D1/', 8, 21], ['04p71qRB/', 8, 26], ['04tOFUYO/', 8, 27], ['04vrPwsg/', 8, 24], ['067cLOoG/', 8, 31], ['06DGB9wI/', 8, 22], ['06JtM43d/', 8, 25], ['06LxFA96/', 8, 36], ['06TRgxEi/', 8, 18], ['06djNeRR/', 8, 33], ['088KBueM/', 8, 19], ['08CXI0Li/', 8, 36], ['08DG4Hyq/', 8, 35], ['08IqA65l/', 8, 25], ['08QSlM6B/', 8, 32], ['08UkRKf9/', 8, 29], ['08WhMpZn/', 8, 31], ['08YWlF0J/', 8, 14], ['08bP6Pp1/', 8, 30], ['08zJIs1A/', 8, 34], ['0A39ahem/', 8, 28], ['0A3SS7DG/', 8, 26], ['0A9tJToA/', 8, 39], ['0ABxtTSC/', 8, 28], ['0AHQxed

In [None]:
# Per-match count of event_type == 9 rows

# Count the rows for every (match id, event type) pair
df_grouped = df.groupby(['id_odsp', 'event_type']).size().reset_index(name='sum')

# Keep only the event_type == 9 rows, as [id_odsp, event_type, sum] lists.
# One boolean mask on the frame does the selection, instead of converting the
# whole frame to Python lists and filtering them twice with the same test.
vector_filtered = df_grouped[df_grouped['event_type'] == 9].values.tolist()

# Same rows under the second name this cell assigns; kept so that any code
# reading vector_filtered_1 keeps working.
vector_filtered_1 = list(vector_filtered)

# Print the filtered rows
print(vector_filtered)

# Counts only: the third element of each row
sum_column_vector = [row[2] for row in vector_filtered]

# Print the counts
print("'sum' event9:", sum_column_vector)

[['004f4ING/', 9, 3], ['00LMl81F/', 9, 6], ['00OX4xFp/', 9, 3], ['00QH2XdM/', 9, 2], ['00QL4t1L/', 9, 5], ['00WAhrVe/', 9, 6], ['00Wld37M/', 9, 5], ['00bYNeD8/', 9, 3], ['00nmICd9/', 9, 6], ['00o3l4Ui/', 9, 3], ['00z6gfu9/', 9, 3], ['02VAgmpT/', 9, 14], ['02avyHPo/', 9, 4], ['02tzvPZD/', 9, 12], ['02wGxvvg/', 9, 6], ['02yaghdh/', 9, 8], ['02zs6b5s/', 9, 7], ['0405NhGb/', 9, 6], ['04C3DgjS/', 9, 5], ['04GXLvyK/', 9, 6], ['04Oyn4RO/', 9, 1], ['04fKg3D1/', 9, 10], ['04p71qRB/', 9, 4], ['04tOFUYO/', 9, 7], ['04vrPwsg/', 9, 5], ['067cLOoG/', 9, 4], ['06DGB9wI/', 9, 5], ['06JtM43d/', 9, 2], ['06LxFA96/', 9, 4], ['06TRgxEi/', 9, 7], ['06djNeRR/', 9, 9], ['088KBueM/', 9, 7], ['08CXI0Li/', 9, 5], ['08DG4Hyq/', 9, 6], ['08IqA65l/', 9, 6], ['08QSlM6B/', 9, 4], ['08UkRKf9/', 9, 5], ['08WhMpZn/', 9, 7], ['08YWlF0J/', 9, 8], ['08bP6Pp1/', 9, 7], ['08zJIs1A/', 9, 3], ['0A39ahem/', 9, 9], ['0A3SS7DG/', 9, 11], ['0A9tJToA/', 9, 5], ['0ABxtTSC/', 9, 4], ['0AHQxedJ/', 9, 4], ['0AOdGlid/', 9, 6], ['0AP48Y

In [None]:
# Distinct event_type codes as a plain list, in order of first appearance in df
event_type_vector = df['event_type'].drop_duplicates().tolist()
print(event_type_vector)


[1, 2, 3, 8, 10, 7, 9, 4, 6, 11, 5]


In [None]:
# Distinct values of the 'sum' column of df_grouped, in order of first appearance.
# NOTE(review): this stores count values under the name event_type_vector,
# replacing whatever that name held before; it also needs df_grouped to carry a
# 'sum' column -- confirm the intended cell order.
event_type_vector = df_grouped['sum'].drop_duplicates().tolist()

# Print a header line followed by the list of values
print("'sum'", event_type_vector, sep="\n")


'sum'
[21, 9, 18, 2, 1, 5, 3, 39, 17, 24, 6, 10, 29, 25, 8, 22, 4, 16, 33, 32, 34, 23, 11, 30, 31, 35, 26, 38, 37, 14, 28, 7, 12, 27, 15, 36, 20, 13, 19, 40, 48, 41, 51, 53, 42, 44, 49, 43, 46, 45, 47, 50]


In [None]:
# Count the rows for every (match id, event type) pair.
# The 'sum' column holds a row count, not a sum.
df_grouped = (
    df.groupby(['id_odsp', 'event_type'])
    .size()
    .rename('sum')
    .reset_index()
)

# Print the result
print(df_grouped)


         id_odsp  event_type  sum
0      004f4ING/           1   21
1      004f4ING/           2    9
2      004f4ING/           3   18
3      004f4ING/           4    2
4      004f4ING/           6    1
...          ...         ...  ...
59422  zyzdxP10/           4    4
59423  zyzdxP10/           7    6
59424  zyzdxP10/           8   29
59425  zyzdxP10/           9    7
59426  zyzdxP10/          10    1

[59427 rows x 3 columns]


In [None]:
# Preview the first rows of df_grouped.
# NOTE(review): the saved output shows a 'value' column, while the cells in this
# notebook name the count column 'sum' or 'count' -- presumably a stale output;
# re-run to confirm.
df_grouped.head()

Unnamed: 0,id_odsp,event_type,value
0,004f4ING/,1,21
1,004f4ING/,2,9
2,004f4ING/,3,18
3,004f4ING/,4,2
4,004f4ING/,6,1
