Conference handling: for leagues that are split into conferences, such as the American championship (e.g., East/West), each team is assigned to its conference, while for leagues such as the Italian one a single group or multiple groups (e.g., Group A, Group B) can be specified, depending on the context. This assignment is controlled through the team.csv and conference.csv files.

In [None]:
import pandas as pd
import numpy as np
from collections import Counter

# Read the play-by-play CSV. The path is left empty on purpose: fill in the
# location of the "Read File" output before running this cell.
# Only the columns listed below are loaded; empty strings are parsed as NaN.
plays = pd.read_csv(
    "",  # path of Read File Output
    usecols=[
        # match / set / rally bookkeeping
        "match_id", "set_number", "point_id", "team_touch_id",
        "home_team_score", "visiting_team_score",
        "serving_team", "point_won_by", "point",
        # teams
        "team", "team_id", "home_team_id", "visiting_team_id",
        # the contact itself
        "player_id", "player_name", "skill", "evaluation_code",
        "attack_code", "code", "role", "split",
        # zones and coordinates
        "start_zone", "end_zone",
        "start_coordinate_x", "start_coordinate_y",
        "end_coordinate_x", "end_coordinate_y",
        # setter positions and line-ups
        "home_setter_position", "visiting_setter_position",
        "home_player_id1", "home_player_id2", "home_player_id3",
        "home_player_id4", "home_player_id5", "home_player_id6",
        "home_p1", "home_p2", "home_p3", "home_p4", "home_p5", "home_p6",
        "visiting_p1", "visiting_p2", "visiting_p3",
        "visiting_p4", "visiting_p5", "visiting_p6",
        "visiting_player_id1", "visiting_player_id2", "visiting_player_id3",
        "visiting_player_id4", "visiting_player_id5", "visiting_player_id6",
    ],
    na_values=[""],
)

# Read the team and conference lookup tables; their id columns are forced to
# strings. NOTE(review): these are absolute, machine-specific paths.
team = pd.read_csv(
    "/Users/acco/Desktop/Catene Di Markov/Python/data/team.csv",
    dtype={"team_id": str},
)
conference = pd.read_csv(
    "/Users/acco/Desktop/Catene Di Markov/Python/data/conference.csv",
    dtype={"conference_id": str},
)

# Build one row per match holding the home/away team ids and the conference
# of each side.
#
# The match table is reduced to the three columns it needs *before* joining,
# and the team lookup is reduced to (team_id, conference_id) and renamed to
# the join key of each side. This avoids carrying every play column through
# two merges and avoids pandas' automatic "_x"/"_y" suffixing of clashing
# column names (e.g. `team_id`, which exists in both `plays` and `team`).
#
# NOTE(review): `team.team_id` is read as str; confirm the team id columns of
# `plays` have a compatible dtype, otherwise the joins cannot match.
conference_by_team = team.loc[:, ["team_id", "conference_id"]]

team_conf = (
    plays
    .drop_duplicates(subset=["match_id"])  # first row of every match
    .loc[:, ["match_id", "home_team_id", "visiting_team_id"]]
    .rename(columns={"home_team_id": "team_id_home", "visiting_team_id": "team_id_away"})
    .merge(  # conference of the home team
        conference_by_team.rename(
            columns={"team_id": "team_id_home", "conference_id": "conf_id_home"}
        ),
        on="team_id_home",
        how="left",
    )
    .merge(  # conference of the away team
        conference_by_team.rename(
            columns={"team_id": "team_id_away", "conference_id": "conf_id_away"}
        ),
        on="team_id_away",
        how="left",
    )
    .loc[:, ["match_id", "team_id_home", "team_id_away", "conf_id_home", "conf_id_away"]]
)

# Player lookup table: keep rows with a usable player id, count the rows of
# every (player_id, player_name, team_id) combination, and for each player_id
# keep only the combination with the highest count.
player = (
    plays
    .loc[lambda df: df['player_id'].notna() & (df['player_id'] != 'unknown player')]
    .groupby(['player_id', 'player_name', 'team_id'])
    .size()
    .reset_index(name="n")
    .sort_values(by='n', ascending=False)  # most frequent combination first
    .drop_duplicates(subset='player_id', keep='first')
    .drop(columns="n")  # the count was only needed for ranking
)

# Show the first rows of both derived tables
for preview in (team_conf, player):
    print(preview.head())

# Write both tables to CSV files in the working directory, without the index
for table, csv_name in ((team_conf, 'team_conf.csv'), (player, 'player.csv')):
    table.to_csv(csv_name, index=False)

In [None]:
def cumpaste(x, sep="."):
    """Return the cumulative joins of the strings in ``x``.

    Element ``i`` of the result is ``sep.join(x[:i + 1])``, so
    ``cumpaste(["a", "b", "c"])`` gives ``["a", "a.b", "a.b.c"]``.

    Args:
        x: Sequence of strings.
        sep: Separator placed between consecutive elements.

    Returns:
        A new list with one cumulative string per input element; an empty
        list when ``x`` is empty.
    """
    cumulative = []
    for item in x:
        # Extend the previous prefix instead of re-joining the whole slice,
        # which also makes the empty-input case fall out naturally.
        cumulative.append(sep.join((cumulative[-1], item)) if cumulative else item)
    return cumulative

# Create the `contact` DataFrame: every play row enriched with the match-level
# team/conference ids from `team_conf` plus the derived columns below. Rows
# without an abbreviation or without a serving team are dropped at the end.
contact = (
    plays
    .merge(team_conf, on="match_id", how="left")  # left join: keeps every play row
    .assign(
        # The team on the row is labelled "offense"; the other team of the match
        # is "defense". Rows whose team_id is not the home team (including a
        # missing team_id) fall into the second branch of each np.where.
        team_id_offense=lambda df: df["team_id"],
        team_id_defense=lambda df: np.where(
            df["team_id"] == df["team_id_home"], df["team_id_away"], df["team_id_home"]
        ),
        conf_id_offense=lambda df: np.where(
            df["team_id"] == df["team_id_home"], df["conf_id_home"], df["conf_id_away"]
        ),
        conf_id_defense=lambda df: np.where(
            df["team_id"] == df["team_id_home"], df["conf_id_away"], df["conf_id_home"]
        ),
        # "H" when the row's team is the home team, "V" otherwise; this value is
        # later used as the prefix of the state string.
        serve_receive=lambda df: np.where(df["team_id"] == df['home_team_id'], "H", "V"),
        # True when the next row is a point row (or there is no next row), or when
        # the next row has a different team_touch_id. The shift runs over the
        # whole frame, not per match.
        is_volley_end=lambda df: df["point"].shift(-1).fillna(True).astype(bool) | 
                                  (df["team_touch_id"] != df["team_touch_id"].shift(-1)),
        # Short code of the row, checked in this order:
        #   point row                        -> "P"
        #   skill missing / contains Unknown -> NaN (row removed by dropna below)
        #   Serve                            -> "SV"
        #   Attack                           -> "A" + attack_code
        #   anything else                    -> first letter of skill + evaluation_code
        # NOTE(review): astype(str) renders a missing attack/evaluation code as
        # the text "nan" - confirm that is acceptable for the state labels.
        abbrev=lambda df: np.where(
            df["point"], "P",
            np.where(
                df["skill"].isna() | df["skill"].str.contains("Unknown"), np.nan,
                np.where(
                    df["skill"] == "Serve", "SV",
                    np.where(
                        df["skill"] == "Attack", "A" + df["attack_code"].astype(str),
                        df["skill"].str[0] + df["evaluation_code"].astype(str)
                    )
                )
            )
        ),
        # Values of the following row; computed before the dropna below, so they
        # refer to the next row of the unfiltered frame.
        player_id_lead_1=lambda df: df["player_id"].shift(-1),
        skill_lead_1=lambda df: df["skill"].shift(-1),
        evaluation_code_lead_1=lambda df: df["evaluation_code"].shift(-1)
    )
    .dropna(subset=["abbrev", "serving_team"])  # drop rows with no abbreviation or no serving team
)

def create_state(group):
    """Add the ``state`` and ``num_contacts`` columns to one group of contacts.

    ``state`` is the ``serve_receive`` value of the group's first row, an
    underscore, and the cumulative "."-joined abbreviations up to and
    including the row. ``num_contacts`` is the number of rows in the group.

    The columns are written onto ``group`` itself, which is then returned.
    """
    chains = cumpaste(group['abbrev'].tolist(), sep=".")
    side = group['serve_receive']
    # Prefix every cumulative chain with the side taken from the first row
    group['state'] = [f"{side.iloc[0]}_{chain}" for chain in chains]
    group['num_contacts'] = len(group['abbrev'])
    return group

# Run `create_state` on every (match, point, team touch, point flag) group,
# then replace the index with a fresh 0..n-1 range.
contact = (
    contact
    .groupby(["match_id", "point_id", "team_touch_id", "point"], group_keys=False)
    .apply(create_state)
    .reset_index(drop=True)
)

In [None]:
# Integer code for every distinct match_id
contact['match_numeric'] = contact['match_id'].astype('category').cat.codes

# Rally identifier "<match code>_<set number>_<point id>", built row by row
contact['rally_id'] = [
    f"{match_code}_{set_no}_{point_no}"
    for match_code, set_no, point_no in zip(
        contact['match_numeric'], contact['set_number'], contact['point_id']
    )
]

In [None]:
# Collect every observed (state, following state) pair, rally by rally.
rows = []

for rid, rally in contact.groupby('rally_id'):
    # `rally` holds all rows of one rally in the row order of `contact`
    sequence = rally['state'].tolist()

    # Pair each state with its successor; the last state has none
    rows.extend((rid, here, there) for here, there in zip(sequence, sequence[1:]))

transitions = pd.DataFrame(rows, columns=['rally_id', 'current_state', 'next_state'])

In [None]:
# How often each (current, next) pair occurs, and how often each state is left
pair_counts = Counter(zip(transitions['current_state'], transitions['next_state']))
state_counts = Counter(transitions['current_state'])

# Every state seen on either side of a transition, sorted for a stable layout
all_states = sorted(set(transitions['current_state']) | set(transitions['next_state']))

# Square transition matrix as a DataFrame, initialised to zero
transition_matrix = pd.DataFrame(0.0, index=all_states, columns=all_states)

# Empirical probability: pair count divided by the departures from the source
for (source, target), pair_total in pair_counts.items():
    transition_matrix.at[source, target] = pair_total / state_counts[source]


In [None]:
# Make the two point states absorbing: once reached, the chain stays there.
for absorbing_state in ('H_P', 'V_P'):
    # If the state never occurred in the data, add it explicitly with zeros.
    # Assigning through .loc to a missing label would enlarge the frame with
    # NaN, and those NaN would then propagate through every later product.
    if absorbing_state not in transition_matrix.index:
        transition_matrix = transition_matrix.reindex(
            index=[*transition_matrix.index, absorbing_state], fill_value=0.0
        )
    if absorbing_state not in transition_matrix.columns:
        transition_matrix = transition_matrix.reindex(
            columns=[*transition_matrix.columns, absorbing_state], fill_value=0.0
        )
    # Clear any observed outgoing transitions so the row sums to exactly 1
    transition_matrix.loc[absorbing_state, :] = 0.0
    transition_matrix.loc[absorbing_state, absorbing_state] = 1.0

In [None]:
# `transition_matrix` is expected to be a DataFrame whose rows and columns are
# the chain states.

# 1. All states, in matrix order
states_list = list(transition_matrix.index)

# 2. Absorption probability vectors, keyed by state and starting at zero:
#    vH[s] = probability of being absorbed into 'H_P' when starting from s
#    vV[s] = probability of being absorbed into 'V_P' when starting from s
vH = dict.fromkeys(states_list, 0.0)
vV = dict.fromkeys(states_list, 0.0)

# The absorbing states have known values
vH.update({'H_P': 1.0, 'V_P': 0.0})
vV.update({'H_P': 0.0, 'V_P': 1.0})

# 3. Iterative function to update absorption probabilities
def iterate_absorption(vH, vV, trans_mat):
    """Perform one update step of the absorption probabilities.

    For every non-absorbing state ``s`` the new value is the transition row of
    ``s`` multiplied element-wise with the current vector and summed. The
    absorbing states 'H_P' and 'V_P' keep the values they have in the inputs.

    Args:
        vH: Mapping state -> current probability of absorption into 'H_P'.
        vV: Mapping state -> current probability of absorption into 'V_P'.
        trans_mat: Square DataFrame of transition probabilities; its index
            gives the states to update and its columns the target states.
            Every column label must be a key of ``vH`` and ``vV``.

    Returns:
        A ``(new_vH, new_vV)`` tuple of new dicts keyed by the states of
        ``trans_mat.index``. The input dicts are not modified.
    """
    absorbing = ('H_P', 'V_P')

    # The current vectors are the same for every row, so build them once.
    targets = list(trans_mat.columns)
    h_now = np.array([vH[t] for t in targets], dtype=float)
    v_now = np.array([vV[t] for t in targets], dtype=float)
    probabilities = trans_mat.to_numpy(dtype=float)

    new_vH, new_vV = {}, {}
    # States come from the matrix itself rather than from a module-level list,
    # so the function works for any matrix passed in.
    for s, row in zip(trans_mat.index, probabilities):
        if s in absorbing:
            new_vH[s] = vH[s]
            new_vV[s] = vV[s]
        else:
            new_vH[s] = np.sum(row * h_now)
            new_vV[s] = np.sum(row * v_now)
    return new_vH, new_vV

# 4. Convergence parameters: stop once no value changes by `epsilon` or more,
#    or after `max_iter` update steps
epsilon = 1e-8
max_iter = 1000

# 5. Iterate until convergence
for i in range(max_iter):
    # iterate_absorption returns fresh dicts, so the previous ones can be kept
    # by reference for the comparison
    old_vH, old_vV = vH, vV
    vH, vV = iterate_absorption(old_vH, old_vV, transition_matrix)

    # Largest absolute change of any state in this step (0.0 if no states)
    diff_H = max((abs(vH[s] - old_vH[s]) for s in states_list), default=0.0)
    diff_V = max((abs(vV[s] - old_vV[s]) for s in states_list), default=0.0)

    if i % 10 == 0:
        print(f"Iteration {i}: diff_H = {diff_H:.2e}, diff_V = {diff_V:.2e}")

    if diff_H < epsilon and diff_V < epsilon:
        print(f"Converged at iteration {i}")
        break
else:
    # Reached only when the loop ran out of iterations without a `break`;
    # the values below are then the last iterate, not a converged solution.
    print(f"Warning: no convergence within {max_iter} iterations (tolerance {epsilon:g})")

# 6. Create DataFrame with final results: one row per state with both
#    absorption probabilities
results = [(s, vH[s], vV[s]) for s in states_list]
results_df = pd.DataFrame(results, columns=['state', 'prob_H', 'prob_V'])