In [None]:
import os
import pandas as pd
from datavolley import read_dv ## Version available on personal github on https://github.com/AndreaAccornero/pydatavolley
import re
import numpy as np

# Directory that contains the DataVolley file(s) to load; set it before running.
path = ""
# The cells below address files relative to '.', so switch into that directory.
os.chdir(path)

# Empty DataFrame that the plays of every match are accumulated into
df = pd.DataFrame()


# Functions to read DataVolley files
def dv_read(file_path):
    """Parse one DataVolley file and return its plays.

    Args:
        file_path: Path handed unchanged to ``read_dv.DataVolley``.

    Returns:
        Whatever ``DataVolley.get_plays()`` returns for that file; the loading
        loop in this notebook uses it as a pandas DataFrame.
    """
    return read_dv.DataVolley(file_path).get_plays()

def dv_date(file_path):
    """Parse one DataVolley file and return the day recorded in its match info.

    Args:
        file_path: Path handed unchanged to ``read_dv.DataVolley``.

    Returns:
        The ``day`` attribute of the parsed file's ``match_info``.
    """
    return read_dv.DataVolley(file_path).match_info.day

# Debugging: counters and name lists so the run can be audited afterwards
files_success = 0
files_failure = 0
successful_files = []
failed_files = []

# Frames of the successfully parsed files. They are concatenated once after
# the loop: calling pd.concat inside the loop copies every previously loaded
# row again for each new file.
match_frames = []

# Cycle through all files in the directory
for file_name in os.listdir('.'):

    file_path = os.path.join('.', file_name)

    print(f"Reading file: {file_path}")
    try:
        match_plays = dv_read(file_path)

        match_plays['date'] = dv_date(file_path)
        match_plays['league'] = ''
        match_plays['file_name'] = file_name
        # "<file name>/<set number>": identifies a set across all loaded files
        match_plays['set_id'] = match_plays['set_number'].map(
            lambda set_number: f"{file_name}/{set_number}"
        )
        # The row's index label inside this file's frame, stored as a string
        match_plays['touch_id'] = [f"{label}" for label in match_plays.index]

        # Extracting the match date and girone from the file name
        # NOTE(review): these placeholders are never written into match_plays,
        # while a later cell sorts by a 'giornata' column -- confirm where that
        # column is supposed to come from.
        giornata_match = '' #according to the files
        girone_match = '' #according to the files

        match_frames.append(match_plays)

        files_success += 1
        successful_files.append(file_name)

    except Exception as e:
        # Record the failure and keep going with the remaining files
        files_failure += 1
        failed_files.append(file_name)
        print(f"Error processing file {file_path}: {e}")

# Single concatenation; `df` is left untouched when no file could be read.
if match_frames:
    df = pd.concat([df, *match_frames], ignore_index=True)

In [None]:
# Initialize player ID columns for home and visiting teams
for i in range(1, 7):
    df[f'home_player_id{i}'] = None
    df[f'visiting_player_id{i}'] = None

# Columns to check for home and visiting players
home_columns = ["home_p1", "home_p2", "home_p3", "home_p4", "home_p5", "home_p6"]
visiting_columns = ["visiting_p1", "visiting_p2", "visiting_p3", "visiting_p4", "visiting_p5", "visiting_p6"]


def _eq_mask(left, right):
    """Return element-wise ``left == right`` as a plain boolean mask.

    Comparisons that yield a missing value are treated as "not equal" so the
    result can always be used for boolean indexing.
    """
    return (left == right).fillna(False).astype(bool)


# For every row, find the first position column (p1..p6) of the acting team
# whose value equals the row's player_number, and copy the row's player_id into
# the matching <side>_player_id<N> column. This is done with boolean masks
# instead of a Python loop over rows, which was slow and printed one line per
# row of the table.
if len(df):
    on_home = _eq_mask(df['team'], df['home_team'])
    # A row is only checked against the visiting team when it did not match
    # the home team (the original if/elif order).
    on_visiting = ~on_home & _eq_mask(df['team'], df['visiting_team'])

    # Rows that already received an id; only the first matching position wins.
    located = pd.Series(False, index=df.index)

    for prefix, on_side, position_columns in (
        ('home', on_home, home_columns),
        ('visiting', on_visiting, visiting_columns),
    ):
        for position_index, col in enumerate(position_columns, start=1):
            hit = on_side & ~located & _eq_mask(df[col], df['player_number'])
            if hit.any():
                df.loc[hit, f'{prefix}_player_id{position_index}'] = df.loc[hit, 'player_id'].to_numpy()
                located |= hit

    # One summary line instead of one message per row
    print(
        f"Rows with the player found in a position: {int(located.sum())}; "
        f"rows with the player not found in any position: {int((~located).sum())}"
    )


In [None]:
# Running team-touch counter; the first row of the sorted table keeps id 0
tid = 0
df['team_touch_id'] = 0  # placeholder column, filled in once the ids are computed

# Order the rows by matchday, then rally number, and renumber them 0..n-1
# NOTE(review): no cell shown here assigns a 'giornata' column -- confirm it is
# present in the loaded plays before running this cell.
df = df.sort_values(by=["giornata", "rally_number"]).reset_index(drop=True)

# Work on plain (point_id, team) pairs rather than reading single cells of df
touches = list(zip(df['point_id'].tolist(), df['team'].tolist()))
team_touch_ids = [tid] * min(len(touches), 1)

# Compare every row with the one before it: a new rally (point_id changed) or
# a change of team starts a new team touch.
for (prev_point, prev_team), (cur_point, cur_team) in zip(touches, touches[1:]):
    if cur_point != prev_point or cur_team != prev_team:
        tid += 1
    team_touch_ids.append(tid)

# Write all ids back at once, keeping the dtype of the placeholder column
df['team_touch_id'] = pd.Series(team_touch_ids, index=df.index, dtype=df['team_touch_id'].dtype)


In [None]:
# Boolean flag: True on rows whose skill is exactly 'Point'
df['point'] = df['skill'].eq('Point')
# Write the final table without the index column; fill in the output path first
df.to_csv('', index=False)  ## your path