In [1]:
import pandas as pd
import numpy as np
from math import sqrt

# Define the rescaling function
def rescale(value, old_min, old_max, new_min, new_max):
    if pd.notna(value):  # Only rescale if the value is not NaN
        return (value - old_min) / (old_max - old_min) * (new_max - new_min) + new_min
    return value  # Return NaN as-is

# Load the dataset
file_path = "Italy-Serie-A/2024-2025.csv"
df = pd.read_csv(file_path)

# Extract the season from the file name
season = file_path.split("/")[-1].replace(".csv", "")

# Filter the dataset for the specified match
match_id = 1834868
match_df = df[df['matchId'] == match_id]

# Rescale the coordinates
scaling_map = {
    'x': (0, 100, 0, 105),
    'y': (0, 100, 0, 69),
    'endX': (0, 100, 0, 105),
    'endY': (0, 100, 0, 69)
}
for col, (old_min, old_max, new_min, new_max) in scaling_map.items():
    match_df[col] = match_df[col].apply(rescale, args=(old_min, old_max, new_min, new_max))

# Initialize the carry DataFrame
carries = []
carry_id = 1

# Function to calculate Euclidean distance
def calculate_distance(x1, y1, x2, y2):
    return sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)

# Identify carries based on conditions
for i in range(len(match_df) - 1):
    event1 = match_df.iloc[i]
    event2 = match_df.iloc[i + 1]

    # Condition 1: Consecutive events by the same team or player
    if (
        event1['teamId'] == event2['teamId'] and
        event1['type'] in ['Pass', 'TakeOn', 'Tackle', 'BallRecovery'] and
        event1['outcomeType'] == 'Successful' and
        (
            event2['type'] in ['Pass', 'TakeOn', 'Tackle', 'Dispossessed', 'Foul'] or 
            event2['isShot'] == True
        )
    ):
        x, y = event1['endX'], event1['endY']
        endX, endY = event2['x'], event2['y']
        
        if calculate_distance(x, y, endX, endY) >= 5:
            carries.append({
                'season': season,
                'matchId': match_id,
                'teamId': event2['teamId'],
                'playerId': event2['playerId'],
                'playerName': event2['playerName'],
                'type': 'Carry',
                'x': x,
                'y': y,
                'endX': endX,
                'endY': endY,
                'carryId': carry_id
            })
            carry_id += 1

    # Condition 2: TakeOn in a sequence of three events
    if i < len(match_df) - 2:
        event3 = match_df.iloc[i + 2]
        if (
            event1['teamId'] != event2['teamId'] and
            event2['teamId'] != event3['teamId'] and
            event2['type'] == 'TakeOn' and
            pd.notna(event1['endX']) and pd.notna(event1['endY'])
        ):
            x, y = 105 - event1['endX'], 69 - event1['endY']
            endX, endY = 105 - event3['x'], 69 - event3['y']

            if calculate_distance(x, y, endX, endY) >= 5:
                carries.append({
                    'season': season,
                    'matchId': match_id,
                    'teamId': event2['teamId'],
                    'playerId': event2['playerId'],
                    'playerName': event2['playerName'],
                    'type': 'Carry',
                    'x': x,
                    'y': y,
                    'endX': endX,
                    'endY': endY,
                    'carryId': carry_id
                })
                carry_id += 1

    # Condition 3: Events between opposing teams
    if (
        event1['teamId'] != event2['teamId'] and
        event1['type'] == 'BallRecovery' and
        event2['type'] in ['BallRecovery', 'Tackle']
    ):
        x, y = event1['x'], event1['y']
        endX, endY = 105 - event2['x'], 69 - event2['y']

        if calculate_distance(x, y, endX, endY) >= 5:
            carries.append({
                'season': season,
                'matchId': match_id,
                'teamId': event1['teamId'],
                'playerId': event1['playerId'],
                'playerName': event1['playerName'],
                'type': 'Carry',
                'x': x,
                'y': y,
                'endX': endX,
                'endY': endY,
                'carryId': carry_id
            })
            carry_id += 1

# Create the carries DataFrame
carries_df = pd.DataFrame(carries)

# Display the carries DataFrame
print(carries_df)

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  match_df[col] = match_df[col].apply(rescale, args=(old_min, old_max, new_min, new_max))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  match_df[col] = match_df[col].apply(rescale, args=(old_min, old_max, new_min, new_max))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 

        season  matchId  teamId  playerId             playerName   type  \
0    2024-2025  1834868      73  329854.0  Lucas Martínez Quarta  Carry   
1    2024-2025  1834868      73  248144.0         Sofyan Amrabat  Carry   
2    2024-2025  1834868      73  329854.0  Lucas Martínez Quarta  Carry   
3    2024-2025  1834868      73  356733.0         Andrea Colpani  Carry   
4    2024-2025  1834868   24341  363978.0         Adrián Bernabé  Carry   
..         ...      ...     ...       ...                    ...    ...   
150  2024-2025  1834868   24341  447486.0     Alessandro Circati  Carry   
151  2024-2025  1834868      73  322094.0         Jonathan Ikoné  Carry   
152  2024-2025  1834868   24341  123873.0          Wylan Cyprien  Carry   
153  2024-2025  1834868   24341  343512.0        Emanuele Valeri  Carry   
154  2024-2025  1834868   24341  404497.0     Matteo Cancellieri  Carry   

          x       y    endX    endY  carryId  
0    30.765  16.629  33.810  32.637        1  
1    

In [1]:
import pandas as pd
import numpy as np
from math import sqrt

# Define the rescaling function
def rescale(value, old_min, old_max, new_min, new_max):
    if pd.notna(value):  # Only rescale if the value is not NaN
        return round((value - old_min) / (old_max - old_min) * (new_max - new_min) + new_min, 1)
    return value  # Return NaN as-is

# Function to calculate Euclidean distance
def calculate_distance(x1, y1, x2, y2):
    if pd.notna(x1) and pd.notna(y1) and pd.notna(x2) and pd.notna(y2):
        #return sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
        return round(sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2), 1)
    return 0

# Load the dataset
file_path = "Italy-Serie-A/2024-2025.csv"
try:
    df = pd.read_csv(file_path, low_memory=False)
    print("Dataset loaded successfully.")
except Exception as e:
    print(f"Error loading dataset: {e}")
    df = pd.DataFrame()

# Verify dataset
if not df.empty:
    print(df.head())

# Extract the season from the file name
season = file_path.split("/")[-1].replace(".csv", "")

# Filter the dataset for the specified match
match_id = 1834868
if 'matchId' in df.columns and match_id in df['matchId'].values:
    match_df = df[df['matchId'] == match_id].copy()
else:
    print(f"No records found for matchId {match_id}")
    match_df = pd.DataFrame()

# If the match data is not empty, proceed
if not match_df.empty:
    carries = []
    carry_id = 1

    # Identify carries based on conditions
    for i in range(len(match_df) - 1):
        event1 = match_df.iloc[i]
        event2 = match_df.iloc[i + 1]

        # Condition 1: Consecutive events by the same team or player
        if (
            event1['teamId'] == event2['teamId'] and
            event1['type'] in ['Pass', 'TakeOn', 'Tackle', 'BallRecovery'] and
            event1['outcomeType'] == 'Successful' and
            (
                event2['type'] in ['Pass', 'TakeOn', 'Tackle', 'Dispossessed', 'Foul'] or 
                event2['isShot'] == True
            )
        ):
            x, y = rescale(event1['endX'], 0, 100, 0, 105), rescale(event1['endY'], 0, 100, 0, 69)
            endX, endY = rescale(event2['x'], 0, 100, 0, 105), rescale(event2['y'], 0, 100, 0, 69)
            length = calculate_distance(x, y, endX, endY)

            if length >= 5:
                carries.append({
                    'season': season,
                    'matchId': match_id,
                    'teamId': event2['teamId'],
                    'playerId': event2['playerId'],
                    'playerName': event2['playerName'],
                    'type': 'Carry',
                    'x': x,
                    'y': y,
                    'endX': endX,
                    'endY': endY,
                    'carryId': carry_id,
                    'carryLength': length
                })
                carry_id += 1

        # Condition 2: TakeOn in a sequence of three events
        if i < len(match_df) - 2:
            event3 = match_df.iloc[i + 2]
            if (
                event1['teamId'] != event2['teamId'] and
                event2['teamId'] != event3['teamId'] and
                event2['type'] == 'TakeOn' and
                pd.notna(event1['endX']) and pd.notna(event1['endY'])
            ):
                x, y = rescale(105 - event1['endX'], 0, 100, 0, 105), rescale(69 - event1['endY'], 0, 100, 0, 69)
                endX, endY = rescale(105 - event3['x'], 0, 100, 0, 105), rescale(69 - event3['y'], 0, 100, 0, 69)
                length = calculate_distance(x, y, endX, endY)

                if length >= 5:
                    carries.append({
                        'season': season,
                        'matchId': match_id,
                        'teamId': event2['teamId'],
                        'playerId': event2['playerId'],
                        'playerName': event2['playerName'],
                        'type': 'Carry',
                        'x': x,
                        'y': y,
                        'endX': endX,
                        'endY': endY,
                        'carryId': carry_id,
                        'carryLength': length
                    })
                    carry_id += 1

        # Condition 3: Events between opposing teams
        if (
            event1['teamId'] != event2['teamId'] and
            event1['type'] == 'BallRecovery' and
            event2['type'] in ['BallRecovery', 'Tackle']
        ):
            x, y = rescale(event1['x'], 0, 100, 0, 105), rescale(event1['y'], 0, 100, 0, 69)
            endX, endY = rescale(105 - event2['x'], 0, 100, 0, 105), rescale(69 - event2['y'], 0, 100, 0, 69)
            length = calculate_distance(x, y, endX, endY)

            if length >= 5:
                carries.append({
                    'season': season,
                    'matchId': match_id,
                    'teamId': event1['teamId'],
                    'playerId': event1['playerId'],
                    'playerName': event1['playerName'],
                    'type': 'Carry',
                    'x': x,
                    'y': y,
                    'endX': endX,
                    'endY': endY,
                    'carryId': carry_id,
                    'carryLength': length
                })
                carry_id += 1

    # Convert carry data to DataFrame
    carries_df = pd.DataFrame(carries)

    # Display the carries DataFrame
    print(carries_df)
else:
    print("No carry-related data found.")

Dataset loaded successfully.
             id  eventId  minute  second  teamId h_a     x     y  \
0  2.709862e+09        2       0     0.0      73   a   0.0   0.0   
1  2.709862e+09        2       0     0.0   24341   h   0.0   0.0   
2  2.709862e+09        3       0     0.0      73   a  50.1  50.4   
3  2.709862e+09        4       0     2.0      73   a  41.8  51.3   
4  2.709862e+09        5       0     4.0      73   a  39.2  33.4   

   expandedMinute     period  ... penaltyShootoutSavedGK  \
0               0  FirstHalf  ...                      0   
1               0  FirstHalf  ...                      0   
2               0  FirstHalf  ...                      0   
3               0  FirstHalf  ...                      0   
4               0  FirstHalf  ...                      0   

  penaltyShootoutConcededGK throwIn subOn  subOff  defensiveThird midThird  \
0                         0       0     0       0               0        0   
1                         0       0     0    

In [2]:
output_file = 'carries.csv'
carries_df.to_csv(output_file, index=False, mode='w')

In [1]:
import pandas as pd
import os

# Path to the main directory containing subfolders with CSV files
main_dir = os.getcwd()

# Output file to save results
output_file = 'carries.csv'

# Check if output file exists
file_exists = os.path.isfile(output_file)

from math import sqrt

# Define the rescaling function
def rescale(value, old_min, old_max, new_min, new_max):
    if pd.notna(value):  # Only rescale if the value is not NaN
        return round((value - old_min) / (old_max - old_min) * (new_max - new_min) + new_min, 1)
    return value  # Return NaN as-is

# Function to calculate Euclidean distance
def calculate_distance(x1, y1, x2, y2):
    if pd.notna(x1) and pd.notna(y1) and pd.notna(x2) and pd.notna(y2):
        #return sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
        return round(sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2), 1)
    return 0


carry_id = 1

# Iterate through each subfolder in the main directory
for subfolder in os.listdir(main_dir):
    subfolder_path = os.path.join(main_dir, subfolder)

    # Check if it is a directory
    if os.path.isdir(subfolder_path):
        print(f"Processing folder: {subfolder}")

        # Iterate through all CSV files in the subfolder
        for file in os.listdir(subfolder_path):
            if file.endswith('.csv'):
                file_path = os.path.join(subfolder_path, file)
                print(f"Processing file: {file}")

                # Extract the season from the file name
                season = file.replace(".csv", "")

                # Read the CSV file
                df = pd.read_csv(file_path)

                # Add the 'season' column with the same value for all rows
                df['season'] = season

                # Initialize an empty list for all match results
                season_carries = []

                # Iterate over each unique matchId in the dataset
                for match_id in df['matchId'].unique():
                    match_df = df[df['matchId'] == match_id]
                    match_carries = []

                    # Identify carries based on conditions
                    for i in range(len(match_df) - 1):
                        event1 = match_df.iloc[i]
                        event2 = match_df.iloc[i + 1]

                        # Condition 1: Consecutive events by the same team or player
                        if (
                            event1['teamId'] == event2['teamId'] and
                            event1['type'] in ['Pass', 'TakeOn', 'Tackle', 'BallRecovery'] and
                            event1['outcomeType'] == 'Successful' and
                            (
                                event2['type'] in ['Pass', 'TakeOn', 'Tackle', 'Dispossessed', 'Foul'] or 
                                event2['isShot'] == 1 #True value
                            )
                        ):
                            x, y = rescale(event1['endX'], 0, 100, 0, 105), rescale(event1['endY'], 0, 100, 0, 69)
                            endX, endY = rescale(event2['x'], 0, 100, 0, 105), rescale(event2['y'], 0, 100, 0, 69)
                            length = calculate_distance(x, y, endX, endY)

                            if length >= 5:
                                match_carries.append({
                                    'season': season,
                                    'matchId': match_id,
                                    'teamId': event2['teamId'],
                                    'playerId': event2['playerId'],
                                    'playerName': event2['playerName'],
                                    'type': 'Carry',
                                    'x': x,
                                    'y': y,
                                    'endX': endX,
                                    'endY': endY,
                                    'carryId': carry_id,
                                    'carryLength': length
                                })
                                carry_id += 1

                        # Condition 2: TakeOn in a sequence of three events
                        if i < len(match_df) - 2:
                            event3 = match_df.iloc[i + 2]
                            if (
                                event1['teamId'] != event2['teamId'] and
                                event2['teamId'] != event3['teamId'] and
                                event2['type'] == 'TakeOn' and
                                pd.notna(event1['endX']) and pd.notna(event1['endY'])
                            ):
                                x, y = rescale(105 - event1['endX'], 0, 100, 0, 105), rescale(69 - event1['endY'], 0, 100, 0, 69)
                                endX, endY = rescale(105 - event3['x'], 0, 100, 0, 105), rescale(69 - event3['y'], 0, 100, 0, 69)
                                length = calculate_distance(x, y, endX, endY)

                                if length >= 5:
                                    match_carries.append({
                                        'season': season,
                                        'matchId': match_id,
                                        'teamId': event2['teamId'],
                                        'playerId': event2['playerId'],
                                        'playerName': event2['playerName'],
                                        'type': 'Carry',
                                        'x': x,
                                        'y': y,
                                        'endX': endX,
                                        'endY': endY,
                                        'carryId': carry_id,
                                        'carryLength': length
                                    })
                                    carry_id += 1

                        # Condition 3: Events between opposing teams
                        if (
                            event1['teamId'] != event2['teamId'] and
                            event1['type'] == 'BallRecovery' and
                            event2['type'] in ['BallRecovery', 'Tackle']
                        ):
                            x, y = rescale(event1['x'], 0, 100, 0, 105), rescale(event1['y'], 0, 100, 0, 69)
                            endX, endY = rescale(105 - event2['x'], 0, 100, 0, 105), rescale(69 - event2['y'], 0, 100, 0, 69)
                            length = calculate_distance(x, y, endX, endY)

                            if length >= 5:
                                match_carries.append({
                                    'season': season,
                                    'matchId': match_id,
                                    'teamId': event1['teamId'],
                                    'playerId': event1['playerId'],
                                    'playerName': event1['playerName'],
                                    'type': 'Carry',
                                    'x': x,
                                    'y': y,
                                    'endX': endX,
                                    'endY': endY,
                                    'carryId': carry_id,
                                    'carryLength': length
                                })
                                carry_id += 1

                    # Add the match carries to the season carries list
                    season_carries.extend(match_carries)

                # Convert carry data to DataFrame
                carries_df = pd.DataFrame(season_carries)
                # Save to the output CSV file
                if not file_exists:
                    # Create the file and save the data
                    carries_df.to_csv(output_file, index=False, mode='w')
                    file_exists = True  # Now the file exists
                else:
                    # Append to the existing file
                    carries_df.to_csv(output_file, index=False, header=False, mode='a')

                # Release memory for the current DataFrame
                del df, carries_df
                print(f"Finished processing file: {file}")

print("All files processed successfully.")


Processing folder: .git
Processing folder: Belgium-Jupiler-Pro-League
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Brazil-Brasileirão
Processing file: 2020.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020.csv
Processing file: 2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021.csv
Processing file: 2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022.csv
Processing file: 2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023.csv
Processing file: 2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024.csv
Processing folder: England-Championship
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: England-League-One
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: England-League-Two
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: England-Premier-League
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: France-Ligue-1
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Germany-2-Bundesliga
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Germany-Bundesliga
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Italy-Serie-A
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Netherlands-Eredivisie
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Portugal-Liga-Portugal
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Scotland-Premiership
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Spain-LaLiga
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Turkey-SuperLig
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: USA-Major-League-Soccer
Processing file: 2020.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020.csv
Processing file: 2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021.csv
Processing file: 2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022.csv
Processing file: 2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023.csv
Processing file: 2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024.csv
All files processed successfully.
