In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import os
import csv
from IPython.display import display

In [None]:
# Name of the CSV file that collects the results
output_file = 'open-play-shots.csv'

# Main directory (the current working directory) whose subfolders hold the CSV files
main_dir = os.getcwd()

# True when the output file is already on disk
file_exists = os.path.isfile(output_file)

In [None]:
# Define the rescaling function
def rescale(value, old_min, old_max, new_min, new_max):
    """Linearly map ``value`` from [old_min, old_max] onto [new_min, new_max].

    Parameters
    ----------
    value : scalar, pandas Series/DataFrame/Index or numpy.ndarray
        The value(s) to rescale. A missing scalar (anything for which
        ``pd.notna`` is false, e.g. NaN or None) is returned unchanged.
        For pandas/NumPy containers the arithmetic is applied to the whole
        object at once.
    old_min, old_max : number
        Bounds of the source range.
    new_min, new_max : number
        Bounds of the target range.

    Returns
    -------
    The rescaled value(s), or ``value`` itself when it is a missing scalar.

    Raises
    ------
    ZeroDivisionError
        For plain Python numbers when ``old_min == old_max``.
    """
    # Containers cannot go through `if pd.notna(value)`: the truth value of an
    # array is ambiguous. They are rescaled directly instead.
    is_container = (
        isinstance(value, (pd.Series, pd.DataFrame, pd.Index, np.ndarray))
        and np.ndim(value) > 0
    )
    if is_container or pd.notna(value):
        return (value - old_min) / (old_max - old_min) * (new_max - new_min) + new_min
    return value  # Missing scalar: hand it back as-is

# Columns to rescale, each mapped to (old_min, old_max, new_min, new_max)
scaling_map = {
    'x': (0, 100, 0, 105),     # Rescale from [0,100] to [0,105]
    'y': (0, 100, 0, 69),      # Rescale from [0,100] to [0,69]
    'endX': (0, 100, 0, 105),  # Rescale from [0,100] to [0,105]
    'endY': (0, 100, 0, 69)    # Rescale from [0,100] to [0,69]
}

In [None]:
# The helpers and lookup tables below do not depend on the file being processed,
# so they are defined once here instead of being rebuilt for every CSV file.

def fix_score_format(value):
    """Rewrite a score that was read as a clock time into 'X-Y' form.

    'score', 'ftScore', 'htScore' and 'etScore' are interpreted as times
    instead of scores. A string containing ':' is split on ':' and its first
    two parts are joined with '-'. Any other value (non-string, or a string
    without ':') is returned unchanged.
    """
    if isinstance(value, str) and ':' in value:
        # The ':' check guarantees at least two parts, so unpacking cannot fail
        hour, minute = value.split(':')[:2]
        return f"{hour}-{minute}"
    return value


# Columns stored as numerical 1/0 that are cast to True/False
columns_to_cast = [
    'isTouch', 'isShot', 'isGoal', 'shotSixYardBox', 'shotPenaltyArea', 'shotOboxTotal',
    'shotOpenPlay', 'shotCounter', 'shotSetPiece', 'shotDirectCorner', 'shotOffTarget',
    'shotOnPost', 'shotOnTarget', 'shotsTotal', 'shotBlocked', 'shotRightFoot', 'shotLeftFoot',
    'shotHead', 'shotObp', 'goalSixYardBox', 'goalPenaltyArea', 'goalObox', 'goalOpenPlay',
    'goalCounter', 'goalSetPiece', 'penaltyScored', 'goalOwn', 'goalNormal', 'goalRightFoot',
    'goalLeftFoot', 'goalHead', 'goalObp', 'shortPassInaccurate', 'shortPassAccurate',
    'passCorner', 'passCornerAccurate', 'passCornerInaccurate', 'passFreekick', 'passBack',
    'passForward', 'passLeft', 'passRight', 'keyPassLong', 'keyPassShort', 'keyPassCross',
    'keyPassCorner', 'keyPassThroughball', 'keyPassFreekick', 'keyPassThrowin', 'keyPassOther',
    'assistCross', 'assistCorner', 'assistThroughball', 'assistFreekick', 'assistThrowin',
    'assistOther', 'dribbleLost', 'dribbleWon', 'challengeLost', 'interceptionWon', 'clearanceHead',
    'outfielderBlock', 'passCrossBlockedDefensive', 'outfielderBlockedPass', 'offsideGiven',
    'offsideProvoked', 'foulGiven', 'foulCommitted', 'yellowCard', 'voidYellowCard', 'secondYellow',
    'redCard', 'turnover', 'dispossessed', 'saveLowLeft', 'saveHighLeft', 'saveLowCentre',
    'saveHighCentre', 'saveLowRight', 'saveHighRight', 'saveHands', 'saveFeet', 'saveObp',
    'saveSixYardBox', 'savePenaltyArea', 'saveObox', 'keeperDivingSave', 'standingSave', 'closeMissHigh',
    'closeMissHighLeft', 'closeMissHighRight', 'closeMissLeft', 'closeMissRight', 'shotOffTargetInsideBox',
    'touches', 'assist', 'ballRecovery', 'clearanceEffective', 'clearanceTotal', 'clearanceOffTheLine',
    'dribbleLastman', 'errorLeadsToGoal', 'errorLeadsToShot', 'intentionalAssist', 'interceptionAll',
    'interceptionIntheBox', 'keeperClaimHighLost', 'keeperClaimHighWon', 'keeperClaimLost', 'keeperClaimWon',
    'keeperOneToOneWon', 'parriedDanger', 'parriedSafe', 'collected', 'keeperPenaltySaved', 'keeperSaveInTheBox',
    'keeperSaveTotal', 'keeperSmother', 'keeperSweeperLost', 'keeperMissed', 'passAccurate', 'passBackZoneInaccurate',
    'passForwardZoneAccurate', 'passInaccurate', 'passAccuracy', 'cornerAwarded', 'passKey', 'passChipped',
    'passCrossAccurate', 'passCrossInaccurate', 'passLongBallAccurate', 'passLongBallInaccurate',
    'passThroughBallAccurate', 'passThroughBallInaccurate', 'passThroughBallInacurate', 'passFreekickAccurate',
    'passFreekickInaccurate', 'penaltyConceded', 'penaltyMissed', 'penaltyWon', 'passRightFoot', 'passLeftFoot',
    'passHead', 'sixYardBlock', 'tackleLastMan', 'tackleLost', 'tackleWon', 'cleanSheetGK', 'cleanSheetDL',
    'cleanSheetDC', 'cleanSheetDR', 'cleanSheetDML', 'cleanSheetDMC', 'cleanSheetDMR', 'cleanSheetML',
    'cleanSheetMC', 'cleanSheetMR', 'cleanSheetAML', 'cleanSheetAMC', 'cleanSheetAMR', 'cleanSheetFWL',
    'cleanSheetFW', 'cleanSheetFWR', 'cleanSheetSub', 'goalConcededByTeamGK', 'goalConcededByTeamDL',
    'goalConcededByTeamDC', 'goalConcededByTeamDR', 'goalConcededByTeamDML', 'goalConcededByTeamDMC',
    'goalConcededByTeamDMR', 'goalConcededByTeamML', 'goalConcededByTeamMC', 'goalConcededByTeamMR',
    'goalConcededByTeamAML', 'goalConcededByTeamAMC', 'goalConcededByTeamAMR', 'goalConcededByTeamFWL',
    'goalConcededByTeamFW', 'goalConcededByTeamFWR', 'goalConcededByTeamSub', 'goalConcededOutsideBoxGoalkeeper',
    'goalScoredByTeamGK', 'goalScoredByTeamDL', 'goalScoredByTeamDC', 'goalScoredByTeamDR', 'goalScoredByTeamDML',
    'goalScoredByTeamDMC', 'goalScoredByTeamDMR', 'goalScoredByTeamML', 'goalScoredByTeamMC', 'goalScoredByTeamMR',
    'goalScoredByTeamAML', 'goalScoredByTeamAMC', 'goalScoredByTeamAMR', 'goalScoredByTeamFWL',
    'goalScoredByTeamFW', 'goalScoredByTeamFWR', 'goalScoredByTeamSub', 'aerialSuccess', 'duelAerialWon',
    'duelAerialLost', 'offensiveDuel', 'defensiveDuel', 'bigChanceMissed', 'bigChanceScored', 'bigChanceCreated',
    'overrun', 'successfulFinalThirdPasses', 'punches', 'penaltyShootoutScored', 'penaltyShootoutMissedOffTarget',
    'penaltyShootoutSaved', 'penaltyShootoutSavedGK', 'penaltyShootoutConcededGK', 'throwIn', 'subOn', 'subOff',
    'defensiveThird', 'midThird', 'finalThird', 'pos'
]

# Target dtype per column. Not referenced anywhere in this cell; kept in case
# other cells rely on it. The non-boolean entries are spelled out, and the
# boolean ones are derived from columns_to_cast (plus 'isOwnGoal') so the two
# cannot drift apart. The key -> dtype mapping is the same as the former
# hand-written dictionary; only the key order differs.
dtype_dict = {
    'id': 'int64',
    'eventId': 'int16',
    'minute': 'int16',
    'second': 'int16',
    'teamId': 'int32',
    'h_a': 'str',
    'x': 'float32',
    'y': 'float32',
    'expandedMinute': 'int16',
    'period': 'str',
    'type': 'str',
    'outcomeType': 'str',
    'qualifiers': 'object',
    'satisfiedEventsTypes': 'object',
    'matchId': 'int32',
    'startDate': 'object',
    'startTime': 'object',
    'score': 'str',
    'ftScore': 'str',
    'htScore': 'str',
    'etScore': 'str',
    'venueName': 'str',
    'maxMinute': 'int16',
    'playerId': 'float32',  # to allow for NaN
    'playerName': 'str',
    'endX': 'float32',
    'endY': 'float32',
    'blockedX': 'float32',
    'blockedY': 'float32',
    'goalMouthZ': 'float32',
    'goalMouthY': 'float32',
    'relatedEventId': 'float32',  # to allow for NaN
    'relatedPlayerId': 'float32',  # to allow for NaN
    'cardType': 'str',
    'shotBodyType': 'str',
    'situation': 'str',
    'season': 'str',
}
dtype_dict.update({flag: 'bool' for flag in columns_to_cast + ['isOwnGoal']})

# Iterate through each subfolder in the main directory.
# os.listdir returns entries in arbitrary order; sorting keeps the order of the
# rows appended to the output file reproducible between runs.
for subfolder in sorted(os.listdir(main_dir)):
    subfolder_path = os.path.join(main_dir, subfolder)

    # Only directories are processed; plain files in the main directory are skipped
    if not os.path.isdir(subfolder_path):
        continue
    print(f"Processing folder: {subfolder}")

    # Iterate through all CSV files in the subfolder
    for file in sorted(os.listdir(subfolder_path)):
        if not file.endswith('.csv'):
            continue
        file_path = os.path.join(subfolder_path, file)
        print(f"Processing file: {file}")

        # Extract the season from the file name
        season = file.replace(".csv", "")

        # Read the CSV file
        df = pd.read_csv(file_path)

        # Add the 'season' column with the same value for all rows
        df['season'] = season

        # Parse 'startDate' (unparseable values become NaT) and keep only the
        # date part (year-month-day)
        df['startDate'] = pd.to_datetime(df['startDate'], errors='coerce').dt.date

        # Restore the score columns that were interpreted as times
        for col in ['score', 'ftScore', 'htScore', 'etScore']:
            df[col] = df[col].apply(fix_score_format).astype(str)

        # Replace 'False' and '0' with 'None' in the 'cardType' column
        df['cardType'] = df['cardType'].replace({'False': 'None', '0': 'None'})

        # Convert numerical 1/0 into True/False.
        # NOTE(review): astype(bool) turns NaN into True, so this assumes the
        # flag columns contain no missing values -- confirm against the data.
        df[columns_to_cast] = df[columns_to_cast].astype(bool)

        # Convert 'isOwnGoal' into a Boolean, where NaN becomes False
        df['isOwnGoal'] = df['isOwnGoal'].fillna(False).astype(bool)

        # Rescale the coordinate columns listed in scaling_map
        for column, (old_min, old_max, new_min, new_max) in scaling_map.items():
            df[column] = df[column].apply(lambda val: rescale(val, old_min, old_max, new_min, new_max))

        # Float columns to round to 1 decimal (since that is the format in the event file)
        float_columns = ['x', 'y', 'endX', 'endY', 'goalMouthZ', 'goalMouthY', 'blockedX', 'blockedY']
        for column in float_columns:
            df[column] = df[column].round(1)

        # Identifier columns: convert non-missing values to int, leave NaN untouched
        id_columns = ['playerId', 'relatedEventId', 'relatedPlayerId']
        for column in id_columns:
            df[column] = df[column].apply(lambda x: int(x) if pd.notna(x) else x)

        # Keep only shots taken from open play. The comparison needs its own
        # parentheses: `&` binds tighter than `==`, so the previous
        # `(isShot == True) & situation == 'OpenPlay'` compared the result of
        # `isShot & situation` with 'OpenPlay' instead of combining two masks.
        is_shot = df['isShot']  # already boolean after the cast above
        is_open_play = df['situation'] == 'OpenPlay'
        filtered_df = df[is_shot & is_open_play]

        # Select and reorder columns as specified
        # (add 'endX', 'endY' for passes and carries)
        output_columns = ['season', 'matchId', 'id', 'eventId', 'teamId', 'playerId',
                          'playerName', 'type', 'outcomeType', 'x', 'y']
        result_df = filtered_df[output_columns]

        # Sort by season, match, team and player (all ascending)
        result_df = result_df.sort_values(
            by=['season', 'matchId', 'teamId', 'playerId'],
            ascending=[True, True, True, True],
        ).reset_index(drop=True)

        # Convert 'playerId' from float to int, handling NaN values by keeping them as NaN
        result_df['playerId'] = result_df['playerId'].apply(lambda x: int(x) if pd.notna(x) else x)

        # Save to the output CSV file: the first write creates the file with a
        # header row, every later write appends rows without a header
        write_header = not file_exists
        result_df.to_csv(
            output_file,
            index=False,
            header=write_header,
            mode='w' if write_header else 'a',
        )
        file_exists = True  # From now on the file exists

        # Release memory for the current DataFrame
        del df, filtered_df, result_df
        print(f"Finished processing file: {file}")

print("All files processed successfully.")