In [1]:
import json
import pandas as pd
import cmath
import numpy as np
from instat_to_wyscout_pandas import *

In [2]:
def json_transform(dataframe, path):
    """Write ``dataframe`` to ``path`` as a JSON array with one object per row.

    Args:
        dataframe: Object exposing ``to_json(orient='records')``, e.g. a
            pandas DataFrame.
        path: Destination file path. It is opened in text write mode, so any
            existing content is replaced.

    Returns:
        None.
    """
    # Serialize before opening the file so a serialization failure cannot
    # leave a truncated file behind.
    records_json = dataframe.to_json(orient='records')

    with open(path, 'w') as out_file:
        out_file.write(records_json)
    

In [3]:
def reconstruct_endloc(row):
    """Build the pass ``endLocation`` mapping from a flattened event row.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by
            ``'pass.endLocation.x'`` and ``'pass.endLocation.y'``.

    Returns:
        dict with the keys ``"x"`` and ``"y"``.
    """
    end_x = row['pass.endLocation.x']
    end_y = row['pass.endLocation.y']
    return {"x": end_x, "y": end_y}



In [4]:
def reconstruct_recipient(row):
    """Build the pass ``recipient`` mapping from a flattened event row.

    Args:
        row: Mapping-like object indexable by ``'pass.recipient.id'``,
            ``'pass.recipient.name'`` and ``'pass.recipient.position'``.

    Returns:
        dict with the keys ``"id"``, ``"name"`` and ``"position"``.
    """
    recipient = {}
    recipient["id"] = row['pass.recipient.id']
    recipient["name"] = row['pass.recipient.name']
    recipient["position"] = row['pass.recipient.position']
    return recipient

In [5]:
def reconstruct_pass(row):
    """Build a nested ``pass`` mapping from a flattened event row.

    Args:
        row: Mapping-like object indexable by ``'pass.accurate'``,
            ``'pass.angle'``, ``'pass.height'``, ``'pass.length'`` and the
            keys read by ``reconstruct_recipient``.

    Returns:
        dict with the keys ``"accurate"``, ``"angle"``, ``"height"``,
        ``"length"`` and ``"recipient"`` (the latter produced by
        ``reconstruct_recipient``). No ``endLocation`` entry is included.
    """
    # The recipient is resolved first, before any of the scalar pass fields.
    recipient = reconstruct_recipient(row)
    return {
        "accurate": row['pass.accurate'],
        'angle': row['pass.angle'],
        'height': row['pass.height'],
        'length': row['pass.length'],
        'recipient': recipient,
    }

In [6]:
def reconstruct_poss_team(row):
    """Build the possession ``team`` mapping from a flattened event row.

    Args:
        row: Mapping-like object indexable by ``'possession.team.id'``,
            ``'possession.team.name'`` and ``'possession.team.formation'``.

    Returns:
        dict with the keys ``"id"``, ``"name"`` and ``"formation"``.
    """
    team_id = row['possession.team.id']
    team_name = row['possession.team.name']
    team_formation = row['possession.team.formation']
    return {"id": team_id, "name": team_name, "formation": team_formation}

In [7]:
def reconstruct_poss_endloc(row):
    """Build the possession ``endLocation`` mapping from a flattened event row.

    Args:
        row: Mapping-like object indexable by ``'possession.endLocation.x'``
            and ``'possession.endLocation.y'``.

    Returns:
        dict with the keys ``"x"`` and ``"y"``.
    """
    return {
        "x": row['possession.endLocation.x'],
        "y": row['possession.endLocation.y'],
    }

In [8]:
def reconstruct_poss_startloc(row):
    """Build the possession ``startLocation`` mapping from a flattened event row.

    Args:
        row: Mapping-like object indexable by ``'possession.startLocation.x'``
            and ``'possession.startLocation.y'``.

    Returns:
        dict with the keys ``"x"`` and ``"y"``.
    """
    start = {}
    start["x"] = row['possession.startLocation.x']
    start["y"] = row['possession.startLocation.y']
    return start

In [9]:
def reconstruct_attack(row):
    """Rebuild the nested ``possession.attack`` mapping from a flattened event row.

    Args:
        row: Mapping-like object indexable by the ``'possession.attack.*'``
            keys read below.

    Returns:
        ``np.nan`` when ``row['possession.attack.withShot']`` is missing
        according to ``pd.isna``; otherwise a dict with the keys
        ``"withShot"``, ``"withShotOnGoal"``, ``"withGoal"``, ``"flank"`` and
        ``"xg"``, in that order.
    """
    # 'withShot' acts as the presence marker for the whole attack object.
    if pd.isna(row['possession.attack.withShot']):
        return np.nan

    # Each key appears exactly once; a repeated key in a dict literal only
    # overwrites the earlier value and adds nothing.
    return {
        "withShot": row['possession.attack.withShot'],
        "withShotOnGoal": row['possession.attack.withShotOnGoal'],
        "withGoal": row['possession.attack.withGoal'],
        "flank": row['possession.attack.flank'],
        "xg": row['possession.attack.xg'],
    }


In [10]:
# Keywords understood by reconstruct_cases, one per nested event field it can
# rebuild from flattened columns.
reconstruc_list = [
    # who / where
    "type", "location", "team", "opponentTeam", "player",
    # action-specific details
    "pass", "shot", "groundDuel", "aerialDuel", "infraction", "carry",
    # possession context
    "possession",
]

In [11]:
def getgoalkeeper(row):
    """Build the shot ``goalkeeper`` mapping from a flattened event row.

    Args:
        row: Mapping-like object indexable by ``'shot.goalkeeperActionId'``,
            ``'shot.goalkeeper.id'`` and ``'shot.goalkeeper.name'``.

    Returns:
        dict with the keys ``"goalkeeperActionId"``, ``"id"`` and ``"name"``.
    """
    keeper = {}
    keeper["goalkeeperActionId"] = row['shot.goalkeeperActionId']
    keeper["id"] = row['shot.goalkeeper.id']
    keeper["name"] = row['shot.goalkeeper.name']
    return keeper

In [12]:
def getopponent(row):
    """Build the ground-duel ``opponent`` mapping from a flattened event row.

    Args:
        row: Mapping-like object indexable by ``'groundDuel.opponent.id'``,
            ``'groundDuel.opponent.name'`` and
            ``'groundDuel.opponent.position'``.

    Returns:
        dict with the keys ``"id"``, ``"name"`` and ``"position"``.
    """
    opponent_id = row['groundDuel.opponent.id']
    opponent_name = row['groundDuel.opponent.name']
    opponent_position = row['groundDuel.opponent.position']
    return {"id": opponent_id, "name": opponent_name, "position": opponent_position}

In [13]:
def getopponent_aerial(row):
    """Build the aerial-duel ``opponent`` mapping from a flattened event row.

    Args:
        row: Mapping-like object indexable by ``'aerialDuel.opponent.id'``,
            ``'aerialDuel.opponent.name'``, ``'aerialDuel.opponent.position'``
            and ``'aerialDuel.opponent.height'``.

    Returns:
        dict with the keys ``"id"``, ``"name"``, ``"position"`` and
        ``"height"``.
    """
    fields = ("id", "name", "position", "height")
    values = (
        row['aerialDuel.opponent.id'],
        row['aerialDuel.opponent.name'],
        row['aerialDuel.opponent.position'],
        row['aerialDuel.opponent.height'],
    )
    return dict(zip(fields, values))

In [14]:
def getopponent_infraction(row):
    """Build the infraction ``opponent`` mapping from a flattened event row.

    Args:
        row: Mapping-like object indexable by ``'infraction.opponent.id'``,
            ``'infraction.opponent.name'`` and
            ``'infraction.opponent.position'``.

    Returns:
        dict with the keys ``"id"``, ``"name"`` and ``"position"``.
    """
    return {
        "id": row['infraction.opponent.id'],
        "name": row['infraction.opponent.name'],
        "position": row['infraction.opponent.position'],
    }

In [15]:
def reconstruct_carry_loc(row):
    """Build the carry ``endLocation`` mapping from a flattened event row.

    Args:
        row: Mapping-like object indexable by ``'carry.endLocation.x'`` and
            ``'carry.endLocation.y'``.

    Returns:
        dict with the keys ``"x"`` and ``"y"``.
    """
    # Read the carry's own end location (the same columns the "carry" case of
    # reconstruct_cases uses), not the possession's end location.
    return {"x": row['carry.endLocation.x'], "y": row['carry.endLocation.y']}

In [17]:
def reconstruct_cases(row, keyword): 
    match keyword:
        case "type":
            return {"primary": row['type.primary'], "secondary": row['type.secondary']}
        case "location":
            if (pd.isna(row['location.x'])):
                return np.nan
            return {"x": row['location.x'], "y": row['location.y']}
        case "team": 
            return {"id": row['team.id'], "name": row['team.name'], "formation": row['team.formation']}
        case "opponentTeam":
            return {"id": row['opponentTeam.id'], "name": row['opponentTeam.name'], "formation": row['opponentTeam.formation']}
        case "player": 
            return {"id": row['player.id'], "name": row['player.name'], "position": row['player.position']}
        case "pass":
            if (pd.isna(row['pass.accurate'])):
                return np.nan
            rec = reconstruct_recipient(row)
            endloc =reconstruct_endloc(row)
            return {"accurate": row['pass.accurate'], "angle": row['pass.angle'], "height": row['pass.height'], "length": row['pass.length'], "recipient": rec, "endLocation": endloc}
        case "possession": 
            if (pd.isna(row['possession.startLocation.x'])):
                return np.nan
            startloc = reconstruct_poss_startloc(row)
            endloc = reconstruct_poss_endloc(row)
            team = reconstruct_poss_team(row)
            attack = reconstruct_attack(row)
            return {"id": row['possession.id'], "duration": row['possession.duration'], "types": row['possession.types'], "eventsNumber": row['possession.types'], "eventIndex": row['possession.types'], "startLocation": startloc,"endLocation": endloc, "team": team, "attack": attack}
        case "shot": 
            if (pd.isna(row['shot.bodyPart'])):
                return np.nan
            keeper = getgoalkeeper(row)
            return {"bodyPart": row['shot.bodyPart'], "isGoal": row['shot.isGoal'], "onTarget": row['shot.onTarget'], "goalZone": row['shot.goalZone'], "xg": row['shot.xg'], "postShotXg": row['shot.postShotXg'], "goalkeeper": keeper}
        case "groundDuel": 
            if (pd.isna(row['groundDuel.duelType'])):
                return np.nan
            opponent = getopponent(row)
            return {"opponent": opponent, "duelType": row['groundDuel.duelType'], "keptPossession": row['groundDuel.keptPossession'], "progressedWithBall": row['groundDuel.progressedWithBall'], "stoppedProgress": row['groundDuel.stoppedProgress'], "recoveredPossession": row['groundDuel.recoveredPossession'], "takeOn": row['groundDuel.takeOn'], "side": row['groundDuel.side'], "relatedDuelId": row['groundDuel.relatedDuelId'] }
        case "aerialDuel":
            if (pd.isna(row['aerialDuel.opponent.name'])):
                return np.nan
            opponent = getopponent_aerial(row)
            return {"opponent": opponent, "firstTouch": row['aerialDuel.firstTouch'], "height": row['aerialDuel.height'], "relatedDuelId": row['aerialDuel.relatedDuelId']}
        case "infraction": 
            if (pd.isna(row['infraction.yellowCard'])):
                return np.nan
            opponent = getopponent_infraction(row)
            return {"yellowCard": row['infraction.yellowCard'], "redCard": row['infraction.redCard'], "type": row['infraction.type'], "opponent": opponent}
        case "carry":
            if (pd.isna(row['carry.progression'])):
                return np.nan
            return {"progression": row['carry.progression'], "endLocation": {"x": row['carry.endLocation.x'], "y": row['carry.endLocation.y']}}



In [13]:
# Path of the match raw-data input under data/ (the string carries no file extension).
mypath = "data/National_Men_2022-09-24_Europe. UEFA U19 Championship Qualification_Czechia U19_vs_Switzerland U19_2385376_2022-09-24_Czechia U19_vs_Switzerland U19_2385376_rawdata"

In [3]:
# Path of the match raw-data input under data/ (the string carries no file extension).
mypath = "data/National_Men_2022-09-24_Europe. UEFA U19 Championship Qualification_Czechia U19_vs_Switzerland U19_2385376_2022-09-24_Czechia U19_vs_Switzerland U19_2385376_rawdata"
# pandas_transform is not defined in this notebook; presumably it comes from
# the star import of instat_to_wyscout_pandas - TODO confirm.
# NOTE(review): the recorded run of this cell failed with an XMLSyntaxError
# raised from pd.read_xml - confirm that mypath points at a readable XML file.
dataframe = pandas_transform(mypath)
dataframe

  instatdf=pd.read_xml(instat_path)


XMLSyntaxError: Start tag expected, '<' not found, line 1, column 1 (<string>, line 1)

In [3]:
# JSON file that is opened and parsed by the following cell.
path = 'data/Wyscout_Servette_FCZ_2023-06-02.json'

In [4]:
# Read the whole file as text; the handle is closed when the block exits.
with open(path, 'r') as f:
    rd = f.read()
# Parse the text as JSON, then flatten the parsed object into a DataFrame.
obj = json.loads(rd)
dfw = pd.json_normalize(obj)
# Bare last expression: display the resulting frame.
dfw

Unnamed: 0,meta,events
0,[],"[{'id': 1669813357, 'matchId': 5461231, 'match..."


In [5]:
# Take the 'events' cell of the first row of dfw.
single_entry = dfw['events'].iat[0]

# Flatten it into a table; nested fields become dotted column names
# (e.g. 'aerialDuel.opponent.id').
expanded_table = pd.json_normalize(single_entry)
expanded_table

Unnamed: 0,id,matchId,matchPeriod,minute,second,matchTimestamp,videoTimestamp,relatedEventId,shot,groundDuel,...,aerialDuel.opponent.id,aerialDuel.opponent.name,aerialDuel.opponent.position,aerialDuel.opponent.height,aerialDuel.firstTouch,aerialDuel.height,aerialDuel.relatedDuelId,shot.goalkeeper,infraction.opponent,location
0,1669813357,5461231,1H,0,3,00:00:03.210,4.210517,1.669813e+09,,,...,,,,,,,,,,
1,1669813358,5461231,1H,0,5,00:00:05.478,6.478266,1.669813e+09,,,...,,,,,,,,,,
2,1669813359,5461231,1H,0,12,00:00:12.267,13.267664,,,,...,,,,,,,,,,
3,1669813427,5461231,1H,0,19,00:00:19.628,20.628473,1.669813e+09,,,...,,,,,,,,,,
4,1669813430,5461231,1H,0,22,00:00:22.068,23.0683,1.669813e+09,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1565,1669815080,5461231,2H,93,49,01:33:49.768,5652.768946,1.669815e+09,,,...,,,,,,,,,,
1566,1669814887,5461231,2H,93,52,01:33:52.137,5655.13785,1.669815e+09,,,...,,,,,,,,,,
1567,1669814888,5461231,2H,93,53,01:33:53.359,5656.359792,1.669815e+09,,,...,,,,,,,,,,
1568,1669815081,5461231,2H,93,58,01:33:58.521,5661.521425,,,,...,,,,,,,,,,


In [7]:
# Rebuild the nested possession object for every event row.
# NOTE(review): the result of the "possession" keyword is stored in the 'type'
# column, overwriting it - confirm whether the target column should be
# 'possession' (or the keyword "type").
expanded_table['type'] = expanded_table.apply(lambda row: reconstruct_cases(row, "possession"), axis=1)
# Copy for display with NaN cells replaced by the placeholder string "heelo".
# (A preceding replace(pd.NA, None) result was overwritten before use and is
# therefore not computed.)
dl = expanded_table.replace(np.nan, "heelo")
dl

NameError: name 'reconstruct_cases' is not defined

In [8]:
# One plain dict per event row, keyed by the flattened column names.
expanded_dict = expanded_table.to_dict(orient='records')
nested_structure = {
    'events': expanded_dict
}

x = pd.DataFrame(nested_structure)
# Render every event dict with str() and wrap the comma-separated result in
# brackets. join() keeps the brackets balanced even when there are no events
# (the result is then '[]').
concatenated_result = '[' + ', '.join(str(event) for event in x['events']) + ']'

# NOTE(review): 'events' is stored as ONE string made of Python dict reprs
# (single quotes, nan), not as a list of objects, so serializing y produces a
# JSON string for this field rather than a JSON array - confirm this is intended.
newdata = {
    'meta': [[]],
    'events': concatenated_result
}
print(type(concatenated_result))
y = pd.DataFrame(newdata)
y



<class 'str'>


Unnamed: 0,meta,events
0,[],"[{'id': 1669813357, 'matchId': 5461231, 'match..."


In [9]:
# Destination file, created or overwritten in the working directory.
file_path = 'output2.json'
# Serialize y as a JSON array with one object per row.
json_data = y.to_json(orient='records')

# Persist the serialized text.
with open(file_path, 'w') as json_file:
    json_file.write(json_data)