# <center>REPLAY TO DATA</center>

## Importing packages, setting variables

In [None]:
from carball.json_parser.game import Game
from carball.analysis.analysis_manager import AnalysisManager
import json 
import pandas as pd
import numpy as np
import subprocess

# Locations of the rrrocket executable and of the replay to parse.
# NOTE(review): both are absolute paths on one specific machine; adjust before running elsewhere.
RRROCKET_PATH = '/Users/marcomaluf/Desktop/Unfinished Projects/New RL/rrrocket'
REPLAY_PATH = '/Users/marcomaluf/Desktop/Unfinished Projects/New RL/replays/00a182df-e490-4f98-9143-136bbc443d8b.replay'

# Argument vector handed to subprocess.run: executable, parse flag, replay file.
command = [RRROCKET_PATH, "--network-parse", REPLAY_PATH]

## Getting JSON output from rrrocket and turning it into a DataFrame with carball

In [None]:
# Run rrrocket on the replay; its JSON output is read from stdout.
try:
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,  # Decode bytes to string
        check=True                # Raise exception for non-zero exit codes
    )
except subprocess.CalledProcessError as e:
    # Show what rrrocket reported before failing.
    print(f"Error: {e}")
    print(f"Return Code: {e.returncode}")
    print("Standard Error:")
    print(e.stderr)
    # Re-raise so the cell stops here: continuing would either hit a NameError
    # on `replay_json` or silently reuse a value left over from an earlier run.
    raise

# Parsed outside the try block: a JSON decoding problem is not a
# CalledProcessError and should surface with its own traceback.
replay_json = json.loads(result.stdout)

# Feed the parsed replay to carball.
game = Game()
game.initialize(loaded_json=replay_json)

# Build the per-frame DataFrame (intensive event calculation is switched off).
analysis_manager = AnalysisManager(game)
analysis_manager.create_analysis(calculate_intensive_events=False)
replay_dataframe = analysis_manager.get_data_frame()

In [5]:
# Print a summary of the parsed replay frame (index range, column count, dtypes, memory usage).
replay_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10300 entries, 1 to 10306
Columns: 112 entries, ('Rythex', 'ping') to ('Ivar111', 'boost_collect')
dtypes: float64(85), int64(1), object(26)
memory usage: 9.2+ MB


---

## Formatting the data for training/evaluation

### Getting key frames from replay .json

In [6]:
# Collect the frame number stored on every highlight entry of the replay header.
highlights = replay_json['properties']['HighLights']
keyframes = [highlight['frame'] for highlight in highlights]
keyframes

[726, 1209, 2071, 2393, 3316, 6015, 6266, 7259, 7648, 10231]

In [7]:
# Collect the frame number stored on every goal entry of the replay header.
goals = replay_json['properties']['Goals']
goalframes = [goal['frame'] for goal in goals]
goalframes

[1209, 2071, 3316, 6266, 7259, 7648, 10231]

In [8]:
# Label every key frame: 1 when the same frame is also a goal frame, 0 otherwise.
shots = {frame: int(frame in goalframes) for frame in keyframes}

shots

{726: 0,
 1209: 1,
 2071: 1,
 2393: 0,
 3316: 1,
 6015: 0,
 6266: 1,
 7259: 1,
 7648: 1,
 10231: 1}

In [9]:
# DISTANCE FUNCTION FOR THE `dist_to_ball` COLUMN
def distance(v1, v2):
    """Return the row-wise Euclidean distance between `v1` and `v2`.

    Both arguments are 2-D (one point per row, one coordinate per column) and
    must be subtractable from each other. The squared differences are summed
    along axis 1 and the square root of each row sum is returned.
    """
    squared_diff = np.square(v1 - v2)
    return np.sqrt(np.sum(squared_diff, axis=1))

In [10]:
# HOW TO GET MATCH ID: it is stored under properties -> Id in the parsed replay JSON
replay_json['properties']['Id']

'C8109EA44BBF8774B4890D8677CCBDE1'

---

## Data Formatting Roadmap:

DataFrame format:
* $\text{Feature Space} = 112$
* First $10$ columns: ball columns
* Rest of the columns: player columns (each player has $17$ columns)
* For non-3v3 games: rest of the values will be padded with *TBD* ($-\infty$?) (for use with masking layer)

Steps:
* First gather the ball data from the `replay_dataframe`
* Then, from the dictionary of players and teams (`players_teams`), iterate (in order) through players and add their relevant data (both old and new columns) into a temp_df and concatenate it with the ball data
    * If `len(columns)` $< 112$, fill in empty space with the padding value (*TBD*) 
* Then for each keyframe in `keyframes`, take a segment from indices [frame-64:frame] (frame = keyframe - 1)
    * For each segment, classify it as either 0 (save) or 1 (goal)

In [11]:
SHOT_SEGMENT = 64       # number of frames kept before each key frame
FEATURE_COUNT = 112     # 10 ball columns + 6 players * 17 columns each
PAD_VALUE = -9999999    # filler for missing player slots (e.g. non-3v3 games)

# Player name -> team number, ordered by team so the column layout is consistent.
players_teams = {p['Name']: p['Team'] for p in replay_json['properties']['PlayerStats']}
players_teams = dict(sorted(players_teams.items(), key=lambda item: item[1]))

# COLUMNS I WANT (from ball and player)
ball_cols = ['pos_x', 'pos_y', 'pos_z', 'vel_x','vel_y', 'vel_z', 
             'ang_vel_x', 'ang_vel_y', 'ang_vel_z', 'hit_team_no']

player_cols = ['pos_x', 'pos_y', 'pos_z', 'vel_x', 'vel_y', 'vel_z', 
               'ang_vel_x','ang_vel_y', 'ang_vel_z', 'throttle', 'steer', 
               'dodge_active', 'double_jump_active', 'jump_active', 'boost_active']

position_cols = ['pos_x', 'pos_y', 'pos_z']

# Merge the data: ball columns first, then one 17-column group per player.
ball_data = replay_dataframe['ball'][ball_cols]
column_groups = [ball_data]
for player_name, team in players_teams.items():
    # Explicit copy so the two added columns do not trigger SettingWithCopyWarning.
    player_data = replay_dataframe[player_name][player_cols].copy()
    player_data['team'] = team
    player_data['dist_to_ball'] = distance(ball_data[position_cols],
                                           player_data[position_cols])
    column_groups.append(player_data)

# Single concat instead of growing the frame inside the loop, then reset the index.
# NOTE(review): after reset_index the rows below are addressed by position, while the
# key frames come from the replay header. If the carball index has gaps (the .info()
# output suggests it can), position and frame number may differ by a few rows -- confirm.
match_data = pd.concat(column_groups, axis=1).reset_index(drop=True)

# Pad up to FEATURE_COUNT columns whenever player slots are missing.
amt_to_add = FEATURE_COUNT - len(match_data.columns)
if amt_to_add > 0:
    to_add = [f"Column_{i+1}" for i in range(amt_to_add)]
    padding = pd.DataFrame(PAD_VALUE, index=match_data.index, columns=to_add)
    match_data = pd.concat([match_data, padding], axis=1)

# adding label column (remember its position instead of hard-coding 112)
label_position = match_data.shape[1]
match_data['label'] = np.nan
# adding matchId for organizational purposes, will not be used in model
matchId = replay_json['properties']['Id']
match_data['matchId'] = matchId

# Mark each key-frame row with its label.
for key_frame, label in shots.items():
    match_data.iloc[key_frame, label_position] = label

# getting the shot segments: the SHOT_SEGMENT rows that end just before each key frame
segments = []
for key_frame, label in shots.items():
    frame = key_frame - 1
    start = frame - SHOT_SEGMENT
    if start < 0:
        # Not enough history before this key frame; a negative start would
        # wrap around to the end of the frame instead of giving a valid window.
        continue
    segment = match_data.iloc[start:frame].copy()
    segment['label'] = label
    segments.append(segment)

# Fall back to an empty frame with the right columns when no segment qualifies.
formatted_data = pd.concat(segments, axis=0) if segments else match_data.iloc[0:0].copy()

formatted_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


Unnamed: 0,pos_x,pos_y,pos_z,vel_x,vel_y,vel_z,ang_vel_x,ang_vel_y,ang_vel_z,hit_team_no,...,Column_27,Column_28,Column_29,Column_30,Column_31,Column_32,Column_33,Column_34,label,matchId
661,-2606.08,4381.32,566.32,8856.5,231.5,-11128.4,2102.9,2552.7,5006.1,1.0,...,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,0,C8109EA44BBF8774B4890D8677CCBDE1
662,-2569.21,4382.27,519.31,8845.0,231.0,-11384.9,2102.9,2552.7,5006.1,1.0,...,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,0,C8109EA44BBF8774B4890D8677CCBDE1
663,-2539.74,4383.03,480.93,8835.8,230.6,-11589.7,2102.9,2552.7,5006.1,1.0,...,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,0,C8109EA44BBF8774B4890D8677CCBDE1
664,-2517.66,4383.60,451.70,8828.9,230.3,-11743.3,2102.9,2552.7,5006.1,1.0,...,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,0,C8109EA44BBF8774B4890D8677CCBDE1
665,-2466.21,4384.93,382.01,8812.8,229.6,-12101.0,2102.9,2552.7,5006.1,1.0,...,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,0,C8109EA44BBF8774B4890D8677CCBDE1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10225,-247.91,5203.03,224.10,,,,,,,1.0,...,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,1,C8109EA44BBF8774B4890D8677CCBDE1
10226,-247.91,5203.03,224.10,,,,,,,1.0,...,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,1,C8109EA44BBF8774B4890D8677CCBDE1
10227,-247.91,5203.03,224.10,,,,,,,1.0,...,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,1,C8109EA44BBF8774B4890D8677CCBDE1
10228,-247.91,5203.03,224.10,,,,,,,1.0,...,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,-9999999,1,C8109EA44BBF8774B4890D8677CCBDE1


In [19]:
# Folder that receives the per-match CSV exports.
# NOTE(review): absolute path on one specific machine; adjust before running elsewhere.
DATA_FOLDER_PATH = '/Users/marcomaluf/Desktop/Unfinished Projects/New RL/formatted data'

# One CSV per replay, named after the match id.
csv_path = f'{DATA_FOLDER_PATH}/{matchId}.csv'
formatted_data.to_csv(csv_path)