# Data Collection

#### Here we are extracting the telemetry data. With this, you can effectively recreate a whole match, with details of every small event. The files are too large to save and keep, so I had to be selective about what I pulled from them.


In [1]:
from tqdm import tqdm
import requests
import pandas as pd
import numpy as np
import time
import json
from datetime import datetime
import os
import pickle

Here I read in the dataframe we made in data collection P2. We then iterate through it, pulling the telemetry info.

In [None]:
# Load the per-match table; the extraction loop reads its 'telemetry_url'
# column and every column whose name contains '_name'.
df_to_extract = pd.read_csv("match_df.csv")

In [None]:
# Download each match's telemetry, reduce it to a single summary row and save
# that row as its own CSV file (the raw telemetry is too large to keep).

OUTPUT_DIR = 'telemetry_dataframes3'
REQUEST_TIMEOUT = 60  # seconds; keeps one stalled download from hanging the run

# Keys copied out of the 'gameState' dict of each game-state snapshot.
GAME_STATE_FIELDS = (
    'numAliveTeams',
    'numJoinPlayers',
    'numStartPlayers',
    'poisonGasWarningPosition',
    'poisonGasWarningRadius',
)

# Kill-event fields to keep: (top-level key, sub-keys to copy from it) ...
KILL_NESTED_FIELDS = (
    ('victim', ('accountId', 'location', 'ranking')),
    ('finisher', ('accountId', 'location', 'health', 'ranking')),
    ('finishDamageInfo',
     ('damageReason', 'damageTypeCategory', 'damageCauserName', 'distance')),
)
# ... and top-level keys copied as-is.
KILL_FLAT_FIELDS = ('isSuicide', 'common', '_D')

# Without this, a missing directory makes every to_csv() call fail and every
# match would be skipped.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Columns holding the names of the 'key players' tracked for each match.
name_columns = [col for col in df_to_extract.columns if '_name' in col]

for df_index_num, url in enumerate(tqdm(df_to_extract['telemetry_url'])):
    time.sleep(3)  # pause between downloads
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        events = response.json()

        # Flatten every event dict into one "key=repr(value), ..." string so
        # events can be classified with plain substring checks.
        flat_events = [
            ', '.join('{!s}={!r}'.format(key, val) for key, val in event.items())
            for event in events
        ]

        # Indices of the events we are interested in.
        game_state_events = []
        flight_path = []
        player_deaths = []
        care_packages = []
        for idx, text in enumerate(flat_events):
            if 'gamestate' in text.lower():
                game_state_events.append(idx)
            if ('DummyTransportAircraft_C' in text
                    or 'TransportAircraft_Chimera_C' in text):
                flight_path.append(idx)
            if "'LogPlayerKillV2'" in text:
                player_deaths.append(idx)
            if 'LogCarePackageLand' in text:
                care_packages.append(idx)

        # First event index seen for every distinct game state ('isGame').
        game_state_df = (
            pd.DataFrame(
                [(idx, events[idx]['common']['isGame'])
                 for idx in game_state_events],
                columns=['index_number', 'game_state'],
            )
            .drop_duplicates('game_state')
            .set_index('game_state')
        )

        match_id = events[0]['MatchId']
        game_state_metrics = [
            match_id,
            events[flight_path[0]]['character']['location'],
            events[flight_path[-1]]['character']['location'],
        ]
        game_state_column_names = [
            'MatchId',
            'first_flight_location',
            'last_flight_location',
        ]

        # Location and time of each care package landing. Values are added in
        # the same order as their column names (previously the timestamp was
        # stored under the '_location' name and vice versa). Both values are
        # read before anything is appended, so a malformed event cannot leave
        # values and names misaligned; as before, the first malformed package
        # ends care-package collection for this match.
        try:
            for num, idx in enumerate(care_packages, start=1):
                package = events[idx]
                package_location = package['itemPackage']['location']
                package_timestamp = package['_D']
                game_state_metrics += [package_location, package_timestamp]
                game_state_column_names += [
                    f'care_package_#{num}_location',
                    f'care_package_#{num}_timestamp',
                ]
        except (KeyError, TypeError):
            pass

        # Details about each game state (play area), taken from the first
        # event recorded for that state.
        for state, event_idx in game_state_df['index_number'].items():
            snapshot = events[int(event_idx)]
            state_prefix = 'GS_' + str(state)[0:3]
            game_state_metrics.append(snapshot['_D'])
            game_state_column_names.append(state_prefix + '_timestamp')
            for field in GAME_STATE_FIELDS:
                game_state_metrics.append(snapshot['gameState'][field])
                game_state_column_names.append(state_prefix + '_' + field)

        kill_metrics = []
        kill_metric_col_names = []

        # Where each 'key player' landed and where they were when each game
        # state was first logged for them.
        for which_player in name_columns:
            search_name = df_to_extract[which_player].iloc[df_index_num]
            player_marker = "character={'name': '" + search_name + "'"
            # Column prefix: the column name minus its trailing 'name'.
            player_prefix = str(which_player)[:-4]

            landing_events = []
            position_events = []
            for idx, text in enumerate(flat_events):
                if player_marker not in text:
                    continue
                if 'LogParachuteLanding' in text:
                    landing_events.append(idx)
                if 'LogPlayerPosition' in text:
                    position_events.append(idx)

            # Landing info is optional: skip it when the player has no usable
            # landing event. Values are read first so names stay aligned.
            try:
                landing = events[landing_events[0]]
                landing_values = [
                    landing['character']['location'],
                    landing['distance'],
                    landing['_D'],
                ]
            except (IndexError, KeyError, TypeError):
                pass
            else:
                kill_metrics.extend(landing_values)
                kill_metric_col_names.extend([
                    player_prefix + 'landing_location',
                    player_prefix + 'parachute_distance',
                    player_prefix + 'landing_timestamp',
                ])

            # First position event per game state, in order of appearance.
            # Comparing the parsed 'isGame' value (rather than searching the
            # flattened text) avoids prefix matches such as 1 matching 1.5.
            first_position_by_state = {}
            for idx in position_events:
                first_position_by_state.setdefault(
                    events[idx]['common']['isGame'], idx)

            # The first game state seen for the player is skipped.
            for state, idx in list(first_position_by_state.items())[1:]:
                kill_metrics.append(events[idx]['character']['location'])
                kill_metric_col_names.append(
                    player_prefix + 'GS_' + str(state)[:3] + '_location')

        # Walk the deaths last-to-first so the numbering starts at the final
        # death (the original author's intent: line up with ranking positions).
        for num, idx in enumerate(reversed(player_deaths)):
            kill = events[idx]

            # Bot/AI deaths do not carry the full set of fields, so only the
            # reason is recorded for them.
            if kill['killerDamageInfo']['damageReason'] == 'SimulateAIBeKilled':
                kill_metrics.append('SimulateAIBeKilled')
                kill_metric_col_names.append(
                    f'death#{num}_finishDamageInfo_damageReason_')
                continue

            for key, sub_keys in KILL_NESTED_FIELDS:
                section = kill[key]
                if section is None:
                    continue
                for sub_key in sub_keys:
                    kill_metrics.append(section[sub_key])
                    kill_metric_col_names.append(f'death#{num}_{key}_{sub_key}')

            for key in KILL_FLAT_FIELDS:
                kill_metrics.append(kill[key])
                kill_metric_col_names.append(f'death#{num}_{key}_')

        col_names = game_state_column_names + kill_metric_col_names
        rows = game_state_metrics + kill_metrics

        # Save each match as its own one-row dataframe, to be combined later.
        df = pd.DataFrame([rows], columns=col_names)
        save_name = OUTPUT_DIR + '/' + match_id[-36:] + '.csv'
        df.to_csv(save_name, index=False)

    except Exception as err:
        # Best effort: a match that cannot be downloaded or parsed is skipped,
        # but reported so failures are visible. KeyboardInterrupt is no longer
        # swallowed, so the run can be stopped.
        tqdm.write(f'Skipping match #{df_index_num} ({url}): {err!r}')
        continue