In [1]:
from IPython.core.debugger import Tracer, Pdb
import pdb
import json
import numpy
import csv
import logging
import os, sys, re, copy, math, time, enum, ctypes
import multiprocessing
from multiprocessing import Pool
### magic numbers/values ###
# health fraction (health/maxHealth) at or below which a hero is considered heavily damaged
HEAVY_DAMAGE_VAL = 0.1
# the upper limit for the per-hero "time since last ..." counters (damage, tower attack, ability, debuff)
LAST_DAMAGE_MAX_TIME = 200
# the distance below which a hero is counted for the nearby allies/enemies numbers
NEARBY_DISTANCE = 20
# how far to look into the future to evaluate critical situations (passed to ReplayStateList.getFutureState, i.e. an offset into the state list)
CRITICAL_FUTURE_TIME = 50
# if a critical hero will not have been damaged for this amount of ticks, they are considered saved
LAST_DAMAGE_THRESHOLD = 40
# enables the inline "if TEST:" snippets that follow the definitions (the snippets themselves are untested)
TEST = False
# force TEST to False unless this file runs as the main module, to prevent massive spam on import; if you really want the output, comment out the line below
TEST = __name__ == '__main__' and TEST or False
# hard-coded output version. This version will be used by this part of the code while saving and by the ML part while reading the data files; Version 8e was used for the thesis
OUTPUT_VERSION = "8e"
# list of numbers per output version; presumably the tick offsets (framesP) that version looks back -- TODO confirm against the caller of createDataArray
OUTPUT_VERSION_DICT={"8":[90,120,150,180,210,240], "8b":[90,120,150],"8c":[120,150,180],"8d":[120,150,180,210,240], "8e":[150,180,210,240]}
### logging ###
# module logger: existing handlers are cleared first (re-running this cell would otherwise add a second handler), then WARNING and above go to a stream handler
logger = logging.getLogger(__name__)
logger.handlers.clear()
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.WARNING)

In [2]:
# Internal hero names. The position in this list is the numeric hero ID returned by
# hash_hero_name(); '_' entries are placeholders for unused IDs and are rejected by
# unhash_hero_name(). Do not reorder: stored IDs depend on the positions.
all_hero_names=['_', 'antimage', 'axe', 'bane', 'bloodseeker', 'crystalmaiden', 'drowranger',
			'earthshaker', 'juggernaut', 'mirana', 'morphling', 'nevermore', 'phantomlancer',
			'puck', 'pudge', 'razor', 'sandking', 'stormspirit', 'sven', 'tiny', 'vengefulspirit',
			'windrunner', 'zuus', '_', 'kunkka', 'lina', 'lion', 'shadowshaman', 'slardar',
			'tidehunter', 'witchdoctor', 'lich', 'riki', 'enigma', 'tinker', 'sniper', 'necrolyte',
			'warlock', 'beastmaster', 'queenofpain', 'venomancer', 'facelessvoid', 'skeletonking',
			'deathprophet', 'phantomassassin', 'pugna', 'templarassassin', 'viper', 'luna',
			'dragonknight', 'dazzle', 'rattletrap', 'leshrac', 'furion', 'lifestealer',
			'darkseer', 'clinkz', 'omniknight', 'enchantress', 'huskar', 'nightstalker',
			'broodmother', 'bountyhunter', 'weaver', 'jakiro', 'batrider', 'chen', 'spectre',
			'ancientapparition', 'doombringer', 'ursa', 'spiritbreaker', 'gyrocopter', 'alchemist',
			'invoker', 'silencer', 'obsidiandestroyer', 'lycan', 'brewmaster', 'shadowdemon',
			'lonedruid', 'chaosknight', 'meepo', 'treant', 'ogremagi', 'undying', 'rubick',
			'disruptor', 'nyxassassin', 'nagasiren', 'keeperofthelight', 'wisp', 'visage',
			'slark', 'medusa', 'trollwarlord', 'centaur', 'magnataur', 'shredder', 'bristleback',
			'tusk', 'skywrathmage', 'abaddon', 'eldertitan', 'legioncommander', 'techies',
			'emberspirit', 'earthspirit', '_', 'terrorblade', 'phoenix', 'oracle',
			'winterwyvern', 'arcwarden']
		# heroes not yet included in the list: 'abyssalunderlord'(108), 'monkey_king'(114), 'dark_willow'(119), 'pangolier'(120)
def hash_hero_name(name):
	"""Return the numeric ID of a hero, i.e. its index in ``all_hero_names``.

	The placeholder entry '_' (used for unused IDs) is rejected: it occurs
	several times in the list and ``unhash_hero_name`` refuses its indices, so
	accepting it would hand out an ID that cannot be converted back.

	Args:
		name: internal hero name, e.g. ``"zuus"``.

	Returns:
		The integer index of ``name`` in ``all_hero_names``.

	Raises:
		ValueError: if ``name`` is the placeholder or not in the list.
	"""
	if name == '_' or name not in all_hero_names:
		logger.error("hero name {} is not in the hero list".format(name))
		raise ValueError("error: hero {} is not in the list".format(name))
	return all_hero_names.index(name)

def unhash_hero_name(name_index):
	"""Translate a numeric hero ID back into the internal hero name.

	Args:
		name_index: index into ``all_hero_names``.

	Returns:
		The hero name stored at that index.

	Raises:
		ValueError: if the index is outside the list or points at a '_'
			placeholder entry.
	"""
	# the range test comes first so that the list is only indexed with valid positions
	is_known = name_index in range(len(all_hero_names)) and all_hero_names[name_index] != '_'
	if not is_known:
		logger.error("hero hash ID {} is not in the hero list".format(name_index))
		raise ValueError("error: hero ID {} is not in the list".format(name_index))
	return all_hero_names[name_index]

# cache of the per-hero base stats, filled on first use by get_initial_hero_stats();
# the globals() check keeps an already existing cache when this cell is run again
if not "herostatdict" in globals():
	herostatdict = {}

# CSV file with the hero base stats (path is relative to the current working directory)
herostatfilepath = "./dota_hero_stats.csv"
# hero names as they appear in the CSV (lower-cased, without spaces, apostrophes and
# hyphens) that differ from the internal names used in all_hero_names
_HERO_NAME_ALIASES = {
	'centaurwarrunner': 'centaur',
	'clockwerk': 'rattletrap',
	'doom': 'doombringer',
	'io': 'wisp',
	'magnus': 'magnataur',
	'naturesprophet': 'furion',
	'necrophos': 'necrolyte',
	'shadowfiend': 'nevermore',
	'outworlddevourer': 'obsidiandestroyer',
	'timbersaw': 'shredder',
	'wraithking': 'skeletonking',
	'treantprotector': 'treant',
	'windranger': 'windrunner',
	'zeus': 'zuus',
}

# (key in the resulting stats dict, CSV column) for every numeric column, in output order
_HERO_STAT_COLUMNS = (
	("strength", 'STR'), ("strength_inc", 'STR+'), ("strength_max", 'STR25'),
	("agility", 'AGI'), ("agility_inc", 'AGI+'), ("agility_max", 'AGI25'),
	("intellect", 'INT'), ("intellect_inc", 'INT+'), ("intellect_max", 'INT25'),
	("total_attrs", 'T'), ("total_attributes_inc", 'T+'), ("total_attributes_max", 'T25'),
	("base_speed", 'MS'), ("base_armor", 'AR'),
	("starting_atk_min", 'DMG(MIN)'), ("starting_atk_max", 'DMG(MAX)'),
	("range", 'RG'), ("base_attack_time", 'BAT'),
	("attack_points", 'ATKPT'), ("attack_backswing", 'ATKBS'),
	("vision_range_day", 'VS-D'), ("vision_range_night", 'VS-N'),
	("turn_rate", 'TR'), ("collision_size", 'COL'),
	("base_regen", 'HP/S'),
)

def get_initial_hero_stats(name):
	"""Return the base stats of a hero, loading the stats CSV on first use.

	The CSV at ``herostatfilepath`` is parsed once and cached in the module
	level ``herostatdict``. Every numeric column is converted with
	``round(float(...))``, so fractional values are stored as integers.

	Args:
		name: internal hero name (as in ``all_hero_names``).

	Returns:
		dict with the keys "attribute", the numeric stat keys listed in
		``_HERO_STAT_COLUMNS`` and "name" (the display name from the CSV).

	Raises:
		KeyError: if the hero is not in the CSV or a CSV column is missing.
		ValueError: if a numeric column cannot be parsed.
		OSError: if the CSV cannot be opened.
	"""
	if len(herostatdict) == 0:
		# import data first; collect into a local dict so that a parse error cannot
		# leave a half-filled cache that later calls would treat as complete
		loaded = {}
		# the csv module expects files opened with newline=''
		with open(herostatfilepath, newline='') as herocsv:
			for line in csv.DictReader(herocsv):
				hdict = {"attribute": line['A']}
				for key, column in _HERO_STAT_COLUMNS:
					hdict[key] = round(float(line[column]))
				hdict["name"] = line['HERO']
				# normalise the display name and map it to the internal name
				hname = re.sub('[ \'-]', '', line['HERO'].lower())
				loaded[_HERO_NAME_ALIASES.get(hname, hname)] = hdict
		herostatdict.update(loaded)
	# will throw error when no hero could be found
	return herostatdict[name]

# smoke test for the name/ID helpers and the stats lookup (needs the stats CSV on disk)
if TEST:
	print(hash_hero_name("zuus"))
	print(unhash_hero_name(42))
	print(get_initial_hero_stats(unhash_hero_name(42)))

In [3]:
def reader(path):
	"""Read a file in binary mode and return its complete content as bytes.

	Args:
		path: path of the file to read.

	Returns:
		bytes: the raw file content.
	"""
	with open(path, "rb") as handle:
		return handle.read()
# loads an example replay into jsonRoot1, which the later "if TEST:" snippets use
if TEST:
	# load example files for tests, e.g.
	text1  = reader("../dota/123456789.json")
	jsonRoot1 = json.loads(text1.decode())
	pass

In [4]:
class hero:
	"""Mutable record of a single hero's properties at one point of a replay.

	All values live in one dict (``_hero_properties``); the insertion order of
	its keys defines the order returned by ``getHeroStatsAnnotation`` and
	``getHeroStatsArr``.
	"""
	target_name = "lifeState" # ML target, will be written to CSV later
	# fields of an "entity" message that are copied into the properties unchanged
	_ENTITY_FIELDS = frozenset((
		'posX', 'posY', 'health', 'maxHealth', 'healthRegen', 'gold_total', 'xp_total',
		'current_level', 'mana', 'maxMana', 'manaRegen', 'lifeState', 'agility',
		'agilityTotal', 'intellect', 'intellectTotal', 'strength', 'strengthTotal',
		'totalDamageTaken', 'recentDamage'))
	# "time since last ..." counters advanced by updateCounters()
	_COUNTER_FIELDS = ("lastDamageTime", "lastTowerAttack", "lastAbilityTime", "lastDebuffTime")

	def __init__(self, name_or_dict, team=0, curHealth=0, maxHealth=0, healthRegen=0, posX=0, posY=0,
				 gold=0, xp=0, currentLevel=0, lastDamageTime=30, mana=0, maxMana=0, manaRegen=0,
				 lifeState=0, totalDamageTaken=0, recentDamage=0, agility=0, agilityTotal=0, intellect=0,
				 intellectTotal=0, strength=0, strengthTotal=0, deaths=0, buffdebuff=0, kills=0,
				 assists=0, lastAbilityTime=0, lastTowerAttack=0, lastDebuffTime=0, initialize=False):
		"""Create a hero from a name plus keyword values, or wrap an existing property dict.

		Args:
			name_or_dict: internal hero name (str), or a complete property dict
				that is adopted as-is (not copied); the dict form is used for cloning.
				Any other type leaves the property dict empty.
			initialize: when True (str form only) strength, agility, intellect (and
				their totals) and healthRegen are taken from get_initial_hero_stats()
				instead of the keyword arguments.
			(remaining keyword arguments): initial values of the equally named
				properties; ``gold``, ``xp``, ``currentLevel`` and ``curHealth`` are
				stored as "gold_total", "xp_total", "current_level" and "health".

		Raises:
			ValueError: if the name is not a known hero (from hash_hero_name).
		"""
		self._hero_properties = {}
		if isinstance(name_or_dict, str):
			if initialize:
				# fetch hero stats
				hdict = get_initial_hero_stats(name_or_dict)
				strength = hdict['strength']
				strengthTotal = strength
				agility = hdict['agility']
				agilityTotal = agility
				intellect = hdict['intellect']
				intellectTotal = intellect
				healthRegen = hdict['base_regen']
				# not used: basespeed = hdict['base_speed'], armor = hdict['base_armor'], range = hdict['range']
				# these variables are not updated because the game client is expected to calculate/update their values on its own
			# NOTE: the target_name entry only fixes the position of the "lifeState" key
			# (right after "health"); its value is overwritten by the explicit
			# "lifeState" entry further down, because the last duplicate key wins.
			self._hero_properties.update({"name":hash_hero_name(name_or_dict), "hname": name_or_dict,
				 "maxHealth":maxHealth, "health":curHealth, self.target_name: curHealth > 0,
				 "posX":posX, "posY":posY, "gold_total":gold, "xp_total":xp, "current_level":currentLevel,
				 "team":team, "lastDamageTime":lastDamageTime, "mana":mana, "maxMana":maxMana,
				 "deaths": deaths, "buffdebuff": buffdebuff, "manaRegen":manaRegen,
				 "healthRegen":healthRegen, "totalDamageTaken": totalDamageTaken, "recentDamage": recentDamage,
				 "agility":agility, "agilityTotal":agilityTotal, "intellect":intellect,
				 "intellectTotal":intellectTotal, "strength":strength, "strengthTotal":strengthTotal,
				 "lifeState":lifeState, "kills":kills, "assists":assists, "lastAbilityTime":lastAbilityTime,
				 "lastTowerAttack":lastTowerAttack, "lastDebuffTime":lastDebuffTime})
		elif isinstance(name_or_dict, dict):# to make cloning easier
			self._hero_properties = name_or_dict

	# GETTER FUNCTIONS ============================
	def getHeroStatsAnnotation(self):
		"""Return ``[property names, type names]`` for the current properties.

		The first element is the live keys view of the property dict, the second
		a list with the type name of every value (e.g. 'int', 'str') in key order.
		"""
		hero_property_datatypes = [re.split('\'',str(type(value)))[1] for value in self._hero_properties.values()]
		return [self._hero_properties.keys(), hero_property_datatypes]

	def getHeroStats(self):
		"""Return the property dict itself (not a copy)."""
		return self._hero_properties

	def getHeroStatsArr(self):
		"""Return the property values as a list, in key order.

		The list is also stored in ``self.resultArray``.
		"""
		# a plain list is required here: map(list.append, ...) would be lazy in
		# Python 3 and yield only None values
		self.resultArray = list(self._hero_properties.values())
		return self.resultArray

	def getHeroName(self):
		"""Return the hero name derived from the stored numeric ID."""
		return unhash_hero_name(self._hero_properties["name"])

	def getHeroProperty(self, property_name):
		"""Return a single property; raises KeyError for unknown names."""
		return self._hero_properties[property_name]

	def getPartialDamaged(self, healthPct):
		"""Return True if health/maxHealth is at or below ``healthPct``.

		Raises:
			ZeroDivisionError: if "maxHealth" is 0.
		"""
		damaged = (self._hero_properties['health']/
				   self._hero_properties['maxHealth'])<=healthPct
		return damaged

	def getHeroPosition(self):
		"""Return the position as a tuple ``(posX, posY)``."""
		return (self._hero_properties['posX'],self._hero_properties['posY'])

	# UPDATE FUNCTIONS ============================
	def updateHeroStatsFromMessage(self, msg, msgtype="entity"):
		"""Apply one replay message to this hero and return ``self``.

		Args:
			msg: the "data" dict of the replay message.
			msgtype: one of "entity", "damage", "death", "ability", "debuff";
				other types are ignored.

		Counters touched by "damage", "ability" and "debuff" messages are set to
		-1, whereas a health drop in an "entity" message sets "lastDamageTime"
		to 0. ReplayStateList.calcNextElem calls updateCounters() on every hero
		after a tick's messages, which adds 1 to each.
		"""
		props = self._hero_properties
		if msgtype == "entity":
			for elem, val in msg.items():
				if elem not in self._ENTITY_FIELDS:
					continue
				if elem == 'health' and val < props["health"]:
					# health dropped: restart the damage counter
					props["lastDamageTime"] = 0
				props[elem] = val
			# keep lifeState consistent with the reported health (only possible
			# when the message carries a health value at all)
			if "health" in msg:
				health = msg["health"]
				lifeState = props['lifeState']
				if (health == 0 and lifeState != 1) or (health > 0 and lifeState != 0):
					logger.error("wrong lifeState for hero {}, resetting manually".format(props["hname"]))
					props['lifeState'] = 1 if health == 0 else 0
		elif msgtype == "damage":
			# check if under attack by tower
			if 'attacker' in msg and "tower" in msg['attacker']:
				props["lastTowerAttack"] = -1
			# otherwise ignore, last damage is already updated in entity message
		elif msgtype == "death":
			if msg["killed"] == props["hname"]:
				# hero just died, increase death count
				props['deaths'] += 1
			elif msg["attacker"] == props["hname"]:
				# increment kills
				props["kills"] += 1
			elif props["hname"] in msg["assistants"]:
				# increment assists
				props["assists"] += 1
		elif msgtype == "ability":
			# abilities a hero casts on itself are not counted
			if msg["target"] == props["hname"] and not msg["target"] == msg["attacker"]:
				props["lastAbilityTime"] = -1
			# ignore otherwise
		elif msgtype == "debuff":
			if msg["target"] == props["hname"]:
				props["lastDebuffTime"] = -1
			# don't note otherwise
		return self

	def updateCounters(self):
		"""Advance every "time since last ..." counter by one, capped at LAST_DAMAGE_MAX_TIME."""
		for counter in self._COUNTER_FIELDS:
			if self._hero_properties[counter] < LAST_DAMAGE_MAX_TIME:
				self._hero_properties[counter] += 1

	def __deepcopy__(self, memo):
		"""Return a new hero wrapping a deep copy of the property dict."""
		# pass memo on so that objects shared within one deepcopy() call stay shared
		heroPropertiesCopy = copy.deepcopy(self._hero_properties, memo)
		return hero(heroPropertiesCopy)

# smoke test for the hero class
# NOTE(review): getState is not defined in any of the cells visible here, so this
# snippet raises NameError when TEST is enabled unless it is defined elsewhere -- confirm
if TEST:
	hero_lina = getState(jsonRoot1, 12406, "lina", 1)
	print(hero_lina.getHeroStats())
	print(hero_lina.getHeroName())
	print(hero_lina.getHeroStatsAnnotation())

In [5]:
def getStartingHealth(jsonContent):
	"""Find the first reported "maxHealth" of every hero listed in the match meta data.

	The replay messages are scanned in order; for each hero named in
	``meta_info.player_info`` the "maxHealth" of the first message that carries
	both "hero_name" and "maxHealth" is taken. Messages lacking one of the
	needed keys are skipped.

	Args:
		jsonContent: parsed replay with the keys "replay" and "meta_info".

	Returns:
		dict mapping hero name to its starting maximum health.

	Raises:
		RuntimeError: if values were not found for exactly 10 heroes, or if a
			listed hero is still missing afterwards.
	"""
	messages = jsonContent["replay"]
	players = jsonContent["meta_info"]["player_info"]
	# index idx-1 keeps the historical ordering (last player first)
	pending = [players[idx - 1]["hero_name"] for idx in range(len(players))]
	found = {}
	for message in messages:
		if not pending:
			break
		try:
			data = message['data']
			name = data['hero_name']
			if name in pending:
				found[name] = data["maxHealth"]
				pending.remove(name)
		except KeyError:
			# message without hero name / health: not relevant here
			continue
	if len(found) != 10:
		raise RuntimeError("starting health not found for exactly 10 heroes! (most likely someone is missing)")
	if pending:
		raise RuntimeError("health of a hero could not be found")
	return found

def getStartingPosition(jsonContent):
	"""Find the first reported position of every hero listed in the match meta data.

	The replay messages are scanned in order; for each hero named in
	``meta_info.player_info`` the ("posX", "posY") pair of the first message
	that carries "hero_name" and both coordinates is taken. Messages lacking
	one of the needed keys are skipped.

	Args:
		jsonContent: parsed replay with the keys "replay" and "meta_info".

	Returns:
		dict mapping hero name to a tuple ``(posX, posY)``.

	Raises:
		RuntimeError: if positions were not found for exactly 10 heroes.
	"""
	messages = jsonContent["replay"]
	players = jsonContent["meta_info"]["player_info"]
	# index idx-1 keeps the historical ordering (last player first)
	pending = [players[idx - 1]["hero_name"] for idx in range(len(players))]
	found = {}
	for message in messages:
		if not pending:
			break
		try:
			data = message['data']
			name = data['hero_name']
			if name in pending:
				found[name] = (data["posX"], data["posY"])
				pending.remove(name)
		except KeyError:
			# message without hero name / coordinates: not relevant here
			continue
	if len(found) != 10:
		raise RuntimeError("starting positions not found for exactly 10 heroes! (most likely someone is missing)")
	return found

# smoke test: both lookups must run without raising on the example replay (results are discarded)
if TEST:
	getStartingHealth(jsonRoot1)
	getStartingPosition(jsonRoot1)

In [6]:
class ReplayStateList: # will always return tuple (tickNumber, StateArray)
	"""Sequence of full game states reconstructed from the messages of one replay.

	The constructor replays all messages and stores one entry
	``(tick, {hero_name: hero})`` per group of messages sharing a tick, preceded
	by an initial entry with tick 0. Afterwards the object is a cursor over that
	list: ``getCurrent``, ``__next__``, ``__prev__``, ``getFutureState`` and
	``getStateAtTick`` all return such ``(tick, state)`` tuples.
	"""
	def __init__(self, jsonContent):
		"""Build the complete state list from a parsed replay.

		Args:
			jsonContent: parsed replay with the keys "replay" (list of messages
				with "tick", "type" and "data") and "meta_info".

		Raises:
			RuntimeError: if starting values cannot be found for exactly 10
				heroes (from getStartingHealth / getStartingPosition).
			AssertionError: if an entity message names a hero that is not part
				of the match.
		"""
		self.replayList = jsonContent["replay"]
		self.replayListCurrent = 0
		self.replayListMax = len(self.replayList)-1 # index of the last message
		self.maxTicks = self.replayList[-1]['tick']
		# NOTE(review): _version was read by getOutputVersion() but never assigned;
		# the module-level OUTPUT_VERSION is assumed to be the intended value
		self._version = OUTPUT_VERSION
		# the fullStateArray should contain all players with their respective stats in the form {"heroName": heroObject, ...}
		self.fullStateArray = {}
		# the following line will raise an exception if a player is missing, e.g. when one player has never joined the match
		_startingHealth = getStartingHealth(jsonContent)
		_startingPosition = getStartingPosition(jsonContent)
		players = jsonContent["meta_info"]["player_info"]
		for i in range(0, len(players)):
			# index i-1 keeps the historical insertion order (last player first)
			hero_name = players[i-1]["hero_name"]
			hero_team = players[i-1]["game_team"]
			hero_health = _startingHealth[hero_name]
			(hero_x, hero_y) = _startingPosition[hero_name]
			heroObj = hero(hero_name, team=hero_team, maxHealth=hero_health, curHealth=hero_health, posX=hero_x, posY=hero_y, initialize=True)
			self.fullStateArray.update({hero_name:heroObj})
		# the replayStateList contains one tuple (tickNum, {state array}) per processed tick;
		# it grows by appending, so its length does not depend on how the messages are
		# spread over ticks
		self.replayStateList = [(0, self.fullStateArray)]
		self.replayStateListCurrent = 0
		# prepare full state array; an AssertionError from calcNextElem propagates
		# unchanged so that its message reaches the caller
		while True:
			try:
				self.calcNextElem()
			except StopIteration:
				break
		self.replayStateListCurrent = 0
		# remove replayList since it is no longer relevant
		self.replayList = None
		self.replayListCurrent = None

	def calcNextElem(self):
		"""Apply all messages of the next tick and append the resulting state.

		Only usable while the raw message list is still present, i.e. during
		construction.

		Raises:
			StopIteration: when all messages have been consumed.
			RuntimeError: if the state does not contain exactly 10 heroes.
			AssertionError: if an entity message names an unknown hero.
		"""
		### update state based on the next frame
		if self.replayListCurrent > self.replayListMax:
			raise StopIteration
		# create new full state array
		newFullStateArray = copy.deepcopy(self.fullStateArray)
		# collect first message for new tick
		newmessage = self.replayList[self.replayListCurrent]["data"]
		newmessageTick = self.replayList[self.replayListCurrent]["tick"]
		newmessageType = self.replayList[self.replayListCurrent]["type"]
		newmessageArr = [(newmessageType, newmessage)]
		if not len(newFullStateArray)==10:
			raise RuntimeError("new full state array has {} elements instead of 10!".format(len(newFullStateArray)))
		# collect all following messages that belong to the same tick
		while self.replayListCurrent < self.replayListMax and self.replayStateListCurrent < self.maxTicks and self.replayList[self.replayListCurrent+1]["tick"] == newmessageTick:
			following = self.replayList[self.replayListCurrent+1]
			newmessageArr.append((following["type"], following["data"]))
			self.replayListCurrent += 1
		for msgtype, msg in newmessageArr:
			if msgtype == "entity":
				heroname = msg["hero_name"]
				# raised explicitly (not via assert) so the check survives python -O
				if heroname not in newFullStateArray:
					raise AssertionError("hero {} not found in full state array!".format(heroname))
				newFullStateArray[heroname] = newFullStateArray[heroname].updateHeroStatsFromMessage(msg, msgtype=msgtype)
			elif msgtype == "damage" or msgtype == "ability" or msgtype == "debuff":
				# NOTE(review): assumes the target is always one of the 10 heroes; any other target raises KeyError here
				heroname = msg["target"]
				newFullStateArray[heroname] = newFullStateArray[heroname].updateHeroStatsFromMessage(msg, msgtype=msgtype)
			elif msgtype == "death":
				involved_heroes = msg["assistants"]+[msg["killed"]]# attacker is in assistants list
				for involved in involved_heroes:
					newFullStateArray[involved] = newFullStateArray[involved].updateHeroStatsFromMessage(msg, msgtype=msgtype)
		# update all the number counters
		for hero_name in newFullStateArray.keys():
			newFullStateArray[hero_name].updateCounters()
		self.replayListCurrent +=1
		self.fullStateArray = newFullStateArray
		self.replayStateList.append((newmessageTick, newFullStateArray))
		self.replayStateListCurrent = len(self.replayStateList)-1
	def getOutputVersion(self):
		"""Return the output version recorded when the list was built."""
		return self._version
	def __iter__(self):
		return self
	def hasNext(self):
		"""Return True while __next__ can still advance."""
		return self.replayStateListCurrent < len(self.replayStateList)-1
	def __next__(self): # get state of next timestamp, if available
		if self.replayStateListCurrent >= len(self.replayStateList)-1:
			logger.debug("reached replay list end at tick {}".format(str(self.replayStateListCurrent)))
			raise StopIteration
		else:
			self.replayStateListCurrent += 1
			return self.replayStateList[self.replayStateListCurrent]
	def __prev__(self):
		"""Move the cursor one entry back and return it; StopIteration at the beginning."""
		if self.replayStateListCurrent < 1:
			logger.debug("reached replay list beginning")
			raise StopIteration
		else:
			self.replayStateListCurrent -= 1
			return self.replayStateList[self.replayStateListCurrent]
	def getCurrent(self): # = highest available
		"""Return the entry at the cursor position."""
		return self.replayStateList[self.replayStateListCurrent]
	# get state from replaylist at current+futureNum
	def getFutureState(self, futureNum):
		"""Return the entry ``futureNum`` list positions after the cursor, clamped to the last entry."""
		return self.replayStateList[min(self.replayStateListCurrent+futureNum,len(self.replayStateList)-1)]
	def getStateAtTick(self, tickNum):
		"""Return the entry that is valid at ``tickNum``.

		That is the latest entry whose tick is less than or equal to ``tickNum``.
		Requests before the first or after the last entry are clamped to the
		first/last entry. The cursor is not moved.

		The lookup is a binary search over the stored ticks, which relies on the
		entries being in ascending tick order (as produced from a replay whose
		messages are ordered by tick).
		"""
		states = self.replayStateList
		if tickNum <= states[0][0]:
			return states[0]
		if tickNum >= states[-1][0]:
			return states[-1]
		# invariant: states[lo] has tick <= tickNum, states[hi] has tick > tickNum
		lo, hi = 0, len(states)-1
		while hi-lo > 1:
			mid = (lo+hi)//2
			if states[mid][0] <= tickNum:
				lo = mid
			else:
				hi = mid
		return states[lo]

In [7]:
def pointDistance(p1, p2):
	"""Return the Euclidean distance between two 2D points.

	Args:
		p1, p2: indexable pairs ``(x, y)``.
	"""
	delta_x = p1[0]-p2[0]
	delta_y = p1[1]-p2[1]
	return math.sqrt(delta_x**2+delta_y**2)
# smoke test: prints the distance between (1,1) and (2,2), i.e. the square root of 2
if TEST:
	print(pointDistance((1,1),(2,2)))
def calcHeroDistances(dictState):
	"""Compute the distance of every unordered pair of heroes.

	Args:
		dictState: dict mapping a hero key (name or hash value) to its position
			``(x, y)``, i.e. shaped like the output of getStartingPosition.

	Returns:
		Nested dict ``{hero1: {hero2: distance, ...}, ...}``. Each pair is stored
		once, under the hero that comes first in ``dictState``; the last hero
		therefore has no entry of its own, and distances to self are omitted.
	"""
	ordered_keys = list(dictState)
	pair_distances = {}
	for position, first in enumerate(ordered_keys):
		first_pos = dictState[first]
		# only heroes after the current one: earlier pairs are already stored
		row = {second: pointDistance(first_pos, dictState[second])
			   for second in ordered_keys[position+1:]}
		if row:
			pair_distances[first] = row
	return pair_distances

def getHeroDistance(h1, h2, distances):
	"""Look up the distance between two heroes in a calcHeroDistances() result.

	Every pair is stored only once in ``distances``, so both key orders are
	tried.

	Args:
		h1, h2: hero hash values (not names).
		distances: nested dict as returned by calcHeroDistances.

	Returns:
		The stored distance, 0 if both heroes are the same, and 0 (after logging
		an error) if the pair is not contained in ``distances``.

	Raises:
		ValueError: if a hero is given as a string, or (from unhash_hero_name)
			if a hero of a missing pair is not a valid ID.
	"""
	if isinstance(h1, str) or isinstance(h2, str):
		raise ValueError
	if h1 == h2:
		return 0
	for first, second in ((h1, h2), (h2, h1)):
		try:
			return distances[first][second]
		except KeyError:
			continue
	# neither order is stored: report it and fall back to 0 (this fallback was
	# unreachable before, a missing pair surfaced as a KeyError instead)
	h1_name = unhash_hero_name(h1)
	h2_name = unhash_hero_name(h2)
	logger.error("distance between {} and {} not found in {}".format(h1_name, h2_name, distances))
	return 0

# smoke test for the distance helpers with ten hero IDs and hand-made positions
# NOTE: the label printed by the last line says "bane", but in all_hero_names ID 87 is 'disruptor' (93 is 'slark')
if TEST:
	testHeroPos = {3: (182, 174), 62: (180, 176), 99: (182, 176), 87: (72, 76), 69: (70, 76), 74: (182, 176), 25: (72, 74), 27: (74, 76), 93: (180, 176), 18: (74, 74)}
	testHeroDists = calcHeroDistances(testHeroPos)
	print("all distances:",testHeroDists)
	print("distances between bane and slark",getHeroDistance(87, 93, testHeroDists))

In [8]:
# calculate whether any hero is noteworthy/relevant in the given state with respect to the near past and future
def checkStateNoteworthiness(currentState, globalState, currTick, replayStateList, lastDamageThreshold=LAST_DAMAGE_THRESHOLD, heavyDamageVal=HEAVY_DAMAGE_VAL, criticalFutureTime=CRITICAL_FUTURE_TIME):
	"""Select the heroes of the current state that are worth recording.

	A hero becomes a candidate when it has just died (health 0 and not yet
	tracked as dead), or when it has just become heavily damaged and, in the
	state ``criticalFutureTime`` entries ahead, is above the recovery level
	(``heavyDamageVal + 0.05``) with "lastDamageTime" <= ``lastDamageThreshold``.

	NOTE(review): the comment on LAST_DAMAGE_THRESHOLD describes "not damaged
	for this amount of ticks", which would suggest ``>=`` for the
	"lastDamageTime" comparison; the code uses ``<=`` -- confirm the intent.

	Args:
		currentState: dict hero name -> hero object of the current tick.
		globalState: dict carrying the sets "critical_heroes" and "dead_heroes"
			(hero IDs) between calls; both start empty if either is missing.
			The dict and its sets are updated in place.
		currTick: tick number of the current state (not used).
		replayStateList: object providing ``getFutureState(n)``.
		lastDamageThreshold, heavyDamageVal, criticalFutureTime: thresholds,
			defaulting to the module constants.

	Returns:
		Tuple ``(candidates, globalState)`` where ``candidates`` is a set of
		hero IDs.
	"""
	# globalState can be used to retain information about long-term events, e.g. currently critical or dead heroes
	try:
		critical_ids = globalState["critical_heroes"]
		dead_ids = globalState["dead_heroes"]
	except KeyError:
		critical_ids = set()
		dead_ids = set()
	# newly dead heroes are the first candidates
	candidates = set()
	for unit in currentState.values():
		if unit.getHeroProperty("health") == 0 and unit.getHeroProperty("name") not in dead_ids:
			candidates.add(unit.getHeroProperty("name"))
	# heroes tracked as dead that have health again are no longer dead
	revived = [hid for hid in dead_ids
			   if not currentState.get(unhash_hero_name(hid)).getHeroProperty("health")==0]
	dead_ids.difference_update(revived)
	# newly dead heroes are now tracked as dead and are no longer critical
	dead_ids.update(candidates)
	critical_ids.difference_update(candidates)
	# heroes that are heavily damaged right now and not tracked yet
	newly_critical = set()
	for unit in currentState.values():
		if not unit.getPartialDamaged(heavyDamageVal):
			continue
		uid = unit.getHeroProperty("name")
		if uid not in critical_ids and uid not in dead_ids:
			newly_critical.add(uid)
	# tracked critical heroes above the recovery level are definitely saved
	recovery_level = heavyDamageVal+0.05
	for hid in tuple(critical_ids):
		if not currentState.get(unhash_hero_name(hid)).getPartialDamaged(recovery_level):
			critical_ids.discard(hid)
	# check whether the new critical heroes will save themselves or die in the near future
	(_, nearfuture) = replayStateList.getFutureState(criticalFutureTime)
	for hid in newly_critical:
		future_unit = nearfuture.get(unhash_hero_name(hid))
		saved = (not future_unit.getPartialDamaged(recovery_level)
				 and future_unit.getHeroProperty("lastDamageTime")<=lastDamageThreshold)
		if saved:
			# hero has apparently been saved: interesting candidate
			candidates.add(hid)
			critical_ids.add(hid)
		elif future_unit.getPartialDamaged(0):
			# hero will die soon: track as critical, the death itself makes it a candidate later
			critical_ids.add(hid)
		# otherwise: the hero is checked again on the next call
	globalState["dead_heroes"]=dead_ids
	globalState["critical_heroes"]=critical_ids
	return candidates, globalState

def getDataState(currentState, replayList, currTick, framesP, noteworthyHeroes = {}, appendResultDataTypes = False):
	"""Build the feature rows for the noteworthy heroes, or describe the columns.

	Args:
		currentState: dict hero name -> hero object of the current tick.
		replayList: object providing ``getStateAtTick(tick)``.
		currTick: tick number of the current state.
		framesP: list of tick offsets into the past. Most features are read from
			the state at ``currTick - framesP[0]``; the remaining offsets only
			contribute one health value each.
		noteworthyHeroes: iterable of hero IDs to create rows for.
		appendResultDataTypes: if True, no data is read; the column description
			is returned instead.

	Returns:
		With ``appendResultDataTypes``: list of ``(column name, type name)``
		tuples. Otherwise: list with one list of values per noteworthy hero, in
		the same column order. The "lifeState" column is taken from the
		*current* state (True when lifeState == 0).
	"""
	frames1p=framesP[0]
	if appendResultDataTypes:
		tag = "-"+str(frames1p)
		# columns that simply carry the first frame offset as suffix
		tagged_columns = (("mana",'int'), ("maxMana",'int'), ("manaRegen",'int'),
						("agility",'int'), ("agilityTotal",'int'), ("intellect",'int'),
						("intellectTotal",'int'), ("strength",'int'), ("strengthTotal",'int'),
						("recentDamage",'int'), ("currentLevel",'int'), ("posX",'int'),
						("posY",'int'), ("gold_total",'int'), ("xp_total",'int'),
						("lastDamageTime",'int'), ("deaths",'int'), ("nearbyFriends",'int'),
						("nearestFriendDistance",'float'), ("nearbyEnemies",'int'),
						("nearestEnemyDistance",'float'), ("nearestEnemyName",'int'), ("game_deca-minutes",'int'),
						("kills",'int'), ("assists",'int'), ("lastAbilityTime",'int'),
						("lastTowerAttack",'int'), ("lastDebuffTime",'int'))
		features = [("name",'int'), ("maxHealth"+tag,'int'), ("health"+tag,'int')]
		features.extend(("health-"+str(elem),'int') for elem in framesP[1:])
		features.extend([("healthRegen"+tag,'int'), ("lifeState",'boolean')])
		features.extend((label+tag, dtype) for label, dtype in tagged_columns)
		#removed: ("team",'int'), ("totalDamageTaken-"+str(frames1p),'int')
		return features
	# one (tick, state) entry per requested frame offset
	tickstates = [replayList.getStateAtTick(currTick-elem) for elem in framesP]
	reference_state = tickstates[0][1]
	hPositions = {hv.getHeroProperty("name"): (hv.getHeroProperty("posX"), hv.getHeroProperty("posY"))
				  for hv in reference_state.values()}
	hDists = calcHeroDistances(hPositions)
	# stats of all heroes at the first frame offset, keyed by hero ID
	allhStats = {hash_hero_name(key):elem.getHeroStats() for key,elem in reference_state.items()}
	resultArray = []
	for heroID in noteworthyHeroes:
		heroName = unhash_hero_name(heroID)
		current_hero = currentState.get(heroName)
		# game time in units of 30*600 ticks
		game_decaminutes = math.floor((currTick-framesP[0])/(30*600))
		# stats of this hero at the additional frame offsets and at the first one
		hStats_ticks = [tickstates[i][1][heroName].getHeroStats() for i in range(1,len(framesP))]
		hStats_1p = tickstates[0][1][heroName].getHeroStats()
		hStats = current_hero.getHeroStats()
		hero_team = hStats["team"]
		hero_name = hStats["name"]
		dataElem = [hero_name, hStats_1p["maxHealth"], hStats_1p["health"]]
		dataElem += [hStat["health"] for hStat in hStats_ticks]
		dataElem += [hStats_1p["healthRegen"], hStats["lifeState"]==0]
		dataElem += [hStats_1p[key] for key in (
						"mana", "maxMana", "manaRegen", "agility",
						"agilityTotal", "intellect", "intellectTotal",
						"strength", "strengthTotal", "recentDamage",
						"current_level", "posX", "posY", "gold_total",
						"xp_total", "lastDamageTime", "deaths")]
		# removed: hStats_1p.get("totalDamageTaken"), hero_team
		# neighbourhood of the hero at the first frame offset
		nearbyFriends, nearestFriendDistance = 0, 9999
		nearbyEnemies, nearestEnemyDistance = 0, 9999
		nearest_enemy = -1
		for other_id, other_stats in allhStats.items():
			if other_id == hero_name:
				continue
			hdist = getHeroDistance(hero_name, other_id, hDists)
			if other_stats["team"] == hero_team:
				if hdist < NEARBY_DISTANCE:
					nearbyFriends += 1
				if hdist < nearestFriendDistance:
					nearestFriendDistance = hdist
			else: # enemy team or something went wrong
				if hdist < NEARBY_DISTANCE:
					nearbyEnemies += 1
				if hdist < nearestEnemyDistance:
					nearestEnemyDistance = hdist
					nearest_enemy = other_id
		if nearest_enemy == -1:
			logger.error("no enemies found upon death") # this should never actually happen
		dataElem += [nearbyFriends, nearestFriendDistance, nearbyEnemies, nearestEnemyDistance, nearest_enemy, game_decaminutes]
		dataElem += [hStats_1p[key] for key in (
						"kills", "assists", "lastAbilityTime", "lastTowerAttack", "lastDebuffTime")]
		resultArray.append(dataElem)
	return resultArray
#if TEST:
	# the testStates will only be created by the cell below
	#res = checkStateNoteworthiness(testCurrentState, testGlobalState)
	#getDataState(testCurrentState, testDataArray)

In [9]:
# create data arrays (returns a 2-tuple (dataArray, hero_features) or None in case of an error)
def createDataArray(jsonContent, framesP):
	"""Collect the data rows of all noteworthy heroes over a whole replay.

	Args:
		jsonContent: parsed replay content; handed to ReplayStateList unchanged.
		framesP: handed to getDataState unchanged.

	Returns:
		A 2-tuple (dataArray, hero_features), or None on error.
		dataArray is the flat list of rows returned by getDataState for every state
		that checkStateNoteworthiness reported noteworthy heroes for.
		hero_features is a two-element list unzipped from the
		getDataState(..., appendResultDataTypes=True) call on the first state
		(writeDataArray writes them as column names and feature data types).
		None is returned if ReplayStateList raises an AssertionError or if no row
		was collected.

	Raises:
		AssertionError: if the number of feature names differs from the length of
			the first collected row.
	"""
	try:
		rslist = ReplayStateList(jsonContent)
	except AssertionError as e:
		logger.error("could not create data array, skipping: {}".format(e))
		return None
	# all collected rows in temporal order
	dataArray = []
	# first state as given by the iterator
	(tickNum, currentState) = rslist.getCurrent()
	globalState = {} # contains long-term state properties
	# grab the feature names and their data types from the first state
	feature_names, feature_types = zip(*getDataState(currentState, rslist, tickNum, framesP, appendResultDataTypes=True))
	hero_features = [feature_names, feature_types]
	# collect data
	logger.info("{} start of data array creation: {}".format(os.getpid(),time.ctime()))
	num_elements = 0
	checkedTicks = 0
	# NOTE(review): the state that is current when hasNext() turns False is never evaluated
	while rslist.hasNext():
		checkedTicks += 1
		# decide if a state should be included
		noteworthyHeroes, globalState = checkStateNoteworthiness(currentState, globalState, tickNum, rslist)
		if len(noteworthyHeroes)>0:
			# get current state in array form
			normalState = getDataState(currentState, rslist, tickNum, framesP, noteworthyHeroes)
			if TEST and "testCurrentState" not in globals():
				# create debug objects (the variables below are needed for tests).
				# They are published as module globals: plain assignments would only create
				# locals, so the guard above could never become true. The former reference
				# to the undefined name stateNoteworthy is replaced by noteworthyHeroes.
				if len(normalState)>0:
					globals().update(
						testCurrentState=currentState,
						testDataArray=dataArray,
						testStateNoteworthy=noteworthyHeroes,
						testGlobalState=globalState,
					)
			# add state and result value to arrays
			num_elements += len(normalState)
			dataArray.extend(normalState)
		# get next message
		(tickNum, currentState) = next(rslist)
	if not dataArray:
		logger.error("empty data array, canceling creation")
		return None
	# this assertion is only a general test to check if the number of arguments is off
	assert len(hero_features[0]) == len(dataArray[0]), "wrong output length"
	logger.info("{} end of data array creation: {}, added {} elements, checked {} ticks".format(os.getpid(), time.ctime(), num_elements, checkedTicks))
	return dataArray, hero_features
# manual smoke test for createDataArray; only runs when TEST is enabled
if TEST:
	print("beginning at",time.ctime())
	# jsonRoot3 is not defined in this cell; presumably it is loaded by an earlier cell -- TODO confirm
	# NOTE(review): createDataArray can return None, in which case this unpacking raises a TypeError
	resultArray, hero_features = createDataArray(jsonRoot3, [90,120,150,180,210,240])
	print("done at", time.ctime())
	#trueratio = (len([t for t in dataArray if t[5]]),len([t for t in dataArray if not t[5]]))
	#print("ratio of True/False: {}:{}".format(trueratio[0],trueratio[1]))
	# print all saves (presumably column 6 holds the "saved" flag -- TODO confirm against getDataState)
	for elem in resultArray:
		if elem[6]:
			print(elem)

In [10]:
def writeDataArray(filename, data_array, hero_features, version, targetPath = "./"):
	"""Write the collected rows and their header lines to a CSV file.

	File layout:
		row 1: metadata -- length of the first data row, number of data rows,
			version, name of the target
		row 2: hero_features[0] (column/feature names)
		row 3: hero_features[1] (feature data types)
		rows 4+: one line per entry of data_array

	Args:
		filename: name of the CSV file inside targetPath.
		data_array: non-empty list of rows (an empty list raises IndexError).
		hero_features: two-element sequence, see the layout above.
		version: output version stored in the metadata row.
		targetPath: directory to write to; created if it does not exist.
	"""
	# exist_ok avoids a failure when several worker processes create the directory at the same time
	os.makedirs(targetPath, exist_ok=True)
	# build the metadata row before opening the file so that a failure here does not
	# leave an empty CSV file behind
	# NOTE(review): `hero` is not defined in this function; it has to be provided at module level
	target_name = hero.target_name
	# metadata: number of features (row length), number of elements (rows), version, name of the target
	metadata_row = [len(data_array[0]), len(data_array), version, target_name]
	target_file = os.path.join(targetPath, filename)
	with open(target_file, "w", newline='') as csvfile:
		logger.debug("writing to {}".format(target_file))
		csvwriter = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
		csvwriter.writerow(metadata_row)
		# write column(/feature) names
		csvwriter.writerow(hero_features[0])
		# write feature data types
		csvwriter.writerow(hero_features[1])
		# write the data rows
		csvwriter.writerows(data_array)
# manual smoke test for writeDataArray; only runs when TEST is enabled
# resultArray and hero_features are the globals created by the TEST run of createDataArray
if TEST:
	writeDataArray("jsonRoot-demo.csv", resultArray, hero_features, OUTPUT_VERSION, "./")

In [11]:
def jsonToCSV(json_file, framesP, output_version, targetPath = "../csv/", newfilename = None):
	"""Convert a single replay JSON file into a CSV data file.

	Args:
		json_file: path of the JSON file; read via reader(), whose result is decoded
			and parsed with json.loads.
		framesP: handed to createDataArray unchanged.
		output_version: version string; stored in the CSV metadata and used in the
			default file name.
		targetPath: directory the CSV file is written to.
		newfilename: name of the CSV file; defaults to
			"<json file name up to the first dot>-<output_version>.csv".

	Returns:
		None. Nothing is written if the data array could not be created.
	"""
	jsonText = reader(json_file)
	jsonRoot = json.loads(jsonText.decode())
	try:
		dataArrayResult = createDataArray(jsonRoot, framesP)
	except Exception:
		# error during creation => ignore this file, but keep the traceback in the log
		# (Exception instead of a bare except, so KeyboardInterrupt/SystemExit still propagate)
		logger.error("Error while creating data array for {}, skipping".format(json_file), exc_info=True)
		return
	if dataArrayResult is None:
		# createDataArray reports its own errors by returning None; there is nothing to write
		logger.warning("no data array created for {}, skipping".format(json_file))
		return
	resultArray, heroProperties = dataArrayResult
	filename = os.path.basename(json_file)
	csvName = newfilename or filename.split('.')[0]+"-"+output_version+".csv"
	writeDataArray(csvName, resultArray, heroProperties, output_version, targetPath)

# manual smoke test for jsonToCSV; only runs when TEST is enabled
if TEST:
	# jsonToCSV takes (json_file, framesP, output_version, targetPath); the old call passed
	# the target path as framesP and no output version at all, which raised a TypeError
	jsonToCSV("../dota/123456789.json", OUTPUT_VERSION_DICT[OUTPUT_VERSION], OUTPUT_VERSION, "../csv/")
#	jsonToCSV("/path/to/dota-files/dota-replays/dem/2191516938.json",
#			OUTPUT_VERSION_DICT[OUTPUT_VERSION], OUTPUT_VERSION,
#			"/path/to/dota-files/dota-replays/csv/")
#	#jsonToCSV("../dota/123456789.json", [120,150,180], "8c", "../csv/")

In [12]:
# make sure the csv directory exists (it is created if necessary)
def chkdir(directory):
	"""Create `directory`, including missing parent directories, unless the path exists.

	Nothing happens if the path already exists. exist_ok=True keeps the call from
	failing when another process creates the directory between the check and the
	creation.
	"""
	if not os.path.exists(directory):
		os.makedirs(directory, exist_ok=True)

# counter of handled files: reset and incremented for skipped files in convert_files,
# incremented once per converted file in mpJsonToCSV
# NOTE(review): the pool workers only share this counter if they inherit the module-level
# object (fork start method); with the spawn start method each worker would presumably
# create its own counter -- TODO confirm on the target platform
elemcounter = multiprocessing.Value(ctypes.c_int)

def convert_files(json_dir, csv_dir, file_version=OUTPUT_VERSION, num_threads=4, num_files=None):
	"""Convert the replay JSON files of a directory to CSV files using a process pool.

	Files whose CSV output already exists in the requested version are skipped.

	Args:
		json_dir: directory containing the "<digits>.json" replay files.
		csv_dir: directory for the CSV files; created if it does not exist.
		file_version: output version string; part of the CSV file name, compared with
			the version stored in existing CSV files and handed to the workers.
		num_threads: number of worker processes of the pool.
		num_files: if set, only the first num_files+1 directory entries are considered.
	"""
	chkdir(csv_dir)
	with elemcounter.get_lock():
		elemcounter.value = 0
	convfiles = []
	dir_entries = os.listdir(json_dir)
	# NOTE(review): the slice keeps num_files+1 directory entries (of any kind, not only
	# JSON files); kept as it was, but this looks like an off-by-one -- TODO confirm
	max_elems = dir_entries[:num_files+1] if num_files else dir_entries
	for elem in max_elems:
		if not re.match("[0-9]*[.]json$", elem):
			continue
		# check if csv file already exists in this version and if so, skip it
		newfilename = re.split("[.]",elem)[0]+"-"+file_version+".csv"
		csv_path = os.path.join(csv_dir, newfilename)
		if os.path.isfile(csv_path):
			# read the version from the metadata row; it is kept in its own variable so
			# the requested file_version is not overwritten (the old code compared the
			# value with itself and therefore skipped every existing file)
			existing_version = None
			with open(csv_path, newline='') as output_file:
				filedata = next(csv.reader(output_file), None)
			# an empty or truncated file has no usable metadata and is converted again
			if filedata is not None and len(filedata) > 2:
				existing_version = filedata[2]
			if existing_version == file_version:
				logger.debug("skipping {} as the output file already exists in current version ({})".format(elem, file_version))
				with elemcounter.get_lock():
					elemcounter.value += 1
				continue
		convfiles.append(elem)
	with elemcounter.get_lock():
		if elemcounter.value > 0:
			print("{} files skipped as the output already exists in the current version ({})".format(elemcounter.value, file_version))
	# convert to CSV
	start_time = time.ctime()
	# logging needs a placeholder for extra arguments, otherwise formatting the record fails
	logger.info("conversion start time: %s", start_time)
	print("conversion start time:", start_time)
	convtasks = [(filename, json_dir, csv_dir, file_version) for filename in convfiles]
	with Pool(processes=num_threads) as pool:
		pool.map(mpJsonToCSV, convtasks)
	end_time = time.ctime()
	logger.info("conversion end time: %s", end_time)
	print("conversion end time:", end_time)

def mpJsonToCSV(elem_tuple):
	"""Pool worker: convert one file described by a task tuple.

	elem_tuple is (file_name, json_dir, csv_dir, output_version) as built by
	convert_files. A plain module-level function is used in place of a lambda so it
	can be handed to Pool.map. After the conversion the shared elemcounter is
	increased and a progress line is printed.
	"""
	(file_name, json_dir, csv_dir, output_version) = elem_tuple
	started = time.time()
	source_path = os.path.join(json_dir, file_name)
	frames = OUTPUT_VERSION_DICT[output_version]
	jsonToCSV(source_path, frames, output_version, csv_dir)
	# counter update and progress output happen under the counter's lock
	with elemcounter.get_lock():
		elemcounter.value += 1
		elapsed = time.time() - started
		print("converting {} took {} seconds, now at {} files".format(file_name, elapsed, elemcounter.value))

In [13]:
# run the line below to start the file conversion from this notebook (json_dir and csv_dir have to be passed as well), but disable it again before converting this notebook to a python script
#convert_files(file_version='8e', num_threads=5)#, num_files=4500)