In [1]:
import os

from pyspark import SparkContext

# Set the executor memory property before the context is created.
SparkContext.setSystemProperty('spark.executor.memory', '2g')

# Connect to the cluster master and register this notebook's application name.
sc = SparkContext(master="spark://master:7077", appName="Location Finder")

print("Connected")

Connected


In [113]:
# Map to analyse; it selects the HDFS input file "<MAP_NAME>_data.txt".
MAP_NAME = "Baltic_Main"
# Centre (x, y) and radius of the query zone that every data row is compared
# against when the offsets are computed.
INPUT_ZONE_X = 360955
INPUT_ZONE_Y = 401233
INPUT_ZONE_RADIUS = 20718

In [114]:
# Exclusive upper bound on the distance between a row's zone centre and the
# input zone centre for the row to be kept.
ZONE_CENTER_OFFSET = 40000
# Exclusive upper bound on |row zone radius - input zone radius|: a quarter of
# the input radius, truncated to an int.
ZONE_RADIUS_OFFSET = int(INPUT_ZONE_RADIUS / 4)

In [115]:
import math

def distance_cal(x1, y1, x2, y2):
    """Return the Euclidean distance between (x1, y1) and (x2, y2) as a float."""
    delta_x = x2 - x1
    delta_y = y2 - y1
    squared_length = delta_x ** 2 + delta_y ** 2
    return math.sqrt(squared_length)

def construst_info(e):
    """Parse one comma-separated data line into an 8-field tuple.

    Columns 3 and 4 are read as the attacker x/y position and columns 9, 10
    and 11 as the zone x, zone y and zone radius; all are converted with int().

    Returns:
        (attacker_x, attacker_y, zone_x, zone_y, zone_radius,
         attacker_offset, center_offset, radius_offset) where
        attacker_offset is the distance from the attacker position to
        (INPUT_ZONE_X, INPUT_ZONE_Y), center_offset is the distance from the
        zone centre to that same point (both truncated to int), and
        radius_offset is abs(zone_radius - INPUT_ZONE_RADIUS).

    Raises:
        IndexError: if the line has fewer than 12 comma-separated fields.
        ValueError: if one of the columns read is not an integer literal.
    """
    fields = e.split(",")

    # Convert in column order so a malformed line fails on the same field as before.
    attacker_x, attacker_y, zone_x, zone_y, zone_radius = [
        int(fields[column]) for column in (3, 4, 9, 10, 11)
    ]

    # Player kill location offset
    attacker_offset = int(
        distance_cal(attacker_x, attacker_y, INPUT_ZONE_X, INPUT_ZONE_Y)
    )
    # Zone center offset
    center_offset = int(distance_cal(zone_x, zone_y, INPUT_ZONE_X, INPUT_ZONE_Y))
    # Zone radius offset
    radius_offset = abs(zone_radius - INPUT_ZONE_RADIUS)

    return (
        attacker_x,
        attacker_y,
        zone_x,
        zone_y,
        zone_radius,
        attacker_offset,
        center_offset,
        radius_offset,
    )

def filter_info(e):
    """Return True when a parsed row's zone is close enough to the input zone.

    e is indexed like the tuple built by construst_info: e[6] is the zone
    centre offset and e[7] the zone radius offset. Both must be strictly
    below ZONE_CENTER_OFFSET and ZONE_RADIUS_OFFSET respectively.
    """
    # `and` short-circuits, so e[7] is only read when the centre check passes.
    return bool(e[6] < ZONE_CENTER_OFFSET and e[7] < ZONE_RADIUS_OFFSET)

In [116]:
# Read the data file for the selected map from HDFS as an RDD of text lines.
whole_file_rdd = sc.textFile(
    "hdfs://master:9000/data/{}_data.txt".format(MAP_NAME)
)

# The first line is treated as the header; every line equal to it is dropped.
header = whole_file_rdd.first()
raw_data_rdd = whole_file_rdd.filter(lambda line: line != header)

# start the logic from here

In [117]:
# Parse every data line into its tuple of coordinates and offsets.
splitted = raw_data_rdd.map(construst_info)

# Preview the first five parsed rows.
splitted.take(5)

[(360955, 401233, 406387, 406387, 579718, 0, 45723, 559000),
 (383484, 476670, 406387, 406387, 579718, 78729, 45723, 559000),
 (349029, 311242, 406387, 406387, 579718, 90777, 45723, 559000),
 (349029, 311242, 406387, 406387, 579718, 90777, 45723, 559000),
 (171702, 621137, 406387, 406387, 579718, 290128, 45723, 559000)]

In [118]:
#splitted.count()

In [119]:
# Keep only the rows whose zone centre and radius offsets are under the thresholds.
filtered = splitted.filter(filter_info)


In [125]:
import json

# Sort the matching rows by the attacker's distance to the input zone centre
# (field 5), keep only the attacker (x, y) position, and bring at most the
# 5000 closest back to the driver.
# NOTE: despite its name, final_rdd is the plain Python list returned by take().
final_rdd = (
    filtered
    .sortBy(lambda row: row[5])
    .map(lambda row: (row[0], row[1]))
    .take(5000)
)

In [128]:

# Turn each (x, y) pair into a dict with "x" and "y" keys.
# NOTE: despite its name, this is a list of dicts, not a serialized JSON string.
final_json_string = [{"x": x, "y": y} for x, y in final_rdd]

In [129]:
# Render the whole result list as the cell output (no slicing is applied).
final_json_string

[{'x': 360674, 'y': 400835},
 {'x': 360687, 'y': 401798},
 {'x': 360687, 'y': 401798},
 {'x': 360247, 'y': 401283},
 {'x': 360075, 'y': 400750},
 {'x': 362036, 'y': 401222},
 {'x': 360571, 'y': 402329},
 {'x': 361091, 'y': 400071},
 {'x': 361423, 'y': 402383},
 {'x': 359908, 'y': 400446},
 {'x': 359908, 'y': 400446},
 {'x': 359629, 'y': 400774},
 {'x': 361726, 'y': 402477},
 {'x': 362419, 'y': 401554},
 {'x': 359488, 'y': 400775},
 {'x': 359439, 'y': 400872},
 {'x': 359939, 'y': 402436},
 {'x': 359439, 'y': 400789},
 {'x': 362529, 'y': 401546},
 {'x': 362245, 'y': 402205},
 {'x': 360066, 'y': 399848},
 {'x': 359298, 'y': 401182},
 {'x': 359267, 'y': 401119},
 {'x': 359781, 'y': 399952},
 {'x': 360029, 'y': 399711},
 {'x': 359879, 'y': 399768},
 {'x': 359879, 'y': 399768},
 {'x': 359137, 'y': 401074},
 {'x': 362713, 'y': 401778},
 {'x': 361454, 'y': 403014},
 {'x': 359017, 'y': 401072},
 {'x': 362501, 'y': 402428},
 {'x': 362501, 'y': 402428},
 {'x': 360930, 'y': 403192},
 {'x': 359856,