In [1]:
from pyspark.sql.functions import col, lit, explode, monotonically_increasing_id, posexplode
from pyspark.sql.functions import udf,struct, collect_list
from pyspark.sql.functions import sum as fsum
import random
import numpy as np
import math
import heapq
import json
import time
from tqdm import tqdm
import gc
from pyspark.sql.types import StructType, StructField, DoubleType, IntegerType

In [2]:
#
# cose da implementare:
# nota: numerics_domains dovrebbe essere un dataframe con colonne "idx"(indice)
# "nome_numerics1"... e le colonne dei numerics devono essere ordinate 
#  dataframe con valori unici dei numerici indicizzati (v)
#  funzione per calcolare la qualità dei pattern (v)
    # modificare il dataset in modo da inserire gli indici al posto dei valori singoli (v)
    # modificare funzione di campionamento valori (v)
    # aggiungere conteggi globali delle classi (v)
#  adattare il tutto sul distribuito
    # aggiungere funzione per il dump e recupero dei patterns (v)
    # suddividere il carico sulle macchine e fare prima prova di avvio distribuito + salvataggio cose su hdfs
# funzione per codificare il dataset
#  funzioni per il training + grid search
#  funzioni per l'esplorazione dei dati

# cose da fare alla scordata:
    # dump dei dataframe "hashtables"
    # funzione di filtering e ottimizzazione prioqueue
    
    

In [3]:
def filter_data(data, target_class, target_col): 
    return data.filter(col(target_col) == target_class)

def seq_scout(data, data_plus,target_class, numerics_max, top_k, iterations, theta, alpha): #t
    data_support = data.count()
    class_support = data_plus.count()
    # create priority queue for patterns to be stored
    pi = PriorityQueue(k=top_k, theta=theta, cap_length=True) 
    
    # create priority queue for storing each class sequence and its UCB score
    scores = PriorityQueue(data_plus)
    #N = 1
    for N in tqdm(range(1,iterations+1)):
        _, Ni, mean_quality, sequence = scores.pop_first() # pop the sequence to be generalized
        print
        # generalize the sequence and add it to the patterns
        gen_seq, new_qual = play_arm(sequence, data, target_class, numerics_max, alpha, data_support, class_support)
        pi.add((-new_qual, to_imm_pattern(gen_seq)))
        # update the quality and put back the sequence in the priority queue
        updated_quality = (Ni * mean_quality + new_qual) / (Ni + 1)
        ucb_score = compute_ucb(updated_quality, Ni + 1, N)
        scores.add((-ucb_score, Ni + 1, updated_quality, sequence))
        
        #N += 1
    
    return pi.get_top_k() # priority queue filters automatically if theta <1

def play_arm(sequence, data, target_class, numerics_max, alpha, data_support, class_support): 
    sequence_m = mutable_seq_copy(sequence)
    # get the number of button pressed in the sequence
    tot_num_inputs = sum([len(state[0]) for state in sequence])
    # get a random number of input to be removed
    input_to_remove = random.randint(0, tot_num_inputs-1)

    for i in range(input_to_remove):
        selected_state_idx = random.randint(0, len(sequence_m)-1)
        selected_state = sequence_m[selected_state_idx][0] # we take the input itemset
        
        selected_state.remove(random.choice(list(selected_state))) # remove an element
        
        if len(selected_state) == 0: # if the state looses all the inputs, then it is removed
            sequence_m.pop(selected_state_idx)
    for _, numerics in sequence_m:
        for kind, value in numerics.items():
            # first we decide whether to remove the constraint or not
            if random.random() < alpha:
                numerics[kind] = [-float('inf'), float('inf')]
            else:              
                left_value = random.randint(0, value)
                right_value = random.randint(value, numerics_max[kind]-1)

                
                numerics[kind] = [left_value, right_value]

    # now we compute the quality measure
    quality = compute_WRAcc(data, sequence_m, target_class, data_support, class_support)

    return sequence_m, quality

def compute_ucb(score, Ni, N):
    # we choose C = 0.5
    return (score + 0.25) * 2 + 0.5 * math.sqrt(2 * math.log(N) / Ni)


def compute_WRAcc(data, subsequence, target_class, data_support, class_support): 
    # data support and class support were passed as it is useless to compute them everytime
    schema = StructType([
        StructField("sub_support", IntegerType(),False),
        StructField("sub_sup_c", IntegerType(), False)
    ])
    udf_subsequence = udf(lambda x,y,z: is_subsequence(subsequence,target_class, x, y, z), schema)
    support_data = data.select(udf_subsequence(data.input_sequence,
                                               data.enc_num_sequence,
                                              col("class")).alias("tmp")).select(fsum("tmp.sub_support").alias("sub_support"),
                                                                                 fsum("tmp.sub_sup_c").alias("sub_sup_c"))
    sums = support_data.head()
    support = sums["sub_support"]
    class_pattern_count = sums["sub_sup_c"]

    del sums 
    del support_data
    try:
        class_pattern_ratio = class_pattern_count / support
    except ZeroDivisionError:
        return -0.25

    class_data_ratio = class_support / data_support
    #if support>1:
    #    print(f"class_pattern_count {class_pattern_count}")
    #    print(f"support {support}")
    #    print(f"class_pattern_ratio {class_pattern_ratio}")
    #    print(f"class_data_ratio {class_data_ratio}")
    wracc = support / data_support * (class_pattern_ratio - class_data_ratio)
    if support>1:
        print(wracc)
    return wracc

def is_subsequence(subsequence,classsub, sequence_input, sequence_num, classsuper):
    # sequence input is a list of lists of strings
    # sequence num is a list of rows
    i_sub = 0
    i_seq = 0
    while i_sub<len(subsequence) and i_seq<len(sequence_input):
        if subsequence[i_sub][0].issubset(sequence_input[i_seq]):
            if all([value >= subsequence[i_sub][1][numeric][0] and value <= subsequence[i_sub][1][numeric][1] for numeric, value in
                    sequence_num[i_seq].asDict().items()]):
                i_sub += 1
        i_seq += 1
        
    if i_sub == len(subsequence):
        is_sub = 1
    else:
        is_sub = 0
    
    if classsub is not None:
        if is_sub == 1 and classsub == classsuper:
            return (is_sub,1)
        else:
            return (is_sub,0)
    else:
        return is_sub
    
#1: function SEQSCOUT(budget)
#2: 	π ← PriorityQueue()
#3: 	scores ← PriorityQueue() # ! sfruttare i dataframe distribuiti di spark

#8: 	|while budget do 
#9: 	|	seq, qual, Ni ← scores.bestUCB()
#10: |	seqp, qualp ← PlayArm(seq) #calcolo qualità parallelizzabile
#11: |	π.add(seqp, qualp)
#12: |	scores.update(seq,Ni*qual+qualp/Ni+1 , Ni + 1)
#3: |end while # while eseguito per ogni top esempio - non parallelizzabile?
#4:  
#15: return π.topKNonRedundant() # filtering (remove similar starting from the beginning)
#16: end function

#- il filtering dei dati penso possa essere fatto automaticamente con una bella filter
#- controlla come funziona la max del DataFrame
#- possibile parallelizzazione 1 per ogni classe (a livello di container -> 7 esecutori max)
#- possibile parallelizzazione sul calcolo della metrica come map + reduce
#- priority queue con i dataframe distribuiti non ha senso, ma pi può essere implementata easy come una lista
#	che flitra automaticamente i migliori k

In [4]:
def read_dataset(path):
    DISCRETE_INPUTS = {'up', 'accelerate', 'slow', 'goal', 'left', 'boost', 'camera', 'down', 'right', 'slide', 'jump'}
    data = []
    with open(path, "r") as file:
        dict_headers = next(file).split()
        new_line = dict()
        for line in file:
            if len(line.split()) <= 1:
                if new_line:
                    data.append(new_line)
                new_line = {"input_sequence": [] ,"num_sequence":[],"class": line.strip()}
            else:
                if len(dict_headers) != len(line.split()):
                    raise ValueError('Number of data and variables do not match')

                numerics = {}
                buttons = []

                for i, value in enumerate(line.split()):
                    if dict_headers[i] in DISCRETE_INPUTS:
                        if value == '1':
                            buttons.append(dict_headers[i])
                    else:
                        numerics[dict_headers[i]] = float(value)

                #state = [buttons, numerics]
                new_line["input_sequence"].append(buttons)
                new_line["num_sequence"].append(numerics)
        data.append(new_line)
    return data


In [5]:
def get_numerics(df):
    subfields = df.schema["num_sequence"].dataType.elementType.fieldNames()
    numerics_domains = {}
    numerics_max = {}
    for c in subfields:
        field = "num_sequence." + c
        no_idx = df.select(explode(field).alias(c)).distinct().orderBy(c)
        numerics_domains[c] = no_idx.withColumn("idx", monotonically_increasing_id())
        numerics_max["idx"+c] = numerics_domains[c].count()
    return numerics_domains, numerics_max

def convert_numerics(df, numerics_domains):
    workdf = df.select(col("id").alias("_id"),posexplode("num_sequence").alias("pos","exp")).select("_id", "pos", "exp.*")
    needed_columns = [i for i in numerics_domains.keys()]
    needed_columns.append("pos")
    needed_columns.append("_id")
    for kind, unique_df in numerics_domains.items():
        print("processing " + kind + "...")
        expr1 = kind + " as _" + kind
        expr2 = "idx as idx" + kind
        workdf = workdf.join(unique_df.selectExpr(expr1, expr2), col(kind)==col("_"+kind))
        needed_columns.remove(kind)
        needed_columns.append("idx"+kind)
        workdf = workdf.select(needed_columns)
    needed_columns.remove("_id")
    needed_columns.remove("pos")
    workdf = workdf.orderBy("_id", "pos")
    workdf = workdf.groupBy("_id", "pos").agg(collect_list(struct([col(i) for i in needed_columns])).alias("enc_num_sequence"))
    return workdf.groupBy("_id").agg(collect_list(col("enc_num_sequence")[0]).alias("enc_num_sequence"))

In [6]:
def import_imm_sequence(seq):
    return tuple([tuple([frozenset(seq[0][i]), tuple(sorted(seq[1][i].asDict().items()))]) for i in range(len(seq[0]))])
def mutable_seq_copy(seq):
    copy = []
    for i in seq:
        input_set = set(i[0])
        num_dict = {j[0] : j[1] for j in i[1]}
        copy.append([input_set, num_dict])
    return copy
        
def to_imm_pattern(pattern):
    return tuple([tuple([frozenset(i[0]), tuple(sorted([(key, tuple(value)) for key, value in i[1].items()]))]) for i in
                  pattern])

def save_patterns(patterns, filename):
    with open(filename, "wb") as file:
        pickle.dump(patterns, file)

def load_patterns(filename):
    with open(filename, "rb") as file:
        to_return = pickle.load(filename)
    return to_return

In [7]:
class PriorityQueue(object):
    def __init__(self, data=None, k=1,theta=1, cap_length=False):
        self.k = k
        self.theta=theta
        self.cap_length=cap_length if k is not None else False
        if data is not None:  
            self.heap = [tuple([-float('inf'), 0, 0, import_imm_sequence((x["input_sequence"], x["enc_num_sequence"]))]) for x in data.collect()]
            heapq.heapify(self.heap)
            if cap_length and len(self.heap)>self.k:
                self.heap = heapq.nlargest(self.k, self.heap)
            self.seq_set = set([i[-1] for i in self.heap])
        else:
            self.heap = []
            self.seq_set = set()

    def add(self, elem):
        if elem[-1] not in self.seq_set:
            heapq.heappush(self.heap, elem)
            self.seq_set.add(elem[-1])
            if self.cap_length and len(self.heap)>self.k:
                self.heap = heapq.nsmallest(self.k, self.heap)
                self.seq_set = set([i[-1] for i in self.heap])
                #TODO add filtering if necessary
    def pop_first(self):
        head = heapq.heappop(self.heap)
        self.seq_set.remove(head[-1])
        return head
    
    def get_top_k(self):
        if self.theta == 1:
            return heapq.nsmallest(self.k, self.heap)
        else:
            return 0
            #TODO add filtering
    
    
    
#            def add(self, elem):
#        if elem[-1] not in self.seq_set:
#            if len(self.heap)<self.k or not cap_length:
#                heapq.heappush(self.heap, elem)
#                self.seq_set.add(elem[-1])
#            else:
#                last_queue = max(self.heap, key=lambda x: x[0])
#                if elem[0]<last_queue[0]:
#                    
#            if self.cap_length and len(self.heap)>self.k:
#                self.heap = heapq.nsmallest(self.k, self.heap)
#                self.seq_set = set([i[-1] for i in self.heap])
#                #TODO add filtering if necessary

In [8]:
data = read_dataset("/vagrant/rocket_league_skillshots.data")
# in case of bigger datasets, single splits could be generated on different nodes
# and after joined as single json file
print(len(data))

298


In [18]:
np.unique([c['class'] for c in data])

array(['-1', '1', '2', '3', '5', '6', '7'], dtype='<U2')

In [9]:

spark = SparkSession.builder.appName("RocketLeagueFE").getOrCreate()
with open("source.json", "w") as s:
    s.write(json.dumps(data))

23/02/12 16:59:14 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


In [10]:
df = spark.read.format("json").load("source.json")

                                                                                

In [15]:
df.select("class").distinct().count()

7

In [13]:
df.printSchema()
df.show()

root
 |-- class: string (nullable = true)
 |-- input_sequence: array (nullable = true)
 |    |-- element: array (containsNull = true)
 |    |    |-- element: string (containsNull = true)
 |-- num_sequence: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- BallAcceleration: double (nullable = true)
 |    |    |-- BallSpeed: double (nullable = true)
 |    |    |-- DistanceBall: double (nullable = true)
 |    |    |-- DistanceCeil: double (nullable = true)
 |    |    |-- DistanceWall: double (nullable = true)
 |    |    |-- PlayerSpeed: double (nullable = true)
 |    |    |-- Time: double (nullable = true)

+-----+--------------------+--------------------+
|class|      input_sequence|        num_sequence|
+-----+--------------------+--------------------+
|    6|[[right, jump], [...|[{1636.7987723122...|
|   -1|[[boost, right, j...|[{0.0, 33685.8395...|
|   -1|[[right, jump], [...|[{124246.29375405...|
|   -1|[[right, slide, j...|[{-8210.634011562...|
|

In [15]:
df.explain()

== Physical Plan ==
FileScan json [class#8,input_sequence#9,num_sequence#10] Batched: false, DataFilters: [], Format: JSON, Location: InMemoryFileIndex(1 paths)[file:/vagrant/source.json], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<class:string,input_sequence:array<array<string>>,num_sequence:array<struct<BallAcceleratio...




In [16]:
df = df.withColumn("id", monotonically_increasing_id())
numerics_domains, numerics_max = get_numerics(df)
encoded_numerics = convert_numerics(df, numerics_domains)
dfj = df.join(encoded_numerics, col("id")==col("_id")).select("id","input_sequence" ,"enc_num_sequence", "class")
dfj.printSchema()

processing BallAcceleration...
processing BallSpeed...
processing DistanceBall...
processing DistanceCeil...
processing DistanceWall...
processing PlayerSpeed...
processing Time...
root
 |-- id: long (nullable = false)
 |-- input_sequence: array (nullable = true)
 |    |-- element: array (containsNull = true)
 |    |    |-- element: string (containsNull = true)
 |-- enc_num_sequence: array (nullable = false)
 |    |-- element: struct (containsNull = false)
 |    |    |-- idxBallAcceleration: long (nullable = false)
 |    |    |-- idxBallSpeed: long (nullable = false)
 |    |    |-- idxDistanceBall: long (nullable = false)
 |    |    |-- idxDistanceCeil: long (nullable = false)
 |    |    |-- idxDistanceWall: long (nullable = false)
 |    |    |-- idxPlayerSpeed: long (nullable = false)
 |    |    |-- idxTime: long (nullable = false)
 |-- class: string (nullable = true)



In [13]:
encoded_numerics.printSchema()

root
 |-- _id: long (nullable = false)
 |-- enc_num_sequence: array (nullable = false)
 |    |-- element: struct (containsNull = false)
 |    |    |-- idxBallAcceleration: long (nullable = false)
 |    |    |-- idxBallSpeed: long (nullable = false)
 |    |    |-- idxDistanceBall: long (nullable = false)
 |    |    |-- idxDistanceCeil: long (nullable = false)
 |    |    |-- idxDistanceWall: long (nullable = false)
 |    |    |-- idxPlayerSpeed: long (nullable = false)
 |    |    |-- idxTime: long (nullable = false)



In [18]:
dfj.show()

+---+--------------------+--------------------+-----+
| id|      input_sequence|    enc_num_sequence|class|
+---+--------------------+--------------------+-----+
|  0|[[right, jump], [...|[{4250, 1141, 234...|    6|
|  1|[[boost, right, j...|[{3454, 226, 1028...|   -1|
|  2|[[right, jump], [...|[{5668, 4313, 161...|   -1|
|  3|[[right, slide, j...|[{699, 3273, 4168...|   -1|
|  4|[[right], [boost,...|[{4102, 2291, 98,...|   -1|
|  5|[[boost, right, j...|[{5274, 3573, 146...|    6|
|  6|[[down, right], [...|[{3454, 4278, 229...|    1|
|  7|[[right], [right,...|[{4761, 3013, 81,...|    7|
|  8|[[right, jump], [...|[{693, 2833, 4578...|    1|
|  9|[[slide], [right]...|[{5408, 780, 709,...|    6|
| 10|[[boost, right, j...|[{2939, 224, 5575...|    2|
| 11|[[right], [right,...|[{3454, 147, 2380...|    1|
| 12|[[right, slide], ...|[{3454, 2274, 102...|    7|
| 13|[[right, slide, j...|[{4985, 2472, 14,...|    6|
| 14|[[right, slide, j...|[{1074, 4293, 184...|    2|
| 15|[[right, slide, j...|[{

In [19]:
dfj.write.format("json").save("elaborated df.json")

In [36]:
print(numerics_max)

{'idxBallAcceleration': 5747, 'idxBallSpeed': 5958, 'idxDistanceBall': 6762, 'idxDistanceCeil': 3631, 'idxDistanceWall': 5721, 'idxPlayerSpeed': 5942, 'idxTime': 5903}


In [20]:
dfa = spark.read.format("json").load("elaborated df.json")

In [22]:
dfa.show()
dfa.explain()

+-----+--------------------+---+--------------------+
|class|    enc_num_sequence| id|      input_sequence|
+-----+--------------------+---+--------------------+
|    6|[{4250, 1141, 234...|  0|[[right, jump], [...|
|   -1|[{3454, 226, 1028...|  1|[[boost, right, j...|
|   -1|[{5668, 4313, 161...|  2|[[right, jump], [...|
|   -1|[{699, 3273, 4168...|  3|[[right, slide, j...|
|   -1|[{4102, 2291, 98,...|  4|[[right], [boost,...|
|    6|[{5274, 3573, 146...|  5|[[boost, right, j...|
|    1|[{3454, 4278, 229...|  6|[[down, right], [...|
|    7|[{4761, 3013, 81,...|  7|[[right], [right,...|
|    1|[{693, 2833, 4578...|  8|[[right, jump], [...|
|    6|[{5408, 780, 709,...|  9|[[slide], [right]...|
|    2|[{2939, 224, 5575...| 10|[[boost, right, j...|
|    1|[{3454, 147, 2380...| 11|[[right], [right,...|
|    7|[{3454, 2274, 102...| 12|[[right, slide], ...|
|    6|[{4985, 2472, 14,...| 13|[[right, slide, j...|
|    2|[{1074, 4293, 184...| 14|[[right, slide, j...|
|    1|[{635, 4153, 4272...|

In [29]:
patterns = seq_scout(dfa, filter_data(dfa, "1", "class"),"1", numerics_max, 30, 1000, 1, 0.5)

  0%|▎                                                                                 | 4/1000 [00:00<02:31,  6.56it/s]

0.0060357641547678035


  3%|██▏                                                                              | 27/1000 [00:03<02:03,  7.88it/s]

0.01105806044772758


  6%|████▍                                                                            | 55/1000 [00:07<01:57,  8.03it/s]

0.06657357776676726


  7%|█████▊                                                                           | 71/1000 [00:09<02:11,  7.09it/s]

-0.016598351425611457


  8%|██████▏                                                                          | 77/1000 [00:10<01:54,  8.03it/s]

-0.0004729516688437448


  8%|██████▋                                                                          | 83/1000 [00:10<02:02,  7.47it/s]

0.005360118913562453


 10%|███████▉                                                                         | 98/1000 [00:12<01:54,  7.86it/s]

0.012071528309535607


 12%|█████████▎                                                                      | 117/1000 [00:15<01:55,  7.67it/s]

0.008378000990946355


 15%|████████████                                                                    | 150/1000 [00:19<01:53,  7.52it/s]

0.005022296292959778


 16%|████████████▉                                                                   | 161/1000 [00:20<01:42,  8.16it/s]

0.005697941534165128


 16%|█████████████                                                                   | 163/1000 [00:21<01:47,  7.76it/s]

0.0026800594567812264


 17%|█████████████▍                                                                  | 168/1000 [00:21<01:46,  7.82it/s]

0.009053646232151704


 20%|████████████████▎                                                               | 204/1000 [00:26<01:38,  8.06it/s]

0.01605783523264718


 22%|█████████████████▋                                                              | 221/1000 [00:28<01:38,  7.92it/s]

0.024773658844196206


 26%|█████████████████████                                                           | 263/1000 [00:34<01:31,  8.05it/s]

0.0026800594567812264


 27%|█████████████████████▎                                                          | 266/1000 [00:34<01:33,  7.84it/s]

0.03348948245574524


 27%|█████████████████████▋                                                          | 271/1000 [00:35<01:29,  8.11it/s]

0.005697941534165128


 28%|██████████████████████▍                                                         | 281/1000 [00:36<01:28,  8.13it/s]

0.009031124724111527


 29%|██████████████████████▉                                                         | 287/1000 [00:37<01:34,  7.56it/s]

0.013738119904508808


 29%|███████████████████████▎                                                        | 291/1000 [00:37<01:27,  8.11it/s]

0.002004414215575875
0.005022296292959778


 30%|████████████████████████▏                                                       | 303/1000 [00:39<01:24,  8.21it/s]

0.0026800594567812264


 35%|████████████████████████████▏                                                   | 353/1000 [00:45<01:28,  7.30it/s]

0.013715598396468628


 37%|█████████████████████████████▍                                                  | 368/1000 [00:47<01:18,  8.06it/s]

0.002004414215575875


 39%|██████████████████████████████▉                                                 | 386/1000 [00:49<01:15,  8.11it/s]

-0.0021620647718571216
0.0026800594567812264


 40%|████████████████████████████████▏                                               | 403/1000 [00:51<01:14,  8.05it/s]

0.007702355749741002


 42%|██████████████████████████████████                                              | 425/1000 [00:54<01:10,  8.16it/s]

0.005697941534165128


 46%|████████████████████████████████████▌                                           | 457/1000 [00:58<01:09,  7.85it/s]

0.00871582361154903


 47%|█████████████████████████████████████▎                                          | 466/1000 [00:59<01:05,  8.18it/s]

-0.003085446601504436


 47%|█████████████████████████████████████▍                                          | 468/1000 [01:00<01:10,  7.59it/s]

0.0060357641547678035


 49%|██████████████████████████████████████▉                                         | 486/1000 [01:02<01:02,  8.27it/s]

0.002004414215575875


 50%|███████████████████████████████████████▊                                        | 497/1000 [01:03<01:02,  8.11it/s]

0.00400882843115175


 52%|█████████████████████████████████████████▊                                      | 522/1000 [01:07<01:02,  7.66it/s]

0.0060357641547678035


 53%|██████████████████████████████████████████▎                                     | 529/1000 [01:07<00:59,  7.92it/s]

0.0043241295437142484


 55%|███████████████████████████████████████████▌                                    | 545/1000 [01:10<01:01,  7.37it/s]

0.009053646232151704


 56%|████████████████████████████████████████████▌                                   | 557/1000 [01:11<01:00,  7.32it/s]

0.0060357641547678035


 56%|█████████████████████████████████████████████                                   | 564/1000 [01:12<00:55,  7.84it/s]

0.0006531237331651729


 57%|█████████████████████████████████████████████▋                                  | 571/1000 [01:13<00:54,  7.85it/s]

0.005022296292959778


 59%|███████████████████████████████████████████████                                 | 588/1000 [01:15<00:50,  8.13it/s]

0.00871582361154903


 63%|██████████████████████████████████████████████████                              | 626/1000 [01:20<00:47,  7.86it/s]

0.0026800594567812264


 64%|███████████████████████████████████████████████████▌                            | 645/1000 [01:22<00:45,  7.77it/s]

0.0060357641547678035


 66%|████████████████████████████████████████████████████▋                           | 658/1000 [01:24<00:46,  7.40it/s]

-0.002995360569343719


 66%|█████████████████████████████████████████████████████                           | 664/1000 [01:25<00:42,  7.96it/s]

0.05357866762758434


 67%|█████████████████████████████████████████████████████▎                          | 666/1000 [01:25<00:42,  7.94it/s]

0.06436646997882979


 69%|███████████████████████████████████████████████████████▍                        | 693/1000 [01:29<00:39,  7.71it/s]

0.06402864735822711


 70%|████████████████████████████████████████████████████████▍                       | 705/1000 [01:30<00:37,  7.91it/s]

0.0026800594567812264


 71%|████████████████████████████████████████████████████████▉                       | 711/1000 [01:31<00:36,  7.98it/s]

0.015742534120084684


 74%|███████████████████████████████████████████████████████████▍                    | 743/1000 [01:35<00:31,  8.09it/s]

0.04053871447232107
0.0026575379487410473


 75%|███████████████████████████████████████████████████████████▋                    | 746/1000 [01:35<00:30,  8.28it/s]

0.0026800594567812264
0.009053646232151704


 76%|████████████████████████████████████████████████████████████▋                   | 759/1000 [01:37<00:29,  8.18it/s]

0.010675194811044548


 77%|█████████████████████████████████████████████████████████████▎                  | 766/1000 [01:38<00:29,  7.86it/s]

-0.0010810323859285608


 77%|█████████████████████████████████████████████████████████████▌                  | 770/1000 [01:39<00:28,  8.19it/s]

0.009053646232151704


 84%|██████████████████████████████████████████████████████████████████▉             | 836/1000 [01:47<00:19,  8.31it/s]

0.0009909463537678483


 85%|███████████████████████████████████████████████████████████████████▉            | 849/1000 [01:49<00:18,  8.00it/s]

0.04484032250799514


 85%|████████████████████████████████████████████████████████████████████            | 851/1000 [01:49<00:21,  6.99it/s]

0.02376019098238818


 87%|█████████████████████████████████████████████████████████████████████▋          | 871/1000 [01:51<00:16,  8.05it/s]

0.0019818927075356966


 88%|██████████████████████████████████████████████████████████████████████          | 876/1000 [01:52<00:15,  8.02it/s]

0.040133327327597855


 90%|███████████████████████████████████████████████████████████████████████▌        | 895/1000 [01:55<00:14,  7.33it/s]

0.0060357641547678035


 91%|████████████████████████████████████████████████████████████████████████▌       | 907/1000 [01:56<00:11,  8.13it/s]

0.013963334984910593


 91%|████████████████████████████████████████████████████████████████████████▉       | 912/1000 [01:57<00:11,  7.96it/s]

-0.00815278591054457


 92%|██████████████████████████████████████████████████████████████████████████      | 925/1000 [01:58<00:09,  8.25it/s]

0.0033106616819062206


 94%|███████████████████████████████████████████████████████████████████████████▏    | 940/1000 [02:00<00:07,  7.98it/s]

0.052430070717535256


 95%|███████████████████████████████████████████████████████████████████████████▋    | 946/1000 [02:01<00:06,  7.93it/s]

0.009031124724111527
0.025021395432638174


 96%|█████████████████████████████████████████████████████████████████████████████   | 963/1000 [02:03<00:04,  7.41it/s]

0.035921805324084496


 98%|██████████████████████████████████████████████████████████████████████████████▋ | 984/1000 [02:06<00:01,  8.13it/s]

0.0026800594567812264


100%|███████████████████████████████████████████████████████████████████████████████| 1000/1000 [02:08<00:00,  7.79it/s]


In [34]:
print(patterns)

[(-0.06657357776676726, ((frozenset({'jump'}), (('idxBallAcceleration', (-inf, inf)), ('idxBallSpeed', (24, 4057)), ('idxDistanceBall', (875, 6722)), ('idxDistanceCeil', (29, 2787)), ('idxDistanceWall', (-inf, inf)), ('idxPlayerSpeed', (363, 3604)), ('idxTime', (-inf, inf)))),)), (-0.06436646997882979, ((frozenset({'camera'}), (('idxBallAcceleration', (924, 4590)), ('idxBallSpeed', (-inf, inf)), ('idxDistanceBall', (-inf, inf)), ('idxDistanceCeil', (95, 1263)), ('idxDistanceWall', (-inf, inf)), ('idxPlayerSpeed', (27, 3810)), ('idxTime', (-inf, inf)))),)), (-0.06402864735822711, ((frozenset({'slide'}), (('idxBallAcceleration', (652, 5652)), ('idxBallSpeed', (-inf, inf)), ('idxDistanceBall', (-inf, inf)), ('idxDistanceCeil', (-inf, inf)), ('idxDistanceWall', (96, 5553)), ('idxPlayerSpeed', (168, 3507)), ('idxTime', (-inf, inf)))), (frozenset({'right'}), (('idxBallAcceleration', (620, 5039)), ('idxBallSpeed', (-inf, inf)), ('idxDistanceBall', (4348, 6168)), ('idxDistanceCeil', (80, 224))

In [36]:
import pickle
with open("pattern.dmp", "wb") as f:
    pickle.dump(patterns,f)
print("done")

done


In [37]:
with open("pattern.dmp", "rb") as f:
    papa = pickle.load(f)
print(papa)


[(-0.06657357776676726, ((frozenset({'jump'}), (('idxBallAcceleration', (-inf, inf)), ('idxBallSpeed', (24, 4057)), ('idxDistanceBall', (875, 6722)), ('idxDistanceCeil', (29, 2787)), ('idxDistanceWall', (-inf, inf)), ('idxPlayerSpeed', (363, 3604)), ('idxTime', (-inf, inf)))),)), (-0.06436646997882979, ((frozenset({'camera'}), (('idxBallAcceleration', (924, 4590)), ('idxBallSpeed', (-inf, inf)), ('idxDistanceBall', (-inf, inf)), ('idxDistanceCeil', (95, 1263)), ('idxDistanceWall', (-inf, inf)), ('idxPlayerSpeed', (27, 3810)), ('idxTime', (-inf, inf)))),)), (-0.06402864735822711, ((frozenset({'slide'}), (('idxBallAcceleration', (652, 5652)), ('idxBallSpeed', (-inf, inf)), ('idxDistanceBall', (-inf, inf)), ('idxDistanceCeil', (-inf, inf)), ('idxDistanceWall', (96, 5553)), ('idxPlayerSpeed', (168, 3507)), ('idxTime', (-inf, inf)))), (frozenset({'right'}), (('idxBallAcceleration', (620, 5039)), ('idxBallSpeed', (-inf, inf)), ('idxDistanceBall', (4348, 6168)), ('idxDistanceCeil', (80, 224))