In [1]:
import numpy as np
import pandas as pd
from collections import Counter
from itertools import combinations, chain
from collections import defaultdict
import pickle

### Transaction DB

In [2]:
# Load the raw picks/bans table that the transaction database is built from.
# The displayed frame has the columns hero_id, team, order and match_id
# (plus an `Unnamed: 0` column, presumably a saved row index -- TODO confirm).
db = pd.read_csv('./data/picks_bans.csv')
db

Unnamed: 0,hero_id,team,order,match_id
0,128.0,0.0,0.0,7276712204
1,82.0,1.0,1.0,7276712204
2,85.0,1.0,2.0,7276712204
3,61.0,0.0,3.0,7276712204
4,75.0,1.0,4.0,7276712204
...,...,...,...,...
720211,13.0,1.0,19.0,7914293221
720212,120.0,1.0,20.0,7914293221
720213,43.0,0.0,21.0,7914293221
720214,126.0,0.0,22.0,7914293221


In [3]:
# Keep only the rows whose draft `order` is one of the slots listed below
# (treated here as picks), then collect the hero_ids of every
# (match_id, team) group into one set per group.
# NOTE: `db` is rebound from a DataFrame to a list of sets, so this cell
# cannot be re-run on its own without re-running the CSV load first.
order_picks = [7, 8, 12, 13, 14, 15, 16, 17, 22, 23]
db = (
    db.loc[db['order'].isin(order_picks)]
    .groupby(['match_id', 'team'])['hero_id']
    .apply(set)
    .tolist()
)
db

[{23.0, 101.0, 104.0, 105.0, 114.0},
 {4.0, 55.0, 62.0, 87.0, 120.0},
 {38.0, 52.0, 84.0, 86.0, 109.0},
 {5.0, 9.0, 16.0, 46.0, 99.0},
 {40.0, 44.0, 123.0, 126.0, 135.0},
 {15.0, 16.0, 19.0, 49.0, 64.0},
 {9.0, 10.0, 16.0, 92.0, 100.0},
 {33.0, 86.0, 89.0, 106.0, 110.0},
 {5.0, 14.0, 16.0, 41.0, 80.0},
 {21.0, 39.0, 71.0, 84.0, 109.0},
 {5.0, 16.0, 26.0, 44.0, 106.0},
 {18.0, 22.0, 85.0, 86.0, 120.0},
 {5.0, 35.0, 40.0, 109.0, 120.0},
 {38.0, 84.0, 86.0, 88.0, 89.0},
 {1.0, 22.0, 40.0, 87.0, 136.0},
 {4.0, 31.0, 43.0, 74.0, 135.0},
 {10.0, 51.0, 52.0, 98.0, 110.0},
 {9.0, 38.0, 119.0, 137.0, 138.0},
 {5.0, 23.0, 26.0, 98.0, 138.0},
 {2.0, 21.0, 41.0, 51.0, 69.0},
 {22.0, 58.0, 67.0, 74.0, 99.0},
 {31.0, 95.0, 98.0, 100.0, 128.0},
 {51.0, 94.0, 98.0, 106.0, 110.0},
 {4.0, 119.0, 121.0, 128.0, 129.0},
 {5.0, 14.0, 46.0, 51.0, 106.0},
 {39.0, 61.0, 84.0, 93.0, 128.0},
 {4.0, 29.0, 64.0, 101.0, 112.0},
 {11.0, 54.0, 60.0, 68.0, 74.0},
 {65.0, 82.0, 86.0, 126.0, 137.0},
 {41.0, 51.0, 74.0, 

In [4]:
# number of transactions (one hero set per (match_id, team) group)
len(db)

60018

### Helper Functions

In [5]:
# FP-growth frequent itemset mining
def fpgrowth(db, minsup):
    """Mine all frequent itemsets from ``db`` with the FP-growth algorithm.

    Parameters
    ----------
    db : iterable of iterables
        Transactions. Each transaction is a collection of hashable, mutually
        comparable items; items are assumed to be distinct within a
        transaction (e.g. each transaction is a set).
    minsup : int
        Minimum absolute support: an itemset is kept when it occurs in at
        least ``minsup`` transactions.

    Returns
    -------
    list of (tuple, int)
        ``(itemset, support)`` pairs where ``itemset`` is a sorted tuple.
        The list is ordered by descending support, then by descending
        itemset length, then by the itemset tuple itself. An empty list is
        returned when no item reaches ``minsup``.
    """
    def build_fptree(weighted_db):
        # weighted_db is a list of (items, weight) pairs; a weight > 1 stands
        # for that many identical transactions, so conditional pattern bases
        # never have to be physically replicated.
        totals = Counter()
        for items, weight in weighted_db:
            for item in items:
                totals[item] += weight
        counts = {item: total for item, total in totals.items() if total >= minsup}
        if not counts:
            return None, None

        header = defaultdict(list)
        root = {'item': None, 'count': 0, 'children': {}, 'parent': None}
        for items, weight in weighted_db:
            # Most frequent items first (ties broken by item) so that common
            # prefixes are shared between transactions.
            sorted_items = sorted(
                (item for item in items if item in counts),
                key=lambda item: (-counts[item], item),
            )
            current_node = root
            for item in sorted_items:
                child = current_node['children'].get(item)
                if child is None:
                    # Each node remembers its own item, so prefix paths can be
                    # read straight off the parent chain.
                    child = {'item': item, 'count': 0, 'children': {}, 'parent': current_node}
                    current_node['children'][item] = child
                    header[item].append(child)
                child['count'] += weight
                current_node = child

        return root, header

    def mine_fptree(header, prefix):
        result = []
        supports = {
            item: sum(node['count'] for node in nodes)
            for item, nodes in header.items()
        }

        # Process the least frequent items first (ties broken by item).
        for item in sorted(header, key=lambda candidate: (supports[candidate], candidate)):
            new_freqset = prefix + [item]
            result.append((tuple(sorted(new_freqset)), supports[item]))

            # Conditional pattern base: the prefix path above every node that
            # carries `item`, weighted by that node's count.
            conditional_base = []
            for node in header[item]:
                path = []
                current = node['parent']
                # Stop at the root, which is the only node without a parent.
                while current['parent'] is not None:
                    path.append(current['item'])
                    current = current['parent']
                if path:
                    path.reverse()
                    conditional_base.append((path, node['count']))

            if conditional_base:
                _, subheader = build_fptree(conditional_base)
                if subheader:
                    result.extend(mine_fptree(subheader, new_freqset))

        return result

    # Materialise once: the tree builder needs two passes over the data, which
    # would silently see nothing on the second pass of a one-shot iterator.
    _, header = build_fptree([(transaction, 1) for transaction in db])
    if not header:
        return []

    freq_itemsets = mine_fptree(header, [])
    return sorted(freq_itemsets, key=lambda x: (-x[1], -len(x[0]), x[0]))


# association rule generation
def assoc(fi, minconf, db_size):
    """Generate single-consequent association rules from frequent itemsets.

    Parameters
    ----------
    fi : iterable of (tuple, int)
        ``(itemset, support)`` pairs with absolute supports. Each itemset is
        assumed to contain distinct items, and the supports of its subsets
        are looked up in ``fi`` itself.
    minconf : float
        Minimum confidence; rules with a lower confidence are dropped.
    db_size : int
        Number of transactions, used to turn the consequent's absolute
        support into a relative one for the lift. Must be non-zero whenever
        a rule is produced (division by it raises ZeroDivisionError).

    Returns
    -------
    list of dict
        One dict per rule with the keys ``'antecedent'`` (sorted tuple),
        ``'consequent'`` (single item), ``'support'`` (absolute support of
        the whole itemset), ``'confidence'`` (support / antecedent support)
        and ``'lift'`` (confidence / relative consequent support). Rules are
        ordered by descending lift, confidence and support, then by
        consequent and antecedent. A rule is skipped when the support of its
        antecedent or of its consequent is not present in ``fi``.
    """
    fi = list(fi)  # iterated twice: once for the lookup table, once for rules
    support_dict = dict(fi)
    rules = []

    for itemset, support in fi:
        if len(itemset) <= 1:
            continue
        items = tuple(itemset)
        # Only rules with exactly one item on the right-hand side are wanted,
        # so leave each item out in turn instead of enumerating every subset.
        for position, consequent in enumerate(items):
            antecedent = tuple(sorted(items[:position] + items[position + 1:]))
            antecedent_support = support_dict.get(antecedent, 0)
            if antecedent_support <= 0:
                continue
            confidence = support / antecedent_support
            if confidence >= minconf:
                consequent_support = support_dict.get((consequent,), 0)
                if consequent_support > 0:
                    rules.append({
                        'antecedent': antecedent,
                        'consequent': consequent,
                        'support': support,
                        'confidence': confidence,
                        'lift': confidence / (consequent_support / db_size),
                    })

    return sorted(rules, key=lambda x: (-x['lift'], -x['confidence'], -x['support'], x['consequent'], x['antecedent']))


# brute-force reference miner, used to cross-check fpgrowth
def brute(db, minsup):
    """Count every non-empty sub-combination of every transaction.

    Each combination is normalised to a sorted tuple before counting. Only
    the itemsets seen at least ``minsup`` times are returned, as
    ``(itemset, support)`` pairs ordered by descending support, then by
    descending itemset length, then by the itemset tuple.
    """
    tally = Counter()
    for transaction in db:
        for size in range(1, len(transaction) + 1):
            for combo in combinations(transaction, size):
                tally[tuple(sorted(combo))] += 1

    frequent = [
        (candidate, hits)
        for candidate, hits in tally.items()
        if hits >= minsup
    ]
    frequent.sort(key=lambda pair: (-pair[1], -len(pair[0]), pair[0]))
    return frequent

### Frequent Itemset Mining

In [6]:
# Mine every itemset that occurs in at least 2 transactions (minsup is an
# absolute count, not a fraction). The result is a list of
# (sorted itemset tuple, support) pairs ordered by descending support.
fi_fpgrowth = fpgrowth(db, 2)
fi_fpgrowth

[((86.0,), 7899),
 ((20.0,), 6530),
 ((5.0,), 5558),
 ((19.0,), 5360),
 ((121.0,), 5028),
 ((96.0,), 4943),
 ((48.0,), 4867),
 ((21.0,), 4645),
 ((26.0,), 4453),
 ((123.0,), 4367),
 ((46.0,), 4345),
 ((14.0,), 4139),
 ((106.0,), 4059),
 ((120.0,), 4017),
 ((72.0,), 3861),
 ((71.0,), 3851),
 ((49.0,), 3789),
 ((10.0,), 3748),
 ((25.0,), 3735),
 ((22.0,), 3710),
 ((119.0,), 3691),
 ((74.0,), 3590),
 ((18.0,), 3560),
 ((100.0,), 3476),
 ((110.0,), 3470),
 ((53.0,), 3466),
 ((129.0,), 3385),
 ((137.0,), 3369),
 ((98.0,), 3360),
 ((23.0,), 3280),
 ((63.0,), 3259),
 ((51.0,), 3239),
 ((17.0,), 3040),
 ((11.0,), 3032),
 ((79.0,), 3030),
 ((84.0,), 3010),
 ((54.0,), 2942),
 ((13.0,), 2882),
 ((83.0,), 2875),
 ((69.0,), 2874),
 ((104.0,), 2866),
 ((64.0,), 2789),
 ((2.0,), 2743),
 ((45.0,), 2741),
 ((41.0,), 2706),
 ((87.0,), 2692),
 ((105.0,), 2683),
 ((114.0,), 2638),
 ((97.0,), 2616),
 ((81.0,), 2594),
 ((7.0,), 2554),
 ((28.0,), 2469),
 ((95.0,), 2431),
 ((65.0,), 2422),
 ((58.0,), 2408),
 

In [7]:
# Sanity check: brute-force enumeration with the same minsup must return
# exactly the same sorted list of (itemset, support) pairs as FP-growth.
fi_brute = brute(db, 2)
fi_brute == fi_fpgrowth

True

In [8]:
# Generate single-consequent association rules. minconf=0 keeps every rule;
# len(db) is the transaction count used to compute lift.
rules = assoc(fi_fpgrowth, 0, len(db))
rules

[{'antecedent': (20.0, 27.0, 71.0, 93.0),
  'consequent': 113.0,
  'support': 2,
  'confidence': 1.0,
  'lift': 124.51867219917013},
 {'antecedent': (27.0, 71.0, 93.0),
  'consequent': 113.0,
  'support': 2,
  'confidence': 1.0,
  'lift': 124.51867219917013},
 {'antecedent': (13.0, 101.0, 104.0, 110.0),
  'consequent': 12.0,
  'support': 2,
  'confidence': 1.0,
  'lift': 102.77054794520546},
 {'antecedent': (20.0, 25.0, 87.0, 104.0),
  'consequent': 12.0,
  'support': 2,
  'confidence': 1.0,
  'lift': 102.77054794520546},
 {'antecedent': (69.0, 74.0, 121.0, 123.0),
  'consequent': 12.0,
  'support': 2,
  'confidence': 1.0,
  'lift': 102.77054794520546},
 {'antecedent': (9.0, 55.0, 86.0),
  'consequent': 82.0,
  'support': 2,
  'confidence': 1.0,
  'lift': 99.69767441860465},
 {'antecedent': (26.0, 54.0, 86.0, 104.0),
  'consequent': 82.0,
  'support': 2,
  'confidence': 1.0,
  'lift': 99.69767441860465},
 {'antecedent': (57.0, 66.0, 86.0),
  'consequent': 82.0,
  'support': 2,
  'confi

In [9]:
# Persist the rule list (a list of dicts) to disk with pickle.
# NOTE: pickle files should only ever be loaded back from a trusted source.
with open('./data/rules1.pkl', 'wb') as file:
    pickle.dump(rules, file)