In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
import sys
sys.path.append('../Task 1/')
from efficient_apriori import apriori
from improved_apriori import Improved_Apriori
import json
import time
import os
import itertools
import ast
from tqdm import tqdm
import collections
import math
import random

In [2]:
# Process the dataset by chunks into tid and the list of bakerys the sales watched
def process_bakery_chunk(df, carry_over):
    if carry_over is not None:
        df = pd.concat([carry_over, df])
    groups = df.groupby('tid')['article'].apply(list)
    last_sales = df.iloc[-1]['tid']
    if last_sales in groups:
        carry_over = df[df['tid'] == last_sales]
        groups = groups.drop(last_sales)
    else:
        carry_over = None
    return groups, carry_over

In [3]:
carry_over = None
chunksize = 100000 # adjust this value depending on your available memory
# Might have to figure out a way to shuffle the dataset 
if(not os.path.exists('processed_bakery_output.txt')):
    with open('processed_bakery_output.txt', 'w') as f:
        for chunk in pd.read_csv('Bakery_sales.csv', chunksize=chunksize):
            groups, carry_over = process_bakery_chunk(chunk, carry_over)
            for sales, bakery_list in groups.items():
                f.write(f'{sales} {bakery_list}\n')

        # don't forget to process the last carry_over
        if carry_over is not None:
            groups, _ = process_bakery_chunk(carry_over, None)
            for sales, bakery_list in groups.items():
                f.write(f'{sales} {bakery_list}\n')

In [4]:
# Shuffle the text in chunks 
def shuffle_large_file(file_name, output_file_name, chunk_size):
    with open(file_name, 'r') as f:
        while True:
            lines = list(itertools.islice(f, chunk_size))
            if not lines:
                break
            random.shuffle(lines)
            with open(output_file_name, 'a') as out:
                out.write(''.join(lines))


# Call the function with your parameters
if(not os.path.exists('processed_bakery_output_shuffled.txt')):
    shuffle_large_file('processed_bakery_output.txt', 'processed_bakery_output_shuffled.txt', 3000000)

In [5]:
def read_file_in_partitions(file_path, partition_size):
    with open(file_path, 'r') as file:
        partition = []
        for line in file:
            partition.append(line)
            if len(partition) >= partition_size:
                yield partition
                partition = []
        if partition:  # yield any remaining lines
            yield partition

In [6]:
# Global variable to get the counts of all itemsets
global_counts = {}
def generate_global_counts(partition, global_candidates):

    # For 1th itemset, generate the transaction id list for the ith partition 
    transaction_id_dict = collections.defaultdict(list)
    for transaction_id in partition:
        for item in partition[transaction_id]:
            item_tuple = (item,)
            transaction_id_dict[item_tuple].append(transaction_id)

    # Filter based on the global candidates formed
    transaction_ids_dict = {item: transaction_ids for item, transaction_ids in transaction_id_dict.items() if item in global_candidates[1]}

    # Get the global count of all 1th itemset
    for item in transaction_id_dict:
        if(len(item) not in global_counts):
            global_counts[len(item)] = {}
        if(item not in global_counts[len(item)]):
            global_counts[len(item)][item] = len(transaction_id_dict[item])
        else:
            global_counts[len(item)][item] += len(transaction_id_dict[item])

    # Extend to find global count of all nth itemset from the global candidates
    for i in tqdm(range(1, len(global_candidates))):
        for itemset in global_candidates[i+1]:
            transaction_ids = set(transaction_id_dict[(itemset[0],)])
            for i in range(1, len(itemset)):
                # We are only interested in the transactions where all items in itemset is present
                transaction_ids = transaction_ids.intersection(set(transaction_ids_dict.get((itemset[i],), {})))
            if(len(itemset) not in global_counts):
                global_counts[len(itemset)] = {}

            if(itemset not in global_counts[len(itemset)]):
                global_counts[len(itemset)][itemset] = len(transaction_ids)
            else:
                global_counts[len(itemset)][itemset] += len(transaction_ids)



In [7]:
file_path = 'processed_bakery_output_shuffled.txt'
size_of_data = sum(1 for line in open(file_path))
min_support=0.001
partition_size = 3000
num_partitions = size_of_data// partition_size
print(num_partitions)
partition_candidates = []
global_candidates = collections.defaultdict(list)
# Step 1: Partitioning
global_min_support = math.ceil((min_support*size_of_data)/num_partitions)
for i, partition in enumerate(read_file_in_partitions(file_path, partition_size)):
    print(f'Partition {i+1}:')
    dict_bakery = {}
    for line in partition:
        sales, bakery_list_str = line.strip().split(' ', 1)
        bakery_list = ast.literal_eval(bakery_list_str)
        dict_bakery[sales] = bakery_list
   
    improved_apriori = Improved_Apriori(dict_bakery, min_support=min_support, min_confidence=1, verbose=2)
    # Step 2: Retreieve frequent itemset per partition
    partition_frequent_itemset = improved_apriori.apriori()
    # Efficient Apriori for sanity check
    # partition_frequent_itemset, _ = apriori(list(dict_bakery.values()), min_support = min_support, verbosity=2)

    # Form the global candidate set from the large itemset in each partitions
    # In this space, we ignore the count of itemset in each partition as they are not useful in our global support count
    # All they do is just show the itemset was large enough in the current partition
    # Merging Phase
    for level, itemset in partition_frequent_itemset.items():
        for key in itemset.keys():
            if(key not in global_candidates[level]):
                global_candidates[level].append(key)

45
Partition 1:
Found 95 candidate itemsets from 1st Level
Found 78 frequent itemsets from 1th item candidate sets
Found 3003 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002200603485107422


100%|██████████| 3003/3003 [00:00<00:00, 146802.35it/s]


Found 214 frequent itemsets from 2th item candidate sets
Found 402 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.008806228637695312


100%|██████████| 402/402 [00:00<00:00, 18139.77it/s]


Found 79 frequent itemsets from 3th item candidate sets
Found 13 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0007047653198242188


100%|██████████| 13/13 [00:00<00:00, 11596.33it/s]


Found 8 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 2.9087066650390625e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 2:
Found 107 candidate itemsets from 1st Level
Found 82 frequent itemsets from 1th item candidate sets
Found 3321 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002281665802001953


100%|██████████| 3321/3321 [00:00<00:00, 199857.72it/s]


Found 225 frequent itemsets from 2th item candidate sets
Found 411 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.00787806510925293


100%|██████████| 411/411 [00:00<00:00, 27589.25it/s]


Found 88 frequent itemsets from 3th item candidate sets
Found 12 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0008149147033691406


100%|██████████| 12/12 [00:00<00:00, 12270.03it/s]


Found 5 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 1.9073486328125e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 3:
Found 100 candidate itemsets from 1st Level
Found 80 frequent itemsets from 1th item candidate sets
Found 3160 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002789497375488281


100%|██████████| 3160/3160 [00:00<00:00, 194388.64it/s]


Found 223 frequent itemsets from 2th item candidate sets
Found 384 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.00893402099609375


100%|██████████| 384/384 [00:00<00:00, 34635.34it/s]


Found 82 frequent itemsets from 3th item candidate sets
Found 11 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0007801055908203125


100%|██████████| 11/11 [00:00<00:00, 15811.29it/s]


Found 5 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 1.8835067749023438e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 4:
Found 102 candidate itemsets from 1st Level
Found 76 frequent itemsets from 1th item candidate sets
Found 2850 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.00028705596923828125


100%|██████████| 2850/2850 [00:00<00:00, 121727.54it/s]


Found 226 frequent itemsets from 2th item candidate sets
Found 435 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.008452892303466797


100%|██████████| 435/435 [00:00<00:00, 33997.10it/s]


Found 81 frequent itemsets from 3th item candidate sets
Found 12 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0008819103240966797


100%|██████████| 12/12 [00:00<00:00, 13888.42it/s]


Found 9 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 2.2172927856445312e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 5:
Found 105 candidate itemsets from 1st Level
Found 75 frequent itemsets from 1th item candidate sets
Found 2775 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002849102020263672


100%|██████████| 2775/2775 [00:00<00:00, 181445.64it/s]


Found 236 frequent itemsets from 2th item candidate sets
Found 480 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.011005878448486328


100%|██████████| 480/480 [00:00<00:00, 35375.69it/s]


Found 103 frequent itemsets from 3th item candidate sets
Found 25 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.001104116439819336


100%|██████████| 25/25 [00:00<00:00, 16822.97it/s]


Found 17 frequent itemsets from 4th item candidate sets
Found 1 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 5.1975250244140625e-05


100%|██████████| 1/1 [00:00<00:00, 6502.80it/s]


Found 1 frequent itemsets from 5th item candidate sets
Found 0 candidates for 6th item candidate sets
Time taken to find 6th item candidate sets: 1.0967254638671875e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 6th item candidate sets
Partition 6:
Found 104 candidate itemsets from 1st Level
Found 80 frequent itemsets from 1th item candidate sets
Found 3160 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.00024080276489257812


100%|██████████| 3160/3160 [00:00<00:00, 184439.41it/s]


Found 203 frequent itemsets from 2th item candidate sets
Found 361 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.00674891471862793


100%|██████████| 361/361 [00:00<00:00, 35920.19it/s]


Found 76 frequent itemsets from 3th item candidate sets
Found 9 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0005559921264648438


100%|██████████| 9/9 [00:00<00:00, 5558.64it/s]


Found 6 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 1.5020370483398438e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 7:
Found 99 candidate itemsets from 1st Level
Found 75 frequent itemsets from 1th item candidate sets
Found 2775 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.00019693374633789062


100%|██████████| 2775/2775 [00:00<00:00, 110061.21it/s]


Found 209 frequent itemsets from 2th item candidate sets
Found 362 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.006705045700073242


100%|██████████| 362/362 [00:00<00:00, 33453.89it/s]


Found 93 frequent itemsets from 3th item candidate sets
Found 14 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0008540153503417969


100%|██████████| 14/14 [00:00<00:00, 11639.30it/s]


Found 11 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 2.7894973754882812e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 8:
Found 111 candidate itemsets from 1st Level
Found 79 frequent itemsets from 1th item candidate sets
Found 3081 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0003719329833984375


100%|██████████| 3081/3081 [00:00<00:00, 177496.75it/s]


Found 220 frequent itemsets from 2th item candidate sets
Found 400 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.008952140808105469


100%|██████████| 400/400 [00:00<00:00, 29129.13it/s]


Found 83 frequent itemsets from 3th item candidate sets
Found 12 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0007197856903076172


100%|██████████| 12/12 [00:00<00:00, 7207.74it/s]


Found 5 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 1.52587890625e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 9:
Found 107 candidate itemsets from 1st Level
Found 77 frequent itemsets from 1th item candidate sets
Found 2926 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0003368854522705078


100%|██████████| 2926/2926 [00:00<00:00, 190036.13it/s]


Found 249 frequent itemsets from 2th item candidate sets
Found 456 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.011298179626464844


100%|██████████| 456/456 [00:00<00:00, 32874.45it/s]


Found 89 frequent itemsets from 3th item candidate sets
Found 15 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0015959739685058594


100%|██████████| 15/15 [00:00<00:00, 16358.44it/s]


Found 7 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 6.914138793945312e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 10:
Found 108 candidate itemsets from 1st Level
Found 81 frequent itemsets from 1th item candidate sets
Found 3240 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002498626708984375


100%|██████████| 3240/3240 [00:00<00:00, 121752.66it/s]


Found 242 frequent itemsets from 2th item candidate sets
Found 512 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.010756969451904297


100%|██████████| 512/512 [00:00<00:00, 36307.57it/s]


Found 108 frequent itemsets from 3th item candidate sets
Found 32 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0013167858123779297


100%|██████████| 32/32 [00:00<00:00, 17533.34it/s]


Found 20 frequent itemsets from 4th item candidate sets
Found 1 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 6.222724914550781e-05


100%|██████████| 1/1 [00:00<00:00, 1667.05it/s]


Found 1 frequent itemsets from 5th item candidate sets
Found 0 candidates for 6th item candidate sets
Time taken to find 6th item candidate sets: 7.867813110351562e-06


0it [00:00, ?it/s]


Found 0 frequent itemsets from 6th item candidate sets
Partition 11:
Found 105 candidate itemsets from 1st Level
Found 86 frequent itemsets from 1th item candidate sets
Found 3655 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.000347137451171875


100%|██████████| 3655/3655 [00:00<00:00, 177720.63it/s]


Found 230 frequent itemsets from 2th item candidate sets
Found 454 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.008998870849609375


100%|██████████| 454/454 [00:00<00:00, 33333.00it/s]


Found 80 frequent itemsets from 3th item candidate sets
Found 10 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0006778240203857422


100%|██████████| 10/10 [00:00<00:00, 8944.99it/s]


Found 8 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 2.6941299438476562e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 12:
Found 105 candidate itemsets from 1st Level
Found 80 frequent itemsets from 1th item candidate sets
Found 3160 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002551078796386719


100%|██████████| 3160/3160 [00:00<00:00, 182998.06it/s]


Found 243 frequent itemsets from 2th item candidate sets
Found 526 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.010615110397338867


100%|██████████| 526/526 [00:00<00:00, 32776.28it/s]


Found 93 frequent itemsets from 3th item candidate sets
Found 19 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0010409355163574219


100%|██████████| 19/19 [00:00<00:00, 17388.56it/s]


Found 11 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 3.1948089599609375e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 13:
Found 102 candidate itemsets from 1st Level
Found 76 frequent itemsets from 1th item candidate sets
Found 2850 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002532005310058594


100%|██████████| 2850/2850 [00:00<00:00, 114619.35it/s]


Found 225 frequent itemsets from 2th item candidate sets
Found 415 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.011634111404418945


100%|██████████| 415/415 [00:00<00:00, 34472.14it/s]


Found 97 frequent itemsets from 3th item candidate sets
Found 17 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0009789466857910156


100%|██████████| 17/17 [00:00<00:00, 17310.80it/s]


Found 10 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 3.790855407714844e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 14:
Found 113 candidate itemsets from 1st Level
Found 80 frequent itemsets from 1th item candidate sets
Found 3160 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.00035119056701660156


100%|██████████| 3160/3160 [00:00<00:00, 179423.32it/s]


Found 234 frequent itemsets from 2th item candidate sets
Found 476 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.009139060974121094


100%|██████████| 476/476 [00:00<00:00, 25226.66it/s]


Found 101 frequent itemsets from 3th item candidate sets
Found 15 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.001024007797241211


100%|██████████| 15/15 [00:00<00:00, 14489.77it/s]


Found 6 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 2.002716064453125e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 15:
Found 105 candidate itemsets from 1st Level
Found 79 frequent itemsets from 1th item candidate sets
Found 3081 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002570152282714844


100%|██████████| 3081/3081 [00:00<00:00, 146706.60it/s]


Found 225 frequent itemsets from 2th item candidate sets
Found 429 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.00819706916809082


100%|██████████| 429/429 [00:00<00:00, 31421.57it/s]


Found 105 frequent itemsets from 3th item candidate sets
Found 25 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0011630058288574219


100%|██████████| 25/25 [00:00<00:00, 7896.50it/s]


Found 15 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 5.0067901611328125e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 16:
Found 101 candidate itemsets from 1st Level
Found 79 frequent itemsets from 1th item candidate sets
Found 3081 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002949237823486328


100%|██████████| 3081/3081 [00:00<00:00, 125518.68it/s]


Found 217 frequent itemsets from 2th item candidate sets
Found 397 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.007715940475463867


100%|██████████| 397/397 [00:00<00:00, 28721.67it/s]


Found 94 frequent itemsets from 3th item candidate sets
Found 19 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0009827613830566406


100%|██████████| 19/19 [00:00<00:00, 14971.21it/s]


Found 11 frequent itemsets from 4th item candidate sets
Found 1 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 3.0994415283203125e-05


100%|██████████| 1/1 [00:00<00:00, 7810.62it/s]


Found 1 frequent itemsets from 5th item candidate sets
Found 0 candidates for 6th item candidate sets
Time taken to find 6th item candidate sets: 6.9141387939453125e-06


0it [00:00, ?it/s]


Found 0 frequent itemsets from 6th item candidate sets
Partition 17:
Found 104 candidate itemsets from 1st Level
Found 79 frequent itemsets from 1th item candidate sets
Found 3081 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.00029206275939941406


100%|██████████| 3081/3081 [00:00<00:00, 164768.78it/s]


Found 225 frequent itemsets from 2th item candidate sets
Found 402 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.009448051452636719


100%|██████████| 402/402 [00:00<00:00, 27589.59it/s]


Found 87 frequent itemsets from 3th item candidate sets
Found 16 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0013499259948730469


100%|██████████| 16/16 [00:00<00:00, 6753.43it/s]


Found 10 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 7.915496826171875e-05


0it [00:00, ?it/s]

Found 0 frequent itemsets from 5th item candidate sets
Partition 18:





Found 103 candidate itemsets from 1st Level
Found 84 frequent itemsets from 1th item candidate sets
Found 3486 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002999305725097656


100%|██████████| 3486/3486 [00:00<00:00, 210852.33it/s]


Found 223 frequent itemsets from 2th item candidate sets
Found 434 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.009035825729370117


100%|██████████| 434/434 [00:00<00:00, 29953.40it/s]


Found 80 frequent itemsets from 3th item candidate sets
Found 12 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0007228851318359375


100%|██████████| 12/12 [00:00<00:00, 7707.76it/s]


Found 9 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 3.0040740966796875e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 19:
Found 100 candidate itemsets from 1st Level
Found 83 frequent itemsets from 1th item candidate sets
Found 3403 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0003592967987060547


100%|██████████| 3403/3403 [00:00<00:00, 181116.10it/s]


Found 238 frequent itemsets from 2th item candidate sets
Found 514 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.009116888046264648


100%|██████████| 514/514 [00:00<00:00, 35068.52it/s]


Found 114 frequent itemsets from 3th item candidate sets
Found 13 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0016660690307617188


100%|██████████| 13/13 [00:00<00:00, 19226.36it/s]


Found 9 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 3.0040740966796875e-05


0it [00:00, ?it/s]

Found 0 frequent itemsets from 5th item candidate sets
Partition 20:
Found 100 candidate itemsets from 1st Level
Found 83 frequent itemsets from 1th item candidate sets
Found 3403 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0003070831298828125



100%|██████████| 3403/3403 [00:00<00:00, 212014.15it/s]


Found 215 frequent itemsets from 2th item candidate sets
Found 400 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.00803685188293457


100%|██████████| 400/400 [00:00<00:00, 34188.28it/s]


Found 95 frequent itemsets from 3th item candidate sets
Found 15 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0009818077087402344


100%|██████████| 15/15 [00:00<00:00, 10886.76it/s]


Found 11 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 3.1948089599609375e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 21:
Found 106 candidate itemsets from 1st Level
Found 80 frequent itemsets from 1th item candidate sets
Found 3160 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.00024771690368652344


100%|██████████| 3160/3160 [00:00<00:00, 166842.91it/s]


Found 228 frequent itemsets from 2th item candidate sets
Found 419 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.008871793746948242


100%|██████████| 419/419 [00:00<00:00, 29219.12it/s]


Found 94 frequent itemsets from 3th item candidate sets
Found 16 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0009410381317138672


100%|██████████| 16/16 [00:00<00:00, 16112.57it/s]


Found 9 frequent itemsets from 4th item candidate sets
Found 1 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 4.1961669921875e-05


100%|██████████| 1/1 [00:00<00:00, 1298.55it/s]


Found 1 frequent itemsets from 5th item candidate sets
Found 0 candidates for 6th item candidate sets
Time taken to find 6th item candidate sets: 1.0013580322265625e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 6th item candidate sets
Partition 22:
Found 101 candidate itemsets from 1st Level
Found 84 frequent itemsets from 1th item candidate sets
Found 3486 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0003521442413330078


100%|██████████| 3486/3486 [00:00<00:00, 112226.70it/s]

Found 250 frequent itemsets from 2th item candidate sets
Found 495 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.01131892204284668



100%|██████████| 495/495 [00:00<00:00, 31960.41it/s]


Found 111 frequent itemsets from 3th item candidate sets
Found 21 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0013430118560791016


100%|██████████| 21/21 [00:00<00:00, 19443.79it/s]


Found 14 frequent itemsets from 4th item candidate sets
Found 1 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 3.886222839355469e-05


100%|██████████| 1/1 [00:00<00:00, 8128.50it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 23:
Found 104 candidate itemsets from 1st Level
Found 76 frequent itemsets from 1th item candidate sets
Found 2850 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.00031685829162597656


100%|██████████| 2850/2850 [00:00<00:00, 155968.87it/s]


Found 228 frequent itemsets from 2th item candidate sets
Found 448 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.009773731231689453


100%|██████████| 448/448 [00:00<00:00, 35668.42it/s]


Found 106 frequent itemsets from 3th item candidate sets
Found 25 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.001119852066040039


100%|██████████| 25/25 [00:00<00:00, 14029.65it/s]


Found 10 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 2.6702880859375e-05


0it [00:00, ?it/s]

Found 0 frequent itemsets from 5th item candidate sets
Partition 24:
Found 108 candidate itemsets from 1st Level
Found 85 frequent itemsets from 1th item candidate sets
Found 3570 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0003571510314941406



100%|██████████| 3570/3570 [00:00<00:00, 190086.26it/s]


Found 212 frequent itemsets from 2th item candidate sets
Found 412 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.008284807205200195


100%|██████████| 412/412 [00:00<00:00, 32551.34it/s]


Found 85 frequent itemsets from 3th item candidate sets
Found 13 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0007359981536865234


100%|██████████| 13/13 [00:00<00:00, 15099.96it/s]


Found 10 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 8.511543273925781e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 25:
Found 101 candidate itemsets from 1st Level
Found 77 frequent itemsets from 1th item candidate sets
Found 2926 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0004322528839111328


100%|██████████| 2926/2926 [00:00<00:00, 108535.41it/s]


Found 242 frequent itemsets from 2th item candidate sets
Found 439 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.014534950256347656


100%|██████████| 439/439 [00:00<00:00, 35309.79it/s]


Found 89 frequent itemsets from 3th item candidate sets
Found 12 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0008790493011474609


100%|██████████| 12/12 [00:00<00:00, 16346.75it/s]


Found 9 frequent itemsets from 4th item candidate sets
Found 1 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 2.6702880859375e-05


100%|██████████| 1/1 [00:00<00:00, 5077.85it/s]


Found 1 frequent itemsets from 5th item candidate sets
Found 0 candidates for 6th item candidate sets
Time taken to find 6th item candidate sets: 7.152557373046875e-06


0it [00:00, ?it/s]


Found 0 frequent itemsets from 6th item candidate sets
Partition 26:
Found 100 candidate itemsets from 1st Level
Found 86 frequent itemsets from 1th item candidate sets
Found 3655 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002980232238769531


100%|██████████| 3655/3655 [00:00<00:00, 192437.91it/s]


Found 208 frequent itemsets from 2th item candidate sets
Found 364 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.008228063583374023


100%|██████████| 364/364 [00:00<00:00, 32742.70it/s]


Found 84 frequent itemsets from 3th item candidate sets
Found 9 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0028879642486572266


100%|██████████| 9/9 [00:00<00:00, 6507.28it/s]


Found 6 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 1.71661376953125e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 27:
Found 108 candidate itemsets from 1st Level
Found 80 frequent itemsets from 1th item candidate sets
Found 3160 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.00044608116149902344


100%|██████████| 3160/3160 [00:00<00:00, 170193.65it/s]


Found 236 frequent itemsets from 2th item candidate sets
Found 429 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.010729074478149414


100%|██████████| 429/429 [00:00<00:00, 32926.30it/s]


Found 107 frequent itemsets from 3th item candidate sets
Found 20 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0011768341064453125


100%|██████████| 20/20 [00:00<00:00, 15975.26it/s]


Found 14 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 4.482269287109375e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 28:
Found 101 candidate itemsets from 1st Level
Found 77 frequent itemsets from 1th item candidate sets
Found 2926 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0007958412170410156


100%|██████████| 2926/2926 [00:00<00:00, 135219.63it/s]


Found 215 frequent itemsets from 2th item candidate sets
Found 402 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.009252071380615234


100%|██████████| 402/402 [00:00<00:00, 33170.90it/s]

Found 80 frequent itemsets from 3th item candidate sets
Found 8 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0007321834564208984



100%|██████████| 8/8 [00:00<00:00, 4553.46it/s]


Found 5 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 0.0001990795135498047


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 29:
Found 109 candidate itemsets from 1st Level
Found 78 frequent itemsets from 1th item candidate sets
Found 3003 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.00016689300537109375


100%|██████████| 3003/3003 [00:00<00:00, 176875.69it/s]


Found 203 frequent itemsets from 2th item candidate sets
Found 354 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.00718379020690918


100%|██████████| 354/354 [00:00<00:00, 28150.76it/s]


Found 75 frequent itemsets from 3th item candidate sets
Found 9 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0012249946594238281


100%|██████████| 9/9 [00:00<00:00, 13530.01it/s]


Found 3 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 4.57763671875e-05


0it [00:00, ?it/s]

Found 0 frequent itemsets from 5th item candidate sets
Partition 30:





Found 103 candidate itemsets from 1st Level
Found 77 frequent itemsets from 1th item candidate sets
Found 2926 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0004558563232421875


100%|██████████| 2926/2926 [00:00<00:00, 177860.23it/s]


Found 227 frequent itemsets from 2th item candidate sets
Found 453 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.010113954544067383


100%|██████████| 453/453 [00:00<00:00, 30910.71it/s]


Found 93 frequent itemsets from 3th item candidate sets
Found 11 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0016338825225830078


100%|██████████| 11/11 [00:00<00:00, 14806.59it/s]


Found 8 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 2.384185791015625e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 31:
Found 105 candidate itemsets from 1st Level
Found 76 frequent itemsets from 1th item candidate sets
Found 2850 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002727508544921875


100%|██████████| 2850/2850 [00:00<00:00, 149073.62it/s]


Found 259 frequent itemsets from 2th item candidate sets
Found 599 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.01369929313659668


100%|██████████| 599/599 [00:00<00:00, 21331.38it/s]


Found 122 frequent itemsets from 3th item candidate sets
Found 32 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0016047954559326172


100%|██████████| 32/32 [00:00<00:00, 15825.70it/s]


Found 21 frequent itemsets from 4th item candidate sets
Found 1 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 8.96453857421875e-05


100%|██████████| 1/1 [00:00<00:00, 1663.75it/s]


Found 1 frequent itemsets from 5th item candidate sets
Found 0 candidates for 6th item candidate sets
Time taken to find 6th item candidate sets: 7.867813110351562e-06


0it [00:00, ?it/s]


Found 0 frequent itemsets from 6th item candidate sets
Partition 32:
Found 104 candidate itemsets from 1st Level
Found 77 frequent itemsets from 1th item candidate sets
Found 2926 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0004742145538330078


100%|██████████| 2926/2926 [00:00<00:00, 190842.89it/s]


Found 223 frequent itemsets from 2th item candidate sets
Found 406 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.00998997688293457


100%|██████████| 406/406 [00:00<00:00, 34432.37it/s]


Found 88 frequent itemsets from 3th item candidate sets
Found 16 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0009818077087402344


100%|██████████| 16/16 [00:00<00:00, 16292.51it/s]


Found 9 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 9.703636169433594e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 33:
Found 108 candidate itemsets from 1st Level
Found 77 frequent itemsets from 1th item candidate sets
Found 2926 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002720355987548828


100%|██████████| 2926/2926 [00:00<00:00, 175367.00it/s]


Found 225 frequent itemsets from 2th item candidate sets
Found 451 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.00923299789428711


100%|██████████| 451/451 [00:00<00:00, 27599.34it/s]


Found 93 frequent itemsets from 3th item candidate sets
Found 18 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0009579658508300781


100%|██████████| 18/18 [00:00<00:00, 12087.33it/s]


Found 4 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 2.193450927734375e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 34:
Found 100 candidate itemsets from 1st Level
Found 78 frequent itemsets from 1th item candidate sets
Found 3003 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0003001689910888672


100%|██████████| 3003/3003 [00:00<00:00, 168596.33it/s]


Found 225 frequent itemsets from 2th item candidate sets
Found 414 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.00958108901977539


100%|██████████| 414/414 [00:00<00:00, 31382.80it/s]


Found 89 frequent itemsets from 3th item candidate sets
Found 12 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0008869171142578125


100%|██████████| 12/12 [00:00<00:00, 1377.55it/s]


Found 5 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 1.7881393432617188e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 35:
Found 100 candidate itemsets from 1st Level
Found 79 frequent itemsets from 1th item candidate sets
Found 3081 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0003020763397216797


100%|██████████| 3081/3081 [00:00<00:00, 167054.28it/s]


Found 234 frequent itemsets from 2th item candidate sets
Found 469 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.011085033416748047


100%|██████████| 469/469 [00:00<00:00, 27153.78it/s]


Found 109 frequent itemsets from 3th item candidate sets
Found 22 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0012390613555908203


100%|██████████| 22/22 [00:00<00:00, 15816.71it/s]


Found 14 frequent itemsets from 4th item candidate sets
Found 1 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 4.100799560546875e-05


100%|██████████| 1/1 [00:00<00:00, 6502.80it/s]


Found 1 frequent itemsets from 5th item candidate sets
Found 0 candidates for 6th item candidate sets
Time taken to find 6th item candidate sets: 6.9141387939453125e-06


0it [00:00, ?it/s]


Found 0 frequent itemsets from 6th item candidate sets
Partition 36:
Found 103 candidate itemsets from 1st Level
Found 79 frequent itemsets from 1th item candidate sets
Found 3081 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0007600784301757812


100%|██████████| 3081/3081 [00:00<00:00, 167709.02it/s]


Found 212 frequent itemsets from 2th item candidate sets
Found 375 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.008258819580078125


100%|██████████| 375/375 [00:00<00:00, 34773.26it/s]


Found 82 frequent itemsets from 3th item candidate sets
Found 14 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.001390218734741211


100%|██████████| 14/14 [00:00<00:00, 9420.87it/s]


Found 7 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 5.91278076171875e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 37:
Found 102 candidate itemsets from 1st Level
Found 76 frequent itemsets from 1th item candidate sets
Found 2850 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0007479190826416016


100%|██████████| 2850/2850 [00:00<00:00, 107804.25it/s]


Found 240 frequent itemsets from 2th item candidate sets
Found 483 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.012969970703125


100%|██████████| 483/483 [00:00<00:00, 32579.35it/s]


Found 105 frequent itemsets from 3th item candidate sets
Found 18 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.002106189727783203


100%|██████████| 18/18 [00:00<00:00, 8919.83it/s]


Found 14 frequent itemsets from 4th item candidate sets
Found 1 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 8.0108642578125e-05


100%|██████████| 1/1 [00:00<00:00, 1711.96it/s]


Found 1 frequent itemsets from 5th item candidate sets
Found 0 candidates for 6th item candidate sets
Time taken to find 6th item candidate sets: 2.288818359375e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 6th item candidate sets
Partition 38:
Found 103 candidate itemsets from 1st Level
Found 80 frequent itemsets from 1th item candidate sets
Found 3160 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.00038313865661621094


100%|██████████| 3160/3160 [00:00<00:00, 139699.61it/s]


Found 223 frequent itemsets from 2th item candidate sets
Found 397 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.008615970611572266


100%|██████████| 397/397 [00:00<00:00, 29829.43it/s]


Found 120 frequent itemsets from 3th item candidate sets
Found 28 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.001734018325805664


100%|██████████| 28/28 [00:00<00:00, 15573.60it/s]


Found 20 frequent itemsets from 4th item candidate sets
Found 1 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 0.0005819797515869141


100%|██████████| 1/1 [00:00<00:00, 1830.77it/s]


Found 1 frequent itemsets from 5th item candidate sets
Found 0 candidates for 6th item candidate sets
Time taken to find 6th item candidate sets: 2.2172927856445312e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 6th item candidate sets
Partition 39:
Found 106 candidate itemsets from 1st Level
Found 76 frequent itemsets from 1th item candidate sets
Found 2850 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.00026607513427734375


100%|██████████| 2850/2850 [00:00<00:00, 126790.06it/s]


Found 229 frequent itemsets from 2th item candidate sets
Found 448 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.009862184524536133


100%|██████████| 448/448 [00:00<00:00, 27632.25it/s]


Found 87 frequent itemsets from 3th item candidate sets
Found 10 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0009281635284423828


100%|██████████| 10/10 [00:00<00:00, 14899.84it/s]


Found 6 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 4.601478576660156e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 40:
Found 103 candidate itemsets from 1st Level
Found 78 frequent itemsets from 1th item candidate sets
Found 3003 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0004038810729980469


100%|██████████| 3003/3003 [00:00<00:00, 154515.62it/s]


Found 238 frequent itemsets from 2th item candidate sets
Found 470 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.011221170425415039


100%|██████████| 470/470 [00:00<00:00, 28051.55it/s]


Found 94 frequent itemsets from 3th item candidate sets
Found 15 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0010781288146972656


100%|██████████| 15/15 [00:00<00:00, 11405.83it/s]


Found 10 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 7.700920104980469e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 41:
Found 100 candidate itemsets from 1st Level
Found 79 frequent itemsets from 1th item candidate sets
Found 3081 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0003941059112548828


100%|██████████| 3081/3081 [00:00<00:00, 144479.17it/s]


Found 228 frequent itemsets from 2th item candidate sets
Found 408 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.010046243667602539


100%|██████████| 408/408 [00:00<00:00, 26573.02it/s]


Found 90 frequent itemsets from 3th item candidate sets
Found 14 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0021467208862304688


100%|██████████| 14/14 [00:00<00:00, 16863.94it/s]


Found 11 frequent itemsets from 4th item candidate sets
Found 1 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 3.910064697265625e-05


100%|██████████| 1/1 [00:00<00:00, 1009.22it/s]


Found 1 frequent itemsets from 5th item candidate sets
Found 0 candidates for 6th item candidate sets
Time taken to find 6th item candidate sets: 2.193450927734375e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 6th item candidate sets
Partition 42:
Found 111 candidate itemsets from 1st Level
Found 82 frequent itemsets from 1th item candidate sets
Found 3321 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0006382465362548828


100%|██████████| 3321/3321 [00:00<00:00, 92981.53it/s]


Found 240 frequent itemsets from 2th item candidate sets
Found 429 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.010107040405273438


100%|██████████| 429/429 [00:00<00:00, 19167.99it/s]


Found 110 frequent itemsets from 3th item candidate sets
Found 23 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0013649463653564453


100%|██████████| 23/23 [00:00<00:00, 19375.17it/s]


Found 17 frequent itemsets from 4th item candidate sets
Found 1 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 5.0067901611328125e-05


100%|██████████| 1/1 [00:00<00:00, 7681.88it/s]


Found 1 frequent itemsets from 5th item candidate sets
Found 0 candidates for 6th item candidate sets
Time taken to find 6th item candidate sets: 7.867813110351562e-06


0it [00:00, ?it/s]


Found 0 frequent itemsets from 6th item candidate sets
Partition 43:
Found 107 candidate itemsets from 1st Level
Found 77 frequent itemsets from 1th item candidate sets
Found 2926 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0002620220184326172


100%|██████████| 2926/2926 [00:00<00:00, 181616.21it/s]


Found 219 frequent itemsets from 2th item candidate sets
Found 419 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.00988006591796875


100%|██████████| 419/419 [00:00<00:00, 28745.50it/s]


Found 101 frequent itemsets from 3th item candidate sets
Found 16 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0027430057525634766


100%|██████████| 16/16 [00:00<00:00, 13137.99it/s]


Found 10 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 3.3855438232421875e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 44:
Found 103 candidate itemsets from 1st Level
Found 78 frequent itemsets from 1th item candidate sets
Found 3003 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.00031185150146484375


100%|██████████| 3003/3003 [00:00<00:00, 175767.44it/s]


Found 242 frequent itemsets from 2th item candidate sets
Found 477 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.01060795783996582


100%|██████████| 477/477 [00:00<00:00, 27805.80it/s]


Found 90 frequent itemsets from 3th item candidate sets
Found 15 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0009791851043701172


100%|██████████| 15/15 [00:00<00:00, 4548.15it/s]


Found 9 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 4.1961669921875e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 45:
Found 103 candidate itemsets from 1st Level
Found 77 frequent itemsets from 1th item candidate sets
Found 2926 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.0003390312194824219


100%|██████████| 2926/2926 [00:00<00:00, 157992.40it/s]


Found 222 frequent itemsets from 2th item candidate sets
Found 393 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.014330863952636719


100%|██████████| 393/393 [00:00<00:00, 28529.72it/s]


Found 76 frequent itemsets from 3th item candidate sets
Found 7 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0005869865417480469


100%|██████████| 7/7 [00:00<00:00, 12367.37it/s]


Found 6 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 2.002716064453125e-05


0it [00:00, ?it/s]


Found 0 frequent itemsets from 5th item candidate sets
Partition 46:
Found 97 candidate itemsets from 1st Level
Found 76 frequent itemsets from 1th item candidate sets
Found 2850 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.00022220611572265625


100%|██████████| 2850/2850 [00:00<00:00, 250393.10it/s]


Found 207 frequent itemsets from 2th item candidate sets
Found 316 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.008298873901367188


100%|██████████| 316/316 [00:00<00:00, 69436.30it/s]


Found 95 frequent itemsets from 3th item candidate sets
Found 15 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.001544952392578125


100%|██████████| 15/15 [00:00<00:00, 35444.82it/s]


Found 13 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 0.00011682510375976562


0it [00:00, ?it/s]

Found 0 frequent itemsets from 5th item candidate sets





In [8]:
# Now we have to read the lines in chunks for our disk-based operations
min_support_count = min_support * size_of_data
for i, partition in enumerate(read_file_in_partitions(file_path, partition_size)):
    print(f'Partition {i+1}:')

    # Hold the partition data in main memory
    dict_bakery = {}
    for line in partition:
        sales, bakery_list_str = line.strip().split(' ', 1)
        bakery_list = ast.literal_eval(bakery_list_str)
        dict_bakery[sales] = bakery_list
    # Pure disk based implementation would probably require us to save the global candidates in disk 
    generate_global_counts(dict_bakery, global_candidates)
    
        

Partition 1:


100%|██████████| 4/4 [00:00<00:00, 129.58it/s]


Partition 2:


100%|██████████| 4/4 [00:00<00:00, 130.64it/s]

Partition 3:



100%|██████████| 4/4 [00:00<00:00, 118.36it/s]


Partition 4:


100%|██████████| 4/4 [00:00<00:00, 127.40it/s]


Partition 5:


100%|██████████| 4/4 [00:00<00:00, 124.75it/s]


Partition 6:


100%|██████████| 4/4 [00:00<00:00, 113.28it/s]

Partition 7:



100%|██████████| 4/4 [00:00<00:00, 123.44it/s]


Partition 8:


100%|██████████| 4/4 [00:00<00:00, 78.31it/s]


Partition 9:


100%|██████████| 4/4 [00:00<00:00, 116.80it/s]


Partition 10:


100%|██████████| 4/4 [00:00<00:00, 113.11it/s]


Partition 11:


100%|██████████| 4/4 [00:00<00:00, 129.09it/s]


Partition 12:


100%|██████████| 4/4 [00:00<00:00, 118.26it/s]


Partition 13:


100%|██████████| 4/4 [00:00<00:00, 119.59it/s]


Partition 14:


100%|██████████| 4/4 [00:00<00:00, 114.78it/s]


Partition 15:


100%|██████████| 4/4 [00:00<00:00, 128.38it/s]


Partition 16:


100%|██████████| 4/4 [00:00<00:00, 123.56it/s]


Partition 17:


100%|██████████| 4/4 [00:00<00:00, 130.42it/s]


Partition 18:


100%|██████████| 4/4 [00:00<00:00, 124.07it/s]

Partition 19:



100%|██████████| 4/4 [00:00<00:00, 123.95it/s]


Partition 20:


100%|██████████| 4/4 [00:00<00:00, 131.84it/s]


Partition 21:


100%|██████████| 4/4 [00:00<00:00, 129.79it/s]


Partition 22:


100%|██████████| 4/4 [00:00<00:00, 132.36it/s]


Partition 23:


100%|██████████| 4/4 [00:00<00:00, 132.55it/s]


Partition 24:


100%|██████████| 4/4 [00:00<00:00, 126.24it/s]


Partition 25:


100%|██████████| 4/4 [00:00<00:00, 123.42it/s]


Partition 26:


100%|██████████| 4/4 [00:00<00:00, 133.94it/s]


Partition 27:


100%|██████████| 4/4 [00:00<00:00, 124.03it/s]


Partition 28:


100%|██████████| 4/4 [00:00<00:00, 125.57it/s]


Partition 29:


100%|██████████| 4/4 [00:00<00:00, 109.09it/s]


Partition 30:


100%|██████████| 4/4 [00:00<00:00, 91.27it/s]


Partition 31:


100%|██████████| 4/4 [00:00<00:00, 91.05it/s]


Partition 32:


100%|██████████| 4/4 [00:00<00:00, 118.35it/s]


Partition 33:


100%|██████████| 4/4 [00:00<00:00, 121.41it/s]


Partition 34:


100%|██████████| 4/4 [00:00<00:00, 125.83it/s]


Partition 35:


100%|██████████| 4/4 [00:00<00:00, 120.45it/s]


Partition 36:


100%|██████████| 4/4 [00:00<00:00, 124.66it/s]


Partition 37:


100%|██████████| 4/4 [00:00<00:00, 115.90it/s]


Partition 38:


100%|██████████| 4/4 [00:00<00:00, 102.34it/s]


Partition 39:


100%|██████████| 4/4 [00:00<00:00, 78.27it/s]


Partition 40:


100%|██████████| 4/4 [00:00<00:00, 113.52it/s]


Partition 41:


100%|██████████| 4/4 [00:00<00:00, 120.72it/s]


Partition 42:


100%|██████████| 4/4 [00:00<00:00, 112.05it/s]


Partition 43:


100%|██████████| 4/4 [00:00<00:00, 128.14it/s]


Partition 44:


100%|██████████| 4/4 [00:00<00:00, 121.46it/s]


Partition 45:


100%|██████████| 4/4 [00:00<00:00, 123.64it/s]


Partition 46:


100%|██████████| 4/4 [00:00<00:00, 339.70it/s]


In [9]:
global_frequent_itemsets = {level: {itemset: count for itemset, count in itemsets.items() if count >= min_support_count} for level, itemsets in global_counts.items()}
global_frequent_itemsets

{1: {('BAGUETTE',): 15292,
  ('TRADITIONAL BAGUETTE',): 67688,
  ('CAMPAGNE',): 3905,
  ('CEREAL BAGUETTE',): 4961,
  ('BANETTE',): 15130,
  ('PAIN AU CHOCOLAT',): 10578,
  ('CROISSANT',): 11508,
  ('BANETTINE',): 2817,
  ('BOULE 200G',): 2691,
  ('COMPLET',): 3140,
  ('COUPE',): 20470,
  ('DIVERS VIENNOISERIE',): 827,
  ('ECLAIR',): 2006,
  ('FICELLE',): 2655,
  ('GRAND FAR BRETON',): 1378,
  ('QUIM BREAD',): 1304,
  ('BRIOCHE',): 1657,
  ('TARTELETTE',): 2861,
  ('NANTAIS',): 390,
  ('FLAN ABRICOT',): 568,
  ('FORMULE SANDWICH',): 4202,
  ('CAFE OU EAU',): 1436,
  ('MOISSON',): 3107,
  ('SPECIAL BREAD',): 5195,
  ('KOUIGN AMANN',): 1332,
  ('SUCETTE',): 525,
  ('GD KOUIGN AMANN',): 832,
  ('DIVERS BOULANGERIE',): 316,
  ('PAIN',): 1918,
  ('VIK BREAD',): 3147,
  ('SPECIAL BREAD KG',): 460,
  ('SAVARIN',): 337,
  ('PLAT 7.60E',): 244,
  ('PAIN BANETTE',): 2727,
  ('NOIX JAPONAISE',): 334,
  ('BOULE POLKA',): 502,
  ('SAND JB EMMENTAL',): 1505,
  ('BOULE 400G',): 4099,
  ('TRAITEUR',):

In [18]:
# Calculate global_frequent_itemsets (as already in your code)
# This part of the code remains unchanged

# Convert tuple keys to strings for the entire global_frequent_itemsets
global_frequent_itemsets_str = {}
for level, itemsets in global_frequent_itemsets.items():
    level_str = str(level)
    itemsets_str = {str(itemset): count for itemset, count in itemsets.items()}
    global_frequent_itemsets_str[level_str] = itemsets_str

# Save the converted dictionary to a JSON file
import json

# Define the file path for the JSON output
json_output_file = 'frequent_itemsets.json'

# Serialize and save the frequent itemsets to the JSON file
with open(json_output_file, 'w') as json_file:
    json.dump(global_frequent_itemsets_str, json_file)


In [None]:
# Calculate global_frequent_itemsets (as already in your code)
# This part of the code remains unchanged

# Define the file path for the text output
text_output_file = 'frequent_itemsets.txt'

# Open the text file for writing
with open(text_output_file, 'w') as text_file:
    for level, itemsets in global_frequent_itemsets.items():
        text_file.write(f"Level {level}:\n")
        for itemset, count in itemsets.items():
            text_file.write(f"{itemset}: {count}\n")
        text_file.write("\n")

# Close the text file
text_file.close()


In [10]:
for level in global_frequent_itemsets:
    print(len(global_frequent_itemsets[level]))

73
178
50
3
0


In [12]:
data = pd.read_csv('Bakery_sales.csv')
data = data[['tid', 'article']]
grouped_data = data.groupby('tid')['article'].apply(list)
grouped_data = grouped_data.to_dict()

In [13]:
improved_apriori = Improved_Apriori(grouped_data, min_support=min_support, min_confidence=1, verbose=2)
frequent_bakery_set = improved_apriori.apriori()

Found 149 candidate itemsets from 1st Level
Found 73 frequent itemsets from 1th item candidate sets
Found 2628 candidates for 2th item candidate sets
Time taken to find 2th item candidate sets: 0.004758119583129883


100%|██████████| 2628/2628 [00:00<00:00, 3632.79it/s]


Found 178 frequent itemsets from 2th item candidate sets
Found 296 candidates for 3th item candidate sets
Time taken to find 3th item candidate sets: 0.004589080810546875


100%|██████████| 296/296 [00:00<00:00, 537.23it/s]


Found 50 frequent itemsets from 3th item candidate sets
Found 5 candidates for 4th item candidate sets
Time taken to find 4th item candidate sets: 0.0002620220184326172


100%|██████████| 5/5 [00:00<00:00, 277.10it/s]


Found 3 frequent itemsets from 4th item candidate sets
Found 0 candidates for 5th item candidate sets
Time taken to find 5th item candidate sets: 6.604194641113281e-05


0it [00:00, ?it/s]

Found 0 frequent itemsets from 5th item candidate sets





In [14]:
itemset, _ = apriori(list(grouped_data.values()), min_support = min_support, verbosity=2)

Generating itemsets.
 Counting itemsets of length 1.
  Found 149 candidate itemsets of length 1.
  Found 73 large itemsets of length 1.
    [('BAGUETTE',), ('PAIN AU CHOCOLAT',), ('PAIN',), ('TRADITIONAL BAGUETTE',), ('CROISSANT',), ('BANETTE',), ('BANETTINE',), ('SPECIAL BREAD',), ('COUPE',), ('SAND JB EMMENTAL',), ('KOUIGN AMANN',), ('BOULE 200G',), ('BOULE 400G',), ('CAMPAGNE',), ('MOISSON',), ('CAFE OU EAU',), ('BRIOCHE',), ('CEREAL BAGUETTE',), ('SEIGLE',), ('COMPLET',), ('DIVERS PATISSERIE',), ('GAL FRANGIPANE 4P',), ('COOKIE',), ('FICELLE',), ('PAIN AUX RAISINS',), ('FINANCIER X5',), ('VIK BREAD',), ('DIVERS VIENNOISERIE',), ('SANDWICH COMPLET',), ('PAIN BANETTE',), ('GRAND FAR BRETON',), ('QUIM BREAD',), ('SPECIAL BREAD KG',), ('GD KOUIGN AMANN',), ('BOULE POLKA',), ('DEMI BAGUETTE',), ('CHAUSSON AUX POMMES',), ('BAGUETTE GRAINE',), ('DIVERS CONFISERIE',), ('SUCETTE',), ('DIVERS BOULANGERIE',), ('BOISSON 33CL',), ('FORMULE SANDWICH',), ('DIVERS SANDWICHS',), ('CROISSANT AMANDES

In [15]:
frequent_bakery_set

{1: {('BAGUETTE',): 15273,
  ('BAGUETTE GRAINE',): 1500,
  ('BANETTE',): 15107,
  ('BANETTINE',): 2815,
  ('BOISSON 33CL',): 1468,
  ('BOULE 200G',): 2685,
  ('BOULE 400G',): 4082,
  ('BOULE POLKA',): 502,
  ('BRIOCHE',): 1656,
  ('CAFE OU EAU',): 1429,
  ('CAMPAGNE',): 3892,
  ('CEREAL BAGUETTE',): 4950,
  ('CHAUSSON AUX POMMES',): 1440,
  ('CHOU CHANTILLY',): 205,
  ('COMPLET',): 3133,
  ('COOKIE',): 1989,
  ('COUPE',): 19424,
  ('CROISSANT',): 11446,
  ('CROISSANT AMANDES',): 1802,
  ('DEMI BAGUETTE',): 1144,
  ('DEMI PAIN',): 163,
  ('DIVERS BOULANGERIE',): 311,
  ('DIVERS CONFISERIE',): 471,
  ('DIVERS PATISSERIE',): 480,
  ('DIVERS SANDWICHS',): 234,
  ('DIVERS VIENNOISERIE',): 818,
  ('ECLAIR',): 2002,
  ('FICELLE',): 2644,
  ('FINANCIER X5',): 1017,
  ('FLAN',): 830,
  ('FLAN ABRICOT',): 568,
  ('FONDANT CHOCOLAT',): 218,
  ('FORMULE SANDWICH',): 4180,
  ('FRAISIER',): 355,
  ('GAL FRANGIPANE 4P',): 203,
  ('GD FAR BRETON',): 161,
  ('GD KOUIGN AMANN',): 832,
  ('GRAND FAR BRET

In [16]:
itemset

{1: {('BAGUETTE',): 15273,
  ('PAIN AU CHOCOLAT',): 10529,
  ('PAIN',): 1917,
  ('TRADITIONAL BAGUETTE',): 67535,
  ('CROISSANT',): 11446,
  ('BANETTE',): 15107,
  ('BANETTINE',): 2815,
  ('SPECIAL BREAD',): 5182,
  ('COUPE',): 19424,
  ('SAND JB EMMENTAL',): 1502,
  ('KOUIGN AMANN',): 1330,
  ('BOULE 200G',): 2685,
  ('BOULE 400G',): 4082,
  ('CAMPAGNE',): 3892,
  ('MOISSON',): 3095,
  ('CAFE OU EAU',): 1429,
  ('BRIOCHE',): 1656,
  ('CEREAL BAGUETTE',): 4950,
  ('SEIGLE',): 1325,
  ('COMPLET',): 3133,
  ('DIVERS PATISSERIE',): 480,
  ('GAL FRANGIPANE 4P',): 203,
  ('COOKIE',): 1989,
  ('FICELLE',): 2644,
  ('PAIN AUX RAISINS',): 1986,
  ('FINANCIER X5',): 1017,
  ('VIK BREAD',): 3126,
  ('DIVERS VIENNOISERIE',): 818,
  ('SANDWICH COMPLET',): 2205,
  ('PAIN BANETTE',): 2725,
  ('GRAND FAR BRETON',): 1378,
  ('QUIM BREAD',): 1302,
  ('SPECIAL BREAD KG',): 460,
  ('GD KOUIGN AMANN',): 832,
  ('BOULE POLKA',): 502,
  ('DEMI BAGUETTE',): 1144,
  ('CHAUSSON AUX POMMES',): 1440,
  ('BAGUETT