In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
import sys
sys.path.append('../Task 1/')
from efficient_apriori import apriori
from improved_apriori import Improved_Apriori
import json
import time
import os
import itertools
import ast
from tqdm import tqdm
import collections
import math
import random

In [2]:
# Process the dataset by chunks into username and the list of animes the user watched
def process_anime_chunk(df, carry_over):
    if carry_over is not None:
        df = pd.concat([carry_over, df])
    groups = df.groupby('username')['title'].apply(list)
    last_user = df.iloc[-1]['username']
    if last_user in groups:
        carry_over = df[df['username'] == last_user]
        groups = groups.drop(last_user)
    else:
        carry_over = None
    return groups, carry_over

In [3]:
carry_over = None
chunksize = 100000 # adjust this value depending on your available memory
# Might have to figure out a way to shuffle the dataset 
if(not os.path.exists('dataset/processed_anime_output.txt')):
    with open('dataset/processed_anime_output.txt', 'w') as f:
        for chunk in pd.read_csv('dataset/final_animedataset.csv', chunksize=chunksize):
            groups, carry_over = process_anime_chunk(chunk, carry_over)
            for user, anime_list in groups.items():
                f.write(f'{user} {anime_list}\n')

        # don't forget to process the last carry_over
        if carry_over is not None:
            groups, _ = process_anime_chunk(carry_over, None)
            for user, anime_list in groups.items():
                f.write(f'{user} {anime_list}\n')

In [4]:
# Shuffle the text in chunks 
def shuffle_large_file(file_name, output_file_name, chunk_size):
    with open(file_name, 'r') as f:
        while True:
            lines = list(itertools.islice(f, chunk_size))
            if not lines:
                break
            random.shuffle(lines)
            with open(output_file_name, 'a') as out:
                out.write(''.join(lines))


# Call the function with your parameters
if(not os.path.exists('dataset/processed_anime_output_shuffled.txt')):
    shuffle_large_file('dataset/processed_anime_output.txt', 'dataset/processed_anime_output_shuffled.txt', 3000000)

In [5]:
def read_file_in_partitions(file_path, partition_size):
    with open(file_path, 'r') as file:
        partition = []
        for line in file:
            partition.append(line)
            if len(partition) >= partition_size:
                yield partition
                partition = []
        if partition:  # yield any remaining lines
            yield partition

In [6]:
# Global variable to get the counts of all itemsets
global_counts = {}
def generate_global_counts(partition, global_candidates):

    # For 1th itemset, generate the transaction id list for the ith partition 
    transaction_id_dict = collections.defaultdict(list)
    for transaction_id in partition:
        for item in partition[transaction_id]:
            item_tuple = (item,)
            transaction_id_dict[item_tuple].append(transaction_id)

    # Filter based on the global candidates formed
    transaction_ids_dict = {item: transaction_ids for item, transaction_ids in transaction_id_dict.items() if item in global_candidates[1]}

    # Get the global count of all 1th itemset
    for item in transaction_id_dict:
        if(len(item) not in global_counts):
            global_counts[len(item)] = {}
        if(item not in global_counts[len(item)]):
            global_counts[len(item)][item] = len(transaction_id_dict[item])
        else:
            global_counts[len(item)][item] += len(transaction_id_dict[item])

    # Extend to find global count of all nth itemset from the global candidates
    for i in tqdm(range(1, len(global_candidates))):
        for itemset in global_candidates[i+1]:
            transaction_ids = set(transaction_id_dict[(itemset[0],)])
            for i in range(1, len(itemset)):
                # We are only interested in the transactions where all items in itemset is present
                transaction_ids = transaction_ids.intersection(set(transaction_ids_dict.get((itemset[i],), {})))
            if(len(itemset) not in global_counts):
                global_counts[len(itemset)] = {}

            if(itemset not in global_counts[len(itemset)]):
                global_counts[len(itemset)][itemset] = len(transaction_ids)
            else:
                global_counts[len(itemset)][itemset] += len(transaction_ids)



In [7]:
file_path = 'dataset/processed_anime_output_shuffled.txt'
size_of_data = sum(1 for line in open(file_path))

partition_size = 10000
if(partition_size < size_of_data):
    num_partitions = size_of_data// partition_size
else:
    print('Size of partition exceeds size of data')
print(num_partitions)
partition_candidates = []
global_candidates = collections.defaultdict(list)
# Step 1: Partitioning
min_support_range=np.arange(0.2, 0.7, 0.1)

for min_support in min_support_range:
    output = {}
    global_min_support = math.ceil((min_support*size_of_data)/num_partitions)
    start = time.time()
    for i, partition in enumerate(read_file_in_partitions(file_path, partition_size)):
        print(f'Partition {i+1}:')
        dict_anime = {}
        for line in partition:
            user, anime_list_str = line.strip().split(' ', 1)
            anime_list = ast.literal_eval(anime_list_str)
            dict_anime[user] = anime_list
    
        improved_apriori = Improved_Apriori(dict_anime, min_support=min_support, min_confidence=1, verbose=0)
        # Step 2: Retreieve frequent itemset per partition
        partition_frequent_itemset = improved_apriori.apriori()
        # Efficient Apriori for sanity check
        # partition_frequent_itemset, _ = apriori(list(dict_anime.values()), min_support = min_support, verbosity=2)

        # Form the global candidate set from the large itemset in each partitions
        # In this space, we ignore the count of itemset in each partition as they are not useful in our global support count
        # All they do is just show the itemset was large enough in the current partition
        # Merging Phase
        for level, itemset in partition_frequent_itemset.items():
            for key in itemset.keys():
                if(key not in global_candidates[level]):
                    global_candidates[level].append(key)

    min_support_count = min_support * size_of_data                
    # Global counting phase
    global_counts = {}
    for i, partition in enumerate(read_file_in_partitions(file_path, partition_size)):
        print(f'Partition {i+1}:')
        # Hold the partition data in main memory
        dict_anime = {}
        for line in partition:
            user, anime_list_str = line.strip().split(' ', 1)
            anime_list = ast.literal_eval(anime_list_str)
            dict_anime[user] = anime_list
        # Pure disk based implementation would probably require us to save the global candidates in disk 
        generate_global_counts(dict_anime, global_candidates)
    global_frequent_itemsets = {level: {itemset: count for itemset, count in itemsets.items() if count >= min_support_count} for level, itemsets in global_counts.items()}
    end = time.time()
    output[min_support]={}
    for level, itemsets in global_frequent_itemsets.items():
        output[min_support][f'Level {level}'] = []
        for items, count in itemsets.items():
            output[min_support][f'Level {level}'].append([(list(items), count)])

    output[min_support]['Time Taken'] = end-start

    # Save to JSON file
    with open(f'anime_dataset_results/output_anime_{min_support}.json', 'w') as f:
        json.dump(output, f)

11
Partition 1:


100%|██████████| 36315/36315 [00:14<00:00, 2501.75it/s]
100%|██████████| 68582/68582 [01:02<00:00, 1091.33it/s]
100%|██████████| 72529/72529 [01:47<00:00, 672.88it/s]
100%|██████████| 38495/38495 [01:24<00:00, 455.68it/s]
100%|██████████| 8633/8633 [00:25<00:00, 339.62it/s]
100%|██████████| 605/605 [00:02<00:00, 281.91it/s]
100%|██████████| 6/6 [00:00<00:00, 256.45it/s]


Partition 2:


100%|██████████| 39340/39340 [00:15<00:00, 2481.97it/s]
100%|██████████| 76054/76054 [01:19<00:00, 958.78it/s] 
100%|██████████| 85345/85345 [02:09<00:00, 661.09it/s]
100%|██████████| 50329/50329 [01:47<00:00, 466.10it/s]
100%|██████████| 14263/14263 [00:38<00:00, 369.65it/s]
100%|██████████| 1602/1602 [00:05<00:00, 304.86it/s]
100%|██████████| 42/42 [00:00<00:00, 256.21it/s]
0it [00:00, ?it/s]


Partition 3:


100%|██████████| 40186/40186 [00:16<00:00, 2507.70it/s]
100%|██████████| 79851/79851 [01:20<00:00, 992.66it/s] 
100%|██████████| 91010/91010 [02:23<00:00, 632.39it/s]
100%|██████████| 54081/54081 [01:54<00:00, 473.21it/s]
100%|██████████| 15098/15098 [00:43<00:00, 350.63it/s]
100%|██████████| 1549/1549 [00:05<00:00, 309.33it/s]
100%|██████████| 39/39 [00:00<00:00, 253.37it/s]
0it [00:00, ?it/s]


Partition 4:


100%|██████████| 39340/39340 [00:15<00:00, 2556.59it/s]
100%|██████████| 75618/75618 [01:13<00:00, 1023.32it/s]
100%|██████████| 84700/84700 [02:13<00:00, 633.90it/s]
100%|██████████| 50276/50276 [01:48<00:00, 463.33it/s]
100%|██████████| 13736/13736 [00:38<00:00, 354.13it/s]
100%|██████████| 1406/1406 [00:04<00:00, 307.02it/s]
100%|██████████| 31/31 [00:00<00:00, 253.60it/s]
0it [00:00, ?it/s]


Partition 5:


100%|██████████| 42486/42486 [00:16<00:00, 2543.35it/s]
100%|██████████| 85320/85320 [01:20<00:00, 1056.75it/s]
100%|██████████| 101381/101381 [02:37<00:00, 643.68it/s]
100%|██████████| 63695/63695 [02:13<00:00, 476.40it/s]
100%|██████████| 19061/19061 [00:49<00:00, 382.70it/s]
100%|██████████| 2252/2252 [00:08<00:00, 273.93it/s]
100%|██████████| 61/61 [00:00<00:00, 240.93it/s]
0it [00:00, ?it/s]


Partition 6:


100%|██████████| 38781/38781 [00:16<00:00, 2378.38it/s]
100%|██████████| 74159/74159 [01:13<00:00, 1007.99it/s]
100%|██████████| 81287/81287 [02:01<00:00, 667.70it/s]
100%|██████████| 46639/46639 [01:36<00:00, 485.60it/s]
100%|██████████| 12762/12762 [00:33<00:00, 378.85it/s]
100%|██████████| 1377/1377 [00:05<00:00, 264.73it/s]
100%|██████████| 29/29 [00:00<00:00, 264.87it/s]
0it [00:00, ?it/s]


Partition 7:


100%|██████████| 39340/39340 [00:14<00:00, 2622.67it/s]
100%|██████████| 70892/70892 [01:09<00:00, 1014.78it/s]
100%|██████████| 76118/76118 [01:58<00:00, 641.65it/s]
100%|██████████| 42094/42094 [01:28<00:00, 473.86it/s]
100%|██████████| 10084/10084 [00:27<00:00, 364.34it/s]
100%|██████████| 811/811 [00:02<00:00, 289.78it/s]
100%|██████████| 13/13 [00:00<00:00, 267.64it/s]
0it [00:00, ?it/s]


Partition 8:


100%|██████████| 39903/39903 [00:15<00:00, 2535.59it/s]
100%|██████████| 72128/72128 [01:10<00:00, 1019.93it/s]
100%|██████████| 76227/76227 [02:11<00:00, 581.18it/s]
100%|██████████| 39665/39665 [02:09<00:00, 305.78it/s]
100%|██████████| 9002/9002 [00:35<00:00, 253.04it/s]
100%|██████████| 686/686 [00:03<00:00, 224.58it/s]
100%|██████████| 6/6 [00:00<00:00, 198.54it/s]


Partition 9:


100%|██████████| 37950/37950 [00:20<00:00, 1870.49it/s]
100%|██████████| 69096/69096 [01:33<00:00, 740.05it/s]
100%|██████████| 73776/73776 [03:04<00:00, 399.22it/s]
100%|██████████| 39919/39919 [02:56<00:00, 225.63it/s]
100%|██████████| 9736/9736 [00:54<00:00, 178.44it/s]
100%|██████████| 845/845 [00:04<00:00, 171.33it/s]
100%|██████████| 11/11 [00:00<00:00, 205.40it/s]
0it [00:00, ?it/s]


Partition 10:


100%|██████████| 39621/39621 [00:32<00:00, 1226.80it/s]
100%|██████████| 71247/71247 [02:11<00:00, 542.61it/s]
100%|██████████| 76556/76556 [02:47<00:00, 456.11it/s]
100%|██████████| 42307/42307 [02:11<00:00, 321.01it/s]
100%|██████████| 10389/10389 [00:49<00:00, 211.97it/s]
100%|██████████| 876/876 [00:04<00:00, 176.56it/s]
100%|██████████| 12/12 [00:00<00:00, 166.80it/s]
0it [00:00, ?it/s]


Partition 11:


100%|██████████| 39903/39903 [00:26<00:00, 1526.50it/s]
100%|██████████| 78057/78057 [02:00<00:00, 646.35it/s]
100%|██████████| 89742/89742 [08:31<00:00, 175.36it/s]  
100%|██████████| 53549/53549 [01:51<00:00, 480.26it/s]
100%|██████████| 14339/14339 [00:37<00:00, 379.32it/s]
100%|██████████| 1471/1471 [00:04<00:00, 323.47it/s]
100%|██████████| 40/40 [00:00<00:00, 277.09it/s]
0it [00:00, ?it/s]


Partition 12:


100%|██████████| 41041/41041 [00:09<00:00, 4306.68it/s]
100%|██████████| 79967/79967 [00:47<00:00, 1694.05it/s]
100%|██████████| 99023/99023 [01:52<00:00, 882.03it/s] 
100%|██████████| 65849/65849 [01:49<00:00, 599.59it/s]
100%|██████████| 20163/20163 [01:10<00:00, 284.64it/s]
100%|██████████| 2288/2288 [00:11<00:00, 190.75it/s]
100%|██████████| 51/51 [00:00<00:00, 121.79it/s]
0it [00:00, ?it/s]


Partition 1:


100%|██████████| 7/7 [05:40<00:00, 48.69s/it]


Partition 2:


100%|██████████| 7/7 [03:07<00:00, 26.73s/it]


Partition 3:


100%|██████████| 7/7 [03:07<00:00, 26.73s/it]


Partition 4:


100%|██████████| 7/7 [04:32<00:00, 38.88s/it]


Partition 5:


100%|██████████| 7/7 [05:59<00:00, 51.30s/it]


Partition 6:


100%|██████████| 7/7 [06:43<00:00, 57.66s/it] 


Partition 7:


100%|██████████| 7/7 [07:20<00:00, 62.92s/it] 


Partition 8:


100%|██████████| 7/7 [13:23<00:00, 114.77s/it]


Partition 9:


100%|██████████| 7/7 [04:03<00:00, 34.74s/it]


Partition 10:


100%|██████████| 7/7 [04:16<00:00, 36.63s/it]


Partition 11:


100%|██████████| 7/7 [03:16<00:00, 28.03s/it]


Partition 12:


100%|██████████| 7/7 [01:48<00:00, 15.45s/it]


Partition 1:


100%|██████████| 5565/5565 [00:03<00:00, 1840.83it/s]
100%|██████████| 2796/2796 [00:03<00:00, 812.56it/s]
100%|██████████| 437/437 [00:00<00:00, 531.81it/s]
100%|██████████| 9/9 [00:00<00:00, 415.72it/s]


Partition 2:


100%|██████████| 6216/6216 [00:03<00:00, 1765.61it/s]
100%|██████████| 3041/3041 [00:04<00:00, 717.44it/s]
100%|██████████| 561/561 [00:01<00:00, 473.81it/s]
100%|██████████| 20/20 [00:00<00:00, 358.35it/s]


Partition 3:


100%|██████████| 5995/5995 [00:03<00:00, 1812.49it/s]
100%|██████████| 3101/3101 [00:03<00:00, 787.41it/s]
100%|██████████| 593/593 [00:01<00:00, 514.03it/s]
100%|██████████| 15/15 [00:00<00:00, 404.44it/s]


Partition 4:


100%|██████████| 6216/6216 [00:03<00:00, 1840.49it/s]
100%|██████████| 3244/3244 [00:04<00:00, 780.69it/s]
100%|██████████| 636/636 [00:01<00:00, 516.51it/s]
100%|██████████| 17/17 [00:00<00:00, 402.85it/s]


Partition 5:


100%|██████████| 6328/6328 [00:03<00:00, 1845.71it/s]
100%|██████████| 3340/3340 [00:04<00:00, 806.36it/s]
100%|██████████| 675/675 [00:01<00:00, 529.49it/s]
100%|██████████| 25/25 [00:00<00:00, 419.13it/s]


Partition 6:


100%|██████████| 5778/5778 [00:05<00:00, 997.39it/s] 
100%|██████████| 2975/2975 [00:05<00:00, 570.46it/s]
100%|██████████| 516/516 [00:01<00:00, 428.42it/s]
100%|██████████| 14/14 [00:00<00:00, 283.85it/s]


Partition 7:


100%|██████████| 5886/5886 [00:03<00:00, 1809.82it/s]
100%|██████████| 2866/2866 [00:03<00:00, 750.18it/s]
100%|██████████| 474/474 [00:00<00:00, 523.39it/s]
100%|██████████| 10/10 [00:00<00:00, 412.26it/s]


Partition 8:


100%|██████████| 5460/5460 [00:03<00:00, 1776.96it/s]
100%|██████████| 3045/3045 [00:04<00:00, 725.85it/s]
100%|██████████| 445/445 [00:00<00:00, 500.74it/s]
100%|██████████| 10/10 [00:00<00:00, 400.45it/s]


Partition 9:


100%|██████████| 5671/5671 [00:04<00:00, 1241.77it/s]
100%|██████████| 2824/2824 [00:04<00:00, 705.68it/s]
100%|██████████| 425/425 [00:00<00:00, 528.46it/s]
100%|██████████| 8/8 [00:00<00:00, 425.98it/s]


Partition 10:


100%|██████████| 5671/5671 [00:03<00:00, 1828.40it/s]
100%|██████████| 2984/2984 [00:03<00:00, 803.12it/s]
100%|██████████| 478/478 [00:00<00:00, 526.38it/s]
100%|██████████| 11/11 [00:00<00:00, 402.33it/s]


Partition 11:


100%|██████████| 6105/6105 [00:03<00:00, 1815.10it/s]
100%|██████████| 3033/3033 [00:03<00:00, 800.69it/s]
100%|██████████| 538/538 [00:01<00:00, 528.36it/s]
100%|██████████| 13/13 [00:00<00:00, 422.49it/s]


Partition 12:


100%|██████████| 6105/6105 [00:01<00:00, 3214.46it/s]
100%|██████████| 3156/3156 [00:02<00:00, 1369.55it/s]
100%|██████████| 715/715 [00:00<00:00, 983.98it/s] 
100%|██████████| 20/20 [00:00<00:00, 741.95it/s]


Partition 1:


100%|██████████| 7/7 [03:20<00:00, 28.69s/it]


Partition 2:


100%|██████████| 7/7 [03:22<00:00, 28.96s/it]


Partition 3:


100%|██████████| 7/7 [03:29<00:00, 29.97s/it]


Partition 4:


100%|██████████| 7/7 [03:30<00:00, 30.06s/it]


Partition 5:


100%|██████████| 7/7 [03:27<00:00, 29.70s/it]


Partition 6:


100%|██████████| 7/7 [03:39<00:00, 31.29s/it]


Partition 7:


100%|██████████| 7/7 [03:23<00:00, 29.11s/it]


Partition 8:


100%|██████████| 7/7 [03:38<00:00, 31.17s/it]


Partition 9:


100%|██████████| 7/7 [03:10<00:00, 27.18s/it]


Partition 10:


100%|██████████| 7/7 [03:32<00:00, 30.38s/it]


Partition 11:


100%|██████████| 7/7 [03:16<00:00, 28.01s/it]


Partition 12:


100%|██████████| 7/7 [01:39<00:00, 14.26s/it]


Partition 1:


100%|██████████| 780/780 [00:00<00:00, 1300.31it/s]
100%|██████████| 59/59 [00:00<00:00, 681.65it/s]
0it [00:00, ?it/s]


Partition 2:


100%|██████████| 820/820 [00:00<00:00, 1279.82it/s]
100%|██████████| 76/76 [00:00<00:00, 635.29it/s]
0it [00:00, ?it/s]


Partition 3:


100%|██████████| 820/820 [00:00<00:00, 1404.10it/s]
100%|██████████| 76/76 [00:00<00:00, 668.49it/s]
0it [00:00, ?it/s]


Partition 4:


100%|██████████| 903/903 [00:00<00:00, 1313.35it/s]
100%|██████████| 82/82 [00:00<00:00, 644.88it/s]
0it [00:00, ?it/s]


Partition 5:


100%|██████████| 903/903 [00:00<00:00, 1387.06it/s]
100%|██████████| 83/83 [00:00<00:00, 669.72it/s]
0it [00:00, ?it/s]


Partition 6:


100%|██████████| 820/820 [00:00<00:00, 1272.32it/s]
100%|██████████| 73/73 [00:00<00:00, 625.72it/s]
0it [00:00, ?it/s]


Partition 7:


100%|██████████| 780/780 [00:00<00:00, 1345.56it/s]
100%|██████████| 72/72 [00:00<00:00, 625.36it/s]
0it [00:00, ?it/s]


Partition 8:


100%|██████████| 820/820 [00:00<00:00, 1365.53it/s]
100%|██████████| 63/63 [00:00<00:00, 625.06it/s]
0it [00:00, ?it/s]


Partition 9:


100%|██████████| 820/820 [00:00<00:00, 1413.32it/s]
100%|██████████| 50/50 [00:00<00:00, 660.53it/s]
0it [00:00, ?it/s]


Partition 10:


100%|██████████| 903/903 [00:00<00:00, 1358.87it/s]
100%|██████████| 65/65 [00:00<00:00, 662.32it/s]
0it [00:00, ?it/s]


Partition 11:


100%|██████████| 861/861 [00:00<00:00, 1367.59it/s]
100%|██████████| 75/75 [00:00<00:00, 646.18it/s]
0it [00:00, ?it/s]


Partition 12:


100%|██████████| 820/820 [00:00<00:00, 2555.86it/s]
100%|██████████| 95/95 [00:00<00:00, 1317.96it/s]
0it [00:00, ?it/s]


Partition 1:


100%|██████████| 7/7 [03:10<00:00, 27.26s/it]


Partition 2:


100%|██████████| 7/7 [03:26<00:00, 29.55s/it]


Partition 3:


100%|██████████| 7/7 [03:21<00:00, 28.85s/it]


Partition 4:


100%|██████████| 7/7 [03:22<00:00, 28.87s/it]


Partition 5:


100%|██████████| 7/7 [03:33<00:00, 30.44s/it]


Partition 6:


100%|██████████| 7/7 [03:23<00:00, 29.06s/it]


Partition 7:


100%|██████████| 7/7 [03:42<00:00, 31.72s/it]


Partition 8:


100%|██████████| 7/7 [03:20<00:00, 28.66s/it]


Partition 9:


100%|██████████| 7/7 [03:02<00:00, 26.05s/it]


Partition 10:


100%|██████████| 7/7 [03:07<00:00, 26.85s/it]


Partition 11:


100%|██████████| 7/7 [03:16<00:00, 28.12s/it]


Partition 12:


100%|██████████| 7/7 [11:39<00:00, 99.96s/it] 


Partition 1:


100%|██████████| 153/153 [00:00<00:00, 1021.66it/s]
0it [00:00, ?it/s]


Partition 2:


100%|██████████| 136/136 [00:00<00:00, 850.35it/s]
0it [00:00, ?it/s]


Partition 3:


100%|██████████| 153/153 [00:00<00:00, 989.02it/s]
0it [00:00, ?it/s]


Partition 4:


100%|██████████| 171/171 [00:00<00:00, 590.10it/s]
0it [00:00, ?it/s]


Partition 5:


100%|██████████| 171/171 [00:00<00:00, 990.59it/s]
0it [00:00, ?it/s]


Partition 6:


100%|██████████| 153/153 [00:00<00:00, 777.98it/s] 
0it [00:00, ?it/s]


Partition 7:


100%|██████████| 153/153 [00:00<00:00, 867.51it/s]
0it [00:00, ?it/s]


Partition 8:


100%|██████████| 153/153 [00:00<00:00, 950.45it/s]
0it [00:00, ?it/s]


Partition 9:


100%|██████████| 153/153 [00:00<00:00, 886.58it/s]
0it [00:00, ?it/s]


Partition 10:


100%|██████████| 153/153 [00:00<00:00, 903.28it/s]
0it [00:00, ?it/s]


Partition 11:


100%|██████████| 153/153 [00:00<00:00, 840.70it/s]
0it [00:00, ?it/s]


Partition 12:


100%|██████████| 120/120 [00:00<00:00, 2235.34it/s]
0it [00:00, ?it/s]


Partition 1:


100%|██████████| 7/7 [03:11<00:00, 27.41s/it]


Partition 2:


100%|██████████| 7/7 [03:27<00:00, 29.63s/it]


Partition 3:


100%|██████████| 7/7 [03:53<00:00, 33.29s/it]


Partition 4:


100%|██████████| 7/7 [03:10<00:00, 27.27s/it]


Partition 5:


100%|██████████| 7/7 [03:04<00:00, 26.36s/it]


Partition 6:


100%|██████████| 7/7 [03:16<00:00, 28.04s/it]


Partition 7:


100%|██████████| 7/7 [03:16<00:00, 28.07s/it]


Partition 8:


100%|██████████| 7/7 [03:18<00:00, 28.35s/it]


Partition 9:


100%|██████████| 7/7 [03:12<00:00, 27.48s/it]


Partition 10:


100%|██████████| 7/7 [03:13<00:00, 27.57s/it]


Partition 11:


100%|██████████| 7/7 [03:13<00:00, 27.61s/it]


Partition 12:


100%|██████████| 7/7 [01:40<00:00, 14.38s/it]


Partition 1:


100%|██████████| 1/1 [00:00<00:00, 838.36it/s]


Partition 2:


100%|██████████| 1/1 [00:00<00:00, 755.87it/s]


Partition 3:


100%|██████████| 1/1 [00:00<00:00, 808.31it/s]


Partition 4:


100%|██████████| 1/1 [00:00<00:00, 841.89it/s]


Partition 5:


100%|██████████| 1/1 [00:00<00:00, 815.70it/s]


Partition 6:


100%|██████████| 1/1 [00:00<00:00, 849.57it/s]


Partition 7:


100%|██████████| 1/1 [00:00<00:00, 755.46it/s]


Partition 8:


100%|██████████| 1/1 [00:00<00:00, 765.24it/s]


Partition 9:


100%|██████████| 1/1 [00:00<00:00, 750.19it/s]


Partition 10:


100%|██████████| 1/1 [00:00<00:00, 775.72it/s]


Partition 11:


100%|██████████| 1/1 [00:00<00:00, 854.76it/s]


Partition 12:


100%|██████████| 1/1 [00:00<00:00, 1736.05it/s]


Partition 1:


100%|██████████| 7/7 [03:11<00:00, 27.30s/it]


Partition 2:


 29%|██▊       | 2/7 [00:45<01:54, 22.93s/it]


KeyboardInterrupt: 

In [None]:
# data = pd.read_csv('dataset/final_animedataset.csv')
# data = data[['username', 'title']]
# grouped_data = data.groupby('username')['title'].apply(list)
# grouped_data = grouped_data.to_dict()

In [None]:
# frequent_anime_set

In [None]:
# itemset, _ = apriori(list(grouped_data.values()), min_support = min_support, verbosity=2)

In [None]:
# itemset