DMML Assignment-1 | Example runs on the KOS and NIPS datasets

In [3]:
import time
from collections import defaultdict


def read_vocab(file_path):
    """Load the vocabulary file into a ``{word_id: word}`` dictionary.

    The word on line ``n`` of the file (counting from 1) is stored under
    key ``n``. Leading and trailing whitespace, including the newline, is
    stripped from every word.
    """
    with open(file_path, 'r') as vocab_file:
        words = [raw_line.strip() for raw_line in vocab_file]
    # IDs are 1-based line numbers.
    return {word_id: word for word_id, word in enumerate(words, start=1)}

def read_docword(file_path):
    """Build an inverted index (word ID -> document IDs) from a docword file.

    The file starts with three integer header lines: number of documents,
    number of words, and number of nonzero entries (NNZ). Exactly NNZ
    further lines are then read, each holding three whitespace-separated
    integers ``doc_id word_id count``. The count is parsed but not stored;
    only the fact that the word occurs in the document is recorded.

    Returns:
        A ``defaultdict(set)`` mapping each word ID to the set of document
        IDs that contain it.
    """
    postings = defaultdict(set)  # word_id -> {doc_id, ...}

    with open(file_path, 'r') as docword_file:
        # Header: the first two values are parsed (so a malformed header
        # still fails) but only the entry count drives the loop below.
        _num_docs = int(docword_file.readline().strip())
        _num_words = int(docword_file.readline().strip())
        num_entries = int(docword_file.readline().strip())

        remaining = num_entries
        while remaining > 0:
            fields = docword_file.readline().strip().split()
            doc_id, word_id, _count = (int(field) for field in fields)
            postings[word_id].add(doc_id)
            remaining -= 1

    return postings

def apriori(word_docs, K, F):
    """Find the frequent K-itemsets of words with a level-wise Apriori search.

    An itemset is frequent when at least ``F`` documents contain every one
    of its words. Frequent k-itemsets are built from frequent
    (k-1)-itemsets, starting with single words.

    Args:
        word_docs: Mapping of word ID -> set of IDs of the documents that
            contain that word. Word IDs must be mutually orderable.
        K: Size of the itemsets to return. For ``K <= 1`` no join step runs
            and the frequent 1-itemsets are returned.
        F: Minimum number of documents that must contain the whole itemset.

    Returns:
        A list of ``(itemset, count)`` pairs, where ``itemset`` is an
        ascending tuple of word IDs and ``count`` is the number of documents
        containing all of them. The list is ordered by ``count`` descending,
        with ties broken by ``itemset`` ascending so the order is
        deterministic. An empty list is returned as soon as any level
        yields no frequent itemset.
    """
    # Level 1: every word that occurs in at least F documents.
    freq_itemsets = {(word,): docs for word, docs in word_docs.items() if len(docs) >= F}

    for _ in range(2, K + 1):
        # Two (k-1)-itemsets can only be joined when they share their first
        # k-2 items, so bucket the last items by that shared prefix instead
        # of comparing every pair of frequent itemsets.
        last_items_by_prefix = {}
        for itemset in freq_itemsets:
            last_items_by_prefix.setdefault(itemset[:-1], []).append(itemset[-1])

        new_freq_itemsets = {}
        for prefix, last_items in last_items_by_prefix.items():
            # Sorting keeps every generated candidate tuple in ascending order.
            last_items.sort()
            for position, first in enumerate(last_items):
                first_docs = freq_itemsets[prefix + (first,)]
                for second in last_items[position + 1:]:
                    # The documents containing the candidate are exactly
                    # those containing both parents, so reuse the parents'
                    # stored document sets rather than re-intersecting all
                    # k per-word sets.
                    shared_docs = first_docs.intersection(freq_itemsets[prefix + (second,)])
                    if len(shared_docs) >= F:
                        new_freq_itemsets[prefix + (first, second)] = shared_docs

        # No frequent itemset at this size means none at any larger size.
        if not new_freq_itemsets:
            return []

        freq_itemsets = new_freq_itemsets

    # Report counts instead of document sets; most frequent first.
    return sorted(
        ((itemset, len(docs)) for itemset, docs in freq_itemsets.items()),
        key=lambda entry: (-entry[1], entry[0]),
    )

def main(vocab_file, docword_file, K, F):
    """Run Apriori on one dataset and print the frequent K-itemsets.

    Reads the word -> documents index from ``docword_file`` and the
    ID -> word vocabulary from ``vocab_file``, mines the frequent
    K-itemsets with minimum support ``F``, and prints the elapsed mining
    time followed by each itemset (as words) with its document count.
    File reading is not included in the reported time.

    Args:
        vocab_file: Path to the vocabulary file (one word per line).
        docword_file: Path to the document-word file.
        K: Itemset size passed to ``apriori``.
        F: Minimum document count passed to ``apriori``.

    Raises:
        KeyError: If an itemset contains a word ID that has no entry in the
            vocabulary.
    """
    print("Reading dataset...")
    word_docs = read_docword(docword_file)
    vocab = read_vocab(vocab_file)

    print(f"Running Apriori for K={K}, F={F}")
    # perf_counter() is monotonic, so the measured interval cannot be
    # distorted by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    frequent_itemsets = apriori(word_docs, K, F)
    elapsed = time.perf_counter() - start_time

    print(f"\nTime taken: {elapsed:.2f} seconds")

    if not frequent_itemsets:
        print("\nNo itemsets found.")
        return

    print(f"\nTotal Frequent K-itemsets Found: {len(frequent_itemsets)}")
    print("Frequent K-itemsets:")
    for itemset, count in frequent_itemsets:
        # Translate word IDs back to words for display.
        print(f"Itemset: {tuple(vocab[word] for word in itemset)}, Count: {count}")




# NIPS dataset

## K = 2 and F = 1300

In [4]:
main("vocab.nips.txt", "docword.nips.txt", K=2, F=1300)

Reading dataset...
Running Apriori for K=2, F=1300

Time taken: 0.01 seconds

Total Frequent K-itemsets Found: 10
Frequent K-itemsets:
Itemset: ('abstract', 'references'), Count: 1411
Itemset: ('abstract', 'result'), Count: 1389
Itemset: ('references', 'result'), Count: 1358
Itemset: ('abstract', 'function'), Count: 1355
Itemset: ('abstract', 'neural'), Count: 1343
Itemset: ('abstract', 'system'), Count: 1329
Itemset: ('function', 'references'), Count: 1327
Itemset: ('abstract', 'set'), Count: 1324
Itemset: ('neural', 'references'), Count: 1314
Itemset: ('function', 'result'), Count: 1309


## K = 2 and F = 1200

In [5]:
main("vocab.nips.txt", "docword.nips.txt", K=2, F=1200)

Reading dataset...
Running Apriori for K=2, F=1200

Time taken: 0.02 seconds

Total Frequent K-itemsets Found: 38
Frequent K-itemsets:
Itemset: ('abstract', 'references'), Count: 1411
Itemset: ('abstract', 'result'), Count: 1389
Itemset: ('references', 'result'), Count: 1358
Itemset: ('abstract', 'function'), Count: 1355
Itemset: ('abstract', 'neural'), Count: 1343
Itemset: ('abstract', 'system'), Count: 1329
Itemset: ('function', 'references'), Count: 1327
Itemset: ('abstract', 'set'), Count: 1324
Itemset: ('neural', 'references'), Count: 1314
Itemset: ('function', 'result'), Count: 1309
Itemset: ('references', 'system'), Count: 1299
Itemset: ('references', 'set'), Count: 1297
Itemset: ('neural', 'result'), Count: 1296
Itemset: ('abstract', 'introduction'), Count: 1291
Itemset: ('result', 'system'), Count: 1281
Itemset: ('result', 'set'), Count: 1280
Itemset: ('abstract', 'network'), Count: 1277
Itemset: ('abstract', 'number'), Count: 1276
Itemset: ('function', 'neural'), Count: 1274


## K = 2 and F = 1100

In [6]:
main("vocab.nips.txt", "docword.nips.txt", K=2, F=1100)

Reading dataset...
Running Apriori for K=2, F=1100

Time taken: 0.04 seconds

Total Frequent K-itemsets Found: 82
Frequent K-itemsets:
Itemset: ('abstract', 'references'), Count: 1411
Itemset: ('abstract', 'result'), Count: 1389
Itemset: ('references', 'result'), Count: 1358
Itemset: ('abstract', 'function'), Count: 1355
Itemset: ('abstract', 'neural'), Count: 1343
Itemset: ('abstract', 'system'), Count: 1329
Itemset: ('function', 'references'), Count: 1327
Itemset: ('abstract', 'set'), Count: 1324
Itemset: ('neural', 'references'), Count: 1314
Itemset: ('function', 'result'), Count: 1309
Itemset: ('references', 'system'), Count: 1299
Itemset: ('references', 'set'), Count: 1297
Itemset: ('neural', 'result'), Count: 1296
Itemset: ('abstract', 'introduction'), Count: 1291
Itemset: ('result', 'system'), Count: 1281
Itemset: ('result', 'set'), Count: 1280
Itemset: ('abstract', 'network'), Count: 1277
Itemset: ('abstract', 'number'), Count: 1276
Itemset: ('function', 'neural'), Count: 1274


## K = 2 and F = 1000

In [7]:
main("vocab.nips.txt", "docword.nips.txt", K=2, F=1000)

Reading dataset...
Running Apriori for K=2, F=1000

Time taken: 0.15 seconds

Total Frequent K-itemsets Found: 179
Frequent K-itemsets:
Itemset: ('abstract', 'references'), Count: 1411
Itemset: ('abstract', 'result'), Count: 1389
Itemset: ('references', 'result'), Count: 1358
Itemset: ('abstract', 'function'), Count: 1355
Itemset: ('abstract', 'neural'), Count: 1343
Itemset: ('abstract', 'system'), Count: 1329
Itemset: ('function', 'references'), Count: 1327
Itemset: ('abstract', 'set'), Count: 1324
Itemset: ('neural', 'references'), Count: 1314
Itemset: ('function', 'result'), Count: 1309
Itemset: ('references', 'system'), Count: 1299
Itemset: ('references', 'set'), Count: 1297
Itemset: ('neural', 'result'), Count: 1296
Itemset: ('abstract', 'introduction'), Count: 1291
Itemset: ('result', 'system'), Count: 1281
Itemset: ('result', 'set'), Count: 1280
Itemset: ('abstract', 'network'), Count: 1277
Itemset: ('abstract', 'number'), Count: 1276
Itemset: ('function', 'neural'), Count: 1274

## K = 2 and F = 900

In [8]:
main("vocab.nips.txt", "docword.nips.txt", K=2, F=900)

Reading dataset...
Running Apriori for K=2, F=900

Time taken: 0.29 seconds

Total Frequent K-itemsets Found: 365
Frequent K-itemsets:
Itemset: ('abstract', 'references'), Count: 1411
Itemset: ('abstract', 'result'), Count: 1389
Itemset: ('references', 'result'), Count: 1358
Itemset: ('abstract', 'function'), Count: 1355
Itemset: ('abstract', 'neural'), Count: 1343
Itemset: ('abstract', 'system'), Count: 1329
Itemset: ('function', 'references'), Count: 1327
Itemset: ('abstract', 'set'), Count: 1324
Itemset: ('neural', 'references'), Count: 1314
Itemset: ('function', 'result'), Count: 1309
Itemset: ('references', 'system'), Count: 1299
Itemset: ('references', 'set'), Count: 1297
Itemset: ('neural', 'result'), Count: 1296
Itemset: ('abstract', 'introduction'), Count: 1291
Itemset: ('result', 'system'), Count: 1281
Itemset: ('result', 'set'), Count: 1280
Itemset: ('abstract', 'network'), Count: 1277
Itemset: ('abstract', 'number'), Count: 1276
Itemset: ('function', 'neural'), Count: 1274


## K = 2 and F = 800

In [9]:
main("vocab.nips.txt", "docword.nips.txt", K=2, F=800)

Reading dataset...
Running Apriori for K=2, F=800

Time taken: 0.80 seconds

Total Frequent K-itemsets Found: 724
Frequent K-itemsets:
Itemset: ('abstract', 'references'), Count: 1411
Itemset: ('abstract', 'result'), Count: 1389
Itemset: ('references', 'result'), Count: 1358
Itemset: ('abstract', 'function'), Count: 1355
Itemset: ('abstract', 'neural'), Count: 1343
Itemset: ('abstract', 'system'), Count: 1329
Itemset: ('function', 'references'), Count: 1327
Itemset: ('abstract', 'set'), Count: 1324
Itemset: ('neural', 'references'), Count: 1314
Itemset: ('function', 'result'), Count: 1309
Itemset: ('references', 'system'), Count: 1299
Itemset: ('references', 'set'), Count: 1297
Itemset: ('neural', 'result'), Count: 1296
Itemset: ('abstract', 'introduction'), Count: 1291
Itemset: ('result', 'system'), Count: 1281
Itemset: ('result', 'set'), Count: 1280
Itemset: ('abstract', 'network'), Count: 1277
Itemset: ('abstract', 'number'), Count: 1276
Itemset: ('function', 'neural'), Count: 1274


## K = 3 and F = 1200

In [10]:
main("vocab.nips.txt", "docword.nips.txt", K=3, F=1200)

Reading dataset...
Running Apriori for K=3, F=1200

Time taken: 0.05 seconds

Total Frequent K-itemsets Found: 36
Frequent K-itemsets:
Itemset: ('abstract', 'references', 'result'), Count: 1346
Itemset: ('abstract', 'function', 'references'), Count: 1315
Itemset: ('abstract', 'neural', 'references'), Count: 1302
Itemset: ('abstract', 'function', 'result'), Count: 1292
Itemset: ('abstract', 'references', 'system'), Count: 1289
Itemset: ('abstract', 'references', 'set'), Count: 1287
Itemset: ('abstract', 'neural', 'result'), Count: 1280
Itemset: ('abstract', 'result', 'system'), Count: 1266
Itemset: ('abstract', 'result', 'set'), Count: 1265
Itemset: ('function', 'references', 'result'), Count: 1265
Itemset: ('abstract', 'function', 'neural'), Count: 1254
Itemset: ('neural', 'references', 'result'), Count: 1251
Itemset: ('abstract', 'introduction', 'references'), Count: 1250
Itemset: ('abstract', 'neural', 'system'), Count: 1244
Itemset: ('abstract', 'number', 'references'), Count: 1239


## K = 4 and F = 1000

In [11]:
main("vocab.nips.txt", "docword.nips.txt", K=4, F=1000)

Reading dataset...
Running Apriori for K=4, F=1000

Time taken: 0.78 seconds

Total Frequent K-itemsets Found: 362
Frequent K-itemsets:
Itemset: ('abstract', 'function', 'references', 'result'), Count: 1253
Itemset: ('abstract', 'neural', 'references', 'result'), Count: 1240
Itemset: ('abstract', 'references', 'result', 'set'), Count: 1229
Itemset: ('abstract', 'references', 'result', 'system'), Count: 1227
Itemset: ('abstract', 'function', 'neural', 'references'), Count: 1217
Itemset: ('abstract', 'neural', 'references', 'system'), Count: 1207
Itemset: ('abstract', 'function', 'references', 'set'), Count: 1204
Itemset: ('abstract', 'function', 'references', 'system'), Count: 1197
Itemset: ('abstract', 'introduction', 'references', 'result'), Count: 1195
Itemset: ('abstract', 'network', 'neural', 'references'), Count: 1194
Itemset: ('abstract', 'function', 'neural', 'result'), Count: 1194
Itemset: ('abstract', 'network', 'references', 'result'), Count: 1192
Itemset: ('abstract', 'neura

## K = 5 and F = 1100

In [12]:
main("vocab.nips.txt", "docword.nips.txt", K=5, F=1100)

Reading dataset...
Running Apriori for K=5, F=1100

Time taken: 0.22 seconds

Total Frequent K-itemsets Found: 18
Frequent K-itemsets:
Itemset: ('abstract', 'function', 'neural', 'references', 'result'), Count: 1158
Itemset: ('abstract', 'network', 'neural', 'references', 'result'), Count: 1149
Itemset: ('abstract', 'function', 'references', 'result', 'set'), Count: 1148
Itemset: ('abstract', 'neural', 'references', 'result', 'system'), Count: 1147
Itemset: ('abstract', 'function', 'references', 'result', 'system'), Count: 1138
Itemset: ('abstract', 'neural', 'references', 'result', 'set'), Count: 1133
Itemset: ('abstract', 'function', 'neural', 'references', 'system'), Count: 1123
Itemset: ('abstract', 'references', 'result', 'set', 'system'), Count: 1118
Itemset: ('abstract', 'function', 'neural', 'references', 'set'), Count: 1115
Itemset: ('abstract', 'function', 'network', 'neural', 'references'), Count: 1113
Itemset: ('abstract', 'function', 'network', 'references', 'result'), Cou

## K = 7 and F = 900

In [13]:
main("vocab.nips.txt", "docword.nips.txt", K=7, F=900)

Reading dataset...
Running Apriori for K=7, F=900

Time taken: 4.40 seconds

Total Frequent K-itemsets Found: 46
Frequent K-itemsets:
Itemset: ('abstract', 'function', 'network', 'neural', 'references', 'result', 'system'), Count: 989
Itemset: ('abstract', 'function', 'network', 'neural', 'references', 'result', 'set'), Count: 985
Itemset: ('abstract', 'function', 'neural', 'references', 'result', 'set', 'system'), Count: 977
Itemset: ('abstract', 'network', 'neural', 'references', 'result', 'set', 'system'), Count: 976
Itemset: ('abstract', 'function', 'introduction', 'network', 'neural', 'references', 'result'), Count: 949
Itemset: ('abstract', 'function', 'neural', 'number', 'references', 'result', 'set'), Count: 947
Itemset: ('abstract', 'introduction', 'network', 'neural', 'references', 'result', 'system'), Count: 946
Itemset: ('abstract', 'function', 'network', 'neural', 'references', 'set', 'system'), Count: 945
Itemset: ('abstract', 'function', 'introduction', 'neural', 'refere

## K = 8 and F = 800

In [14]:
main("vocab.nips.txt", "docword.nips.txt", K=8, F=800)

Reading dataset...
Running Apriori for K=8, F=800

Time taken: 30.00 seconds

Total Frequent K-itemsets Found: 64
Frequent K-itemsets:
Itemset: ('abstract', 'function', 'network', 'neural', 'references', 'result', 'set', 'system'), Count: 910
Itemset: ('abstract', 'function', 'network', 'neural', 'number', 'references', 'result', 'set'), Count: 878
Itemset: ('abstract', 'function', 'introduction', 'network', 'neural', 'references', 'result', 'set'), Count: 876
Itemset: ('abstract', 'function', 'introduction', 'network', 'neural', 'references', 'result', 'system'), Count: 873
Itemset: ('abstract', 'function', 'network', 'neural', 'number', 'references', 'result', 'system'), Count: 871
Itemset: ('abstract', 'introduction', 'network', 'neural', 'references', 'result', 'set', 'system'), Count: 871
Itemset: ('abstract', 'function', 'neural', 'number', 'references', 'result', 'set', 'system'), Count: 870
Itemset: ('abstract', 'network', 'neural', 'number', 'references', 'result', 'set', 'sys

## K = 10 and F = 800

In [15]:
main("vocab.nips.txt", "docword.nips.txt", K=10, F=800)

Reading dataset...
Running Apriori for K=10, F=800

Time taken: 29.61 seconds

No itemsets found.


--------------------------------------------------------------------------------------------------------------

# KOS dataset

## K = 2 and F = 1200

In [16]:
main("vocab.kos.txt", "docword.kos.txt", K=2, F=1200)

Reading dataset...
Running Apriori for K=2, F=1200

Time taken: 0.01 seconds

Total Frequent K-itemsets Found: 1
Frequent K-itemsets:
Itemset: ('bush', 'general'), Count: 1250


## K = 2 and F = 1100

In [17]:
main("vocab.kos.txt", "docword.kos.txt", K=2, F=1100)

Reading dataset...
Running Apriori for K=2, F=1100

Time taken: 0.01 seconds

Total Frequent K-itemsets Found: 2
Frequent K-itemsets:
Itemset: ('bush', 'general'), Count: 1250
Itemset: ('bush', 'kerry'), Count: 1195


## K = 2 and F = 1000

In [18]:
main("vocab.kos.txt", "docword.kos.txt", K=2, F=1000)

Reading dataset...
Running Apriori for K=2, F=1000

Time taken: 0.01 seconds

Total Frequent K-itemsets Found: 3
Frequent K-itemsets:
Itemset: ('bush', 'general'), Count: 1250
Itemset: ('bush', 'kerry'), Count: 1195
Itemset: ('general', 'kerry'), Count: 1064


## K = 2 and F = 900

In [19]:
main("vocab.kos.txt", "docword.kos.txt", K=2, F=900)

Reading dataset...
Running Apriori for K=2, F=900

Time taken: 0.03 seconds

Total Frequent K-itemsets Found: 6
Frequent K-itemsets:
Itemset: ('bush', 'general'), Count: 1250
Itemset: ('bush', 'kerry'), Count: 1195
Itemset: ('general', 'kerry'), Count: 1064
Itemset: ('bush', 'war'), Count: 994
Itemset: ('bush', 'democratic'), Count: 937
Itemset: ('democratic', 'kerry'), Count: 922


## K = 2 and F = 800

In [20]:
main("vocab.kos.txt", "docword.kos.txt", K=2, F=800)

Reading dataset...
Running Apriori for K=2, F=800

Time taken: 0.03 seconds

Total Frequent K-itemsets Found: 11
Frequent K-itemsets:
Itemset: ('bush', 'general'), Count: 1250
Itemset: ('bush', 'kerry'), Count: 1195
Itemset: ('general', 'kerry'), Count: 1064
Itemset: ('bush', 'war'), Count: 994
Itemset: ('bush', 'democratic'), Count: 937
Itemset: ('democratic', 'kerry'), Count: 922
Itemset: ('democratic', 'primary'), Count: 894
Itemset: ('bush', 'poll'), Count: 876
Itemset: ('bush', 'house'), Count: 873
Itemset: ('kerry', 'poll'), Count: 845
Itemset: ('bush', 'president'), Count: 834


## K = 2 and F = 700

In [21]:
main("vocab.kos.txt", "docword.kos.txt", K=2, F=700)

Reading dataset...
Running Apriori for K=2, F=700

Time taken: 0.05 seconds

Total Frequent K-itemsets Found: 23
Frequent K-itemsets:
Itemset: ('bush', 'general'), Count: 1250
Itemset: ('bush', 'kerry'), Count: 1195
Itemset: ('general', 'kerry'), Count: 1064
Itemset: ('bush', 'war'), Count: 994
Itemset: ('bush', 'democratic'), Count: 937
Itemset: ('democratic', 'kerry'), Count: 922
Itemset: ('democratic', 'primary'), Count: 894
Itemset: ('bush', 'poll'), Count: 876
Itemset: ('bush', 'house'), Count: 873
Itemset: ('kerry', 'poll'), Count: 845
Itemset: ('bush', 'president'), Count: 834
Itemset: ('bush', 'republicans'), Count: 794
Itemset: ('bush', 'democrats'), Count: 783
Itemset: ('bush', 'election'), Count: 766
Itemset: ('democratic', 'general'), Count: 763
Itemset: ('general', 'poll'), Count: 756
Itemset: ('democratic', 'poll'), Count: 755
Itemset: ('administration', 'bush'), Count: 748
Itemset: ('general', 'war'), Count: 745
Itemset: ('democratic', 'democrats'), Count: 740
Itemset: (

## K = 2 and F = 600

In [22]:
main("vocab.kos.txt", "docword.kos.txt", K=2, F=600)

Reading dataset...
Running Apriori for K=2, F=600

Time taken: 0.06 seconds

Total Frequent K-itemsets Found: 54
Frequent K-itemsets:
Itemset: ('bush', 'general'), Count: 1250
Itemset: ('bush', 'kerry'), Count: 1195
Itemset: ('general', 'kerry'), Count: 1064
Itemset: ('bush', 'war'), Count: 994
Itemset: ('bush', 'democratic'), Count: 937
Itemset: ('democratic', 'kerry'), Count: 922
Itemset: ('democratic', 'primary'), Count: 894
Itemset: ('bush', 'poll'), Count: 876
Itemset: ('bush', 'house'), Count: 873
Itemset: ('kerry', 'poll'), Count: 845
Itemset: ('bush', 'president'), Count: 834
Itemset: ('bush', 'republicans'), Count: 794
Itemset: ('bush', 'democrats'), Count: 783
Itemset: ('bush', 'election'), Count: 766
Itemset: ('democratic', 'general'), Count: 763
Itemset: ('general', 'poll'), Count: 756
Itemset: ('democratic', 'poll'), Count: 755
Itemset: ('administration', 'bush'), Count: 748
Itemset: ('general', 'war'), Count: 745
Itemset: ('democratic', 'democrats'), Count: 740
Itemset: (

## K = 2 and F = 500

In [23]:
main("vocab.kos.txt", "docword.kos.txt", K=2, F=500)

Reading dataset...
Running Apriori for K=2, F=500

Time taken: 0.11 seconds

Total Frequent K-itemsets Found: 101
Frequent K-itemsets:
Itemset: ('bush', 'general'), Count: 1250
Itemset: ('bush', 'kerry'), Count: 1195
Itemset: ('general', 'kerry'), Count: 1064
Itemset: ('bush', 'war'), Count: 994
Itemset: ('bush', 'democratic'), Count: 937
Itemset: ('democratic', 'kerry'), Count: 922
Itemset: ('democratic', 'primary'), Count: 894
Itemset: ('bush', 'poll'), Count: 876
Itemset: ('bush', 'house'), Count: 873
Itemset: ('kerry', 'poll'), Count: 845
Itemset: ('bush', 'president'), Count: 834
Itemset: ('bush', 'republicans'), Count: 794
Itemset: ('bush', 'democrats'), Count: 783
Itemset: ('bush', 'election'), Count: 766
Itemset: ('democratic', 'general'), Count: 763
Itemset: ('general', 'poll'), Count: 756
Itemset: ('democratic', 'poll'), Count: 755
Itemset: ('administration', 'bush'), Count: 748
Itemset: ('general', 'war'), Count: 745
Itemset: ('democratic', 'democrats'), Count: 740
Itemset: 

## K = 3 and F = 700

In [30]:
main("vocab.kos.txt", "docword.kos.txt", K=3, F=700)

Reading dataset...
Running Apriori for K=3, F=700

Time taken: 0.06 seconds

Total Frequent K-itemsets Found: 1
Frequent K-itemsets:
Itemset: ('bush', 'general', 'kerry'), Count: 926


## K = 4 and F = 550

In [32]:
main("vocab.kos.txt", "docword.kos.txt", K=4, F=550)

Reading dataset...
Running Apriori for K=4, F=550

Time taken: 0.15 seconds

Total Frequent K-itemsets Found: 1
Frequent K-itemsets:
Itemset: ('bush', 'general', 'kerry', 'poll'), Count: 562


## K = 4 and F = 500

In [26]:
main("vocab.kos.txt", "docword.kos.txt", K=4, F=500)

Reading dataset...
Running Apriori for K=4, F=500

Time taken: 0.24 seconds

Total Frequent K-itemsets Found: 5
Frequent K-itemsets:
Itemset: ('bush', 'general', 'kerry', 'poll'), Count: 562
Itemset: ('bush', 'democratic', 'general', 'kerry'), Count: 531
Itemset: ('bush', 'general', 'kerry', 'war'), Count: 517
Itemset: ('democratic', 'kerry', 'poll', 'primary'), Count: 504
Itemset: ('bush', 'general', 'kerry', 'polls'), Count: 504


## K = 5 and F = 400

In [27]:
main("vocab.kos.txt", "docword.kos.txt", K=5, F=400)

Reading dataset...
Running Apriori for K=5, F=400

Time taken: 0.86 seconds

Total Frequent K-itemsets Found: 5
Frequent K-itemsets:
Itemset: ('bush', 'general', 'kerry', 'poll', 'polls'), Count: 437
Itemset: ('bush', 'democratic', 'democrats', 'general', 'republicans'), Count: 409
Itemset: ('bush', 'democratic', 'democrats', 'house', 'republicans'), Count: 403
Itemset: ('democratic', 'kerry', 'poll', 'polls', 'primary'), Count: 400
Itemset: ('bush', 'democratic', 'general', 'kerry', 'republicans'), Count: 400


## K = 7 and F = 350

In [28]:
main("vocab.kos.txt", "docword.kos.txt", K=7, F=350)

Reading dataset...
Running Apriori for K=7, F=350

Time taken: 3.29 seconds

Total Frequent K-itemsets Found: 1
Frequent K-itemsets:
Itemset: ('bush', 'democratic', 'democrats', 'general', 'house', 'republicans', 'senate'), Count: 351


## K = 10 and F = 350

In [29]:
main("vocab.kos.txt", "docword.kos.txt", K=10, F=350)

Reading dataset...
Running Apriori for K=10, F=350

Time taken: 4.27 seconds

No itemsets found.
