In [4]:
import numpy as np
from collections import defaultdict

# Sample market-basket data: each inner list is one transaction of item ids
transactions = [
    [1, 2, 3],
    [1, 3, 5],
    [3, 5, 6],
    [2, 3, 4],
    [2, 4, 6],
    [1, 2, 4],
    [3, 4, 5],
    [1, 3, 4],
    [2, 3, 5],
    [4, 5, 6],
    [2, 4, 5],
    [3, 4, 6],
]

# Step 1: Collect the distinct items, keeping the order in which each item
# is first seen while scanning the transactions from top to bottom.
# dict keys preserve insertion order, so they act as an ordered set here.
unique = list(
    dict.fromkeys(item for basket in transactions for item in basket)
)

print("Unique items:", unique)

# Step 2: Tally how often each unique item occurs across all transactions.
# The result is aligned with `unique`: count[k] belongs to unique[k].
count = [
    sum(basket.count(item) for basket in transactions)
    for item in unique
]

print("Counts of each unique item:", count)

# Step 3: Build every two-item combination of the unique items.
# Each item is paired only with the items that follow it in `unique`,
# so no pair is produced twice and no item is paired with itself.
pairs = [
    (first, second)
    for position, first in enumerate(unique)
    for second in unique[position + 1:]
]

print("Pairs of unique items:", pairs)

# Step 4: Check if pair is available in transaction and count its occurrences
# Build one set per transaction up front so each membership test is a hash
# lookup instead of a scan of the transaction list.
transaction_sets = [set(transaction) for transaction in transactions]

# Map every candidate pair to the number of transactions containing both of
# its items. The tally lives inside the comprehension, so the module-level
# `count` list of per-item counts is no longer overwritten by a scratch int.
pair_counts = {
    pair: sum(
        1
        for items in transaction_sets
        if pair[0] in items and pair[1] in items
    )
    for pair in pairs
}

print("Counts of each pair in transactions:", pair_counts)

# Step 5: Apply threshold and hash function
# Minimum number of transactions a pair must appear in to be kept.
threshold = 4

# Number of hash buckets; both hash functions use it as their modulus.
# int() raises ValueError if the entered text is not an integer.
bucket_size = int(input("Enter Bucket Size value: "))
if bucket_size <= 0:
    # A zero modulus would raise ZeroDivisionError inside the hash functions,
    # and a negative one would yield negative bucket ids; fail early instead.
    raise ValueError(
        f"Bucket size must be a positive integer, got {bucket_size}"
    )

# First hash function
def hash_function(i, j, num_buckets=None):
    """Return the first-pass bucket for the item pair ``(i, j)``.

    The bucket is ``(i * j) % num_buckets``.

    Args:
        i: First item of the pair.
        j: Second item of the pair.
        num_buckets: Modulus to use. When omitted, the module-level
            ``bucket_size`` is read at call time, which is the behavior
            existing two-argument callers rely on.

    Returns:
        The product of the two items reduced modulo the bucket count.
    """
    if num_buckets is None:
        num_buckets = bucket_size
    return (i * j) % num_buckets

# Keep only the pairs whose count reaches the threshold. Each surviving pair
# is stored as (pair, count, hash value), where the hash is computed from the
# item values themselves rather than from their positions.
hash_table = [
    (pair, support, hash_function(pair[0], pair[1]))
    for pair, support in pair_counts.items()
    if support >= threshold
]

print("Filtered pairs, counts, and hash values:")
for pair, support, bucket in hash_table:
    print(f"Pair: {pair}, Count: {support}, Hash Value: {bucket}")

# Step 6: Show the same entries again as a tab-separated table
print("\nTable of pairs, counts, and hash values:")
print("Pair\t\tCount\tHash Value")
for pair, support, bucket in hash_table:
    print(f"{pair}\t{support}\t{bucket}")

# Step 7: Second phase using a second hash function
def second_hash_function(pair, num_buckets=None):
    """Return the second-pass bucket for ``pair``.

    The bucket is ``(pair[0] + pair[1]) % num_buckets``. Because addition is
    commutative, the result does not depend on the order of the two items.

    Args:
        pair: Indexable holding the two items at positions 0 and 1.
        num_buckets: Modulus to use. When omitted, the module-level
            ``bucket_size`` is read at call time, which is the behavior
            existing one-argument callers rely on.

    Returns:
        The sum of the two items reduced modulo the bucket count.
    """
    if num_buckets is None:
        num_buckets = bucket_size
    return (pair[0] + pair[1]) % num_buckets

# Count occurrences of pairs based on the first hash table
# Phase 1 stores each pair in first-seen item order (for example (5, 4)),
# whereas the scan below produces pairs in ascending order. Index the
# frequent pairs by frozenset so the lookup ignores orientation, and map back
# to the tuple exactly as phase 1 stored it. Building this once also avoids
# recreating the list of frequent pairs for every candidate pair.
frequent_pairs = {frozenset(entry[0]): entry[0] for entry in hash_table}

hash_table_counts = defaultdict(int)
hash_table_hash_values = {}  # To store hash values for the second phase

for transaction in transactions:
    # De-duplicate, then sort so pairs are visited in a deterministic order.
    items = sorted(set(transaction))
    for position, first in enumerate(items):
        for second in items[position + 1:]:
            pair = frequent_pairs.get(frozenset((first, second)))
            if pair is None:
                # Not frequent in phase 1: skip it without hashing
                continue
            hash_table_counts[pair] += 1
            # Use the second hash function to determine the bucket
            hash_table_hash_values[pair] = second_hash_function(pair)

# Print the counts of pairs based on the second phase
print("\nCounts of pairs in the second phase:")
print("Pair\t\tCount\tHash Value")
for pair, count in hash_table_counts.items():
    hash_value = hash_table_hash_values.get(pair)  # Hash value for the pair
    print(f"Pair: {pair}, Count: {count}, Hash Value: {hash_value}")


Unique items: [1, 2, 3, 5, 6, 4]
Counts of each unique item: [4, 6, 8, 6, 4, 8]
Pairs of unique items: [(1, 2), (1, 3), (1, 5), (1, 6), (1, 4), (2, 3), (2, 5), (2, 6), (2, 4), (3, 5), (3, 6), (3, 4), (5, 6), (5, 4), (6, 4)]
Counts of each pair in transactions: {(1, 2): 2, (1, 3): 3, (1, 5): 1, (1, 6): 0, (1, 4): 2, (2, 3): 3, (2, 5): 2, (2, 6): 1, (2, 4): 4, (3, 5): 4, (3, 6): 2, (3, 4): 4, (5, 6): 2, (5, 4): 3, (6, 4): 3}
Enter Bucket Size value: 3
Filtered pairs, counts, and hash values:
Pair: (2, 4), Count: 4, Hash Value: 2
Pair: (3, 5), Count: 4, Hash Value: 0
Pair: (3, 4), Count: 4, Hash Value: 0

Table of pairs, counts, and hash values:
Pair		Count	Hash Value
(2, 4)	4	2
(3, 5)	4	0
(3, 4)	4	0

Counts of pairs in the second phase:
Pair		Count	Hash Value
Pair: (3, 5), Count: 4, Hash Value: 2
Pair: (2, 4), Count: 4, Hash Value: 0
Pair: (3, 4), Count: 4, Hash Value: 1
