In [7]:
import requests

def load_data_from_url(url):
    """Download a whitespace-separated transaction file and parse it.

    Each non-empty line of the response is one transaction; its items are
    the whitespace-separated tokens of that line.

    Args:
        url: Address of the data file to fetch.

    Returns:
        A list of transactions, each a list of item strings.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    transactions = []
    with requests.get(url, stream=True) as response:
        # Fail loudly instead of parsing an HTML error page as transactions.
        response.raise_for_status()
        # With decode_unicode=True, iter_lines still yields bytes when the
        # response carries no encoding; pin a fallback so items are always str.
        if response.encoding is None:
            response.encoding = 'utf-8'
        for line in response.iter_lines(decode_unicode=True):
            if line:
                transactions.append(line.split())
    return transactions

# def load_data_from_url(url):
#     response = requests.get(url)
#     data = response.text.splitlines()
#     transactions = [line.split() for line in data]
#     return transactions

def calculate_support(itemset, transactions):
    """Return the fraction of transactions that contain every item of *itemset*.

    Args:
        itemset: Iterable of items that must all be present.
        transactions: Sequence of transactions, each an iterable of items.

    Returns:
        A float between 0 and 1. An empty *transactions* sequence yields 0.0
        rather than a division-by-zero error.
    """
    total = len(transactions)
    if total == 0:
        return 0.0
    required = set(itemset)
    matches = sum(1 for transaction in transactions if required.issubset(transaction))
    return matches / total

# Generate candidate itemsets (combinations)
def generate_combinations(items, length):
    """Return every *length*-element combination of *items*, in input order.

    Args:
        items: Iterable of items to combine.
        length: Number of items per combination.

    Returns:
        A list of lists. ``length == 0`` gives ``[[]]``; a length larger than
        the number of items, or a negative length, gives ``[]``.
    """
    from itertools import combinations

    # combinations() raises ValueError for a negative size; report "no
    # combinations" instead so callers never have to catch it.
    if length < 0:
        return []
    return [list(combo) for combo in combinations(items, length)]

# Keep only the candidates whose support reaches the threshold
def filter_frequent_itemsets(candidates, transactions, min_support):
    """Pair each candidate with its support and drop the infrequent ones.

    Args:
        candidates: Iterable of candidate itemsets.
        transactions: Transactions passed on to calculate_support.
        min_support: Lowest support (inclusive) a candidate may have to be kept.

    Returns:
        A list of ``(itemset, support)`` tuples in candidate order.
    """
    # Lazily score every candidate exactly once, then filter on the score.
    scored = (
        (candidate, calculate_support(candidate, transactions))
        for candidate in candidates
    )
    return [(candidate, score) for candidate, score in scored if score >= min_support]

# Generate association rules
def generate_rules(frequent_itemsets, transactions, min_confidence):
    """Derive association rules ``antecedent -> consequent`` from frequent itemsets.

    For every itemset with at least two items, each non-empty proper subset
    is tried as the antecedent and the remaining items form the consequent.
    Confidence is the itemset's support divided by the antecedent's support.

    Args:
        frequent_itemsets: Iterable of ``(itemset, support)`` tuples.
        transactions: Transactions passed on to calculate_support.
        min_confidence: Lowest confidence (inclusive) a rule may have to be kept.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples. The
        consequent lists the remaining items in the order they appear in the
        itemset, so the output is reproducible from run to run.
    """
    rules = []
    for itemset, support in frequent_itemsets:
        # A single item cannot be split into antecedent and consequent.
        if len(itemset) < 2:
            continue
        for size in range(1, len(itemset)):
            for antecedent in generate_combinations(itemset, size):
                antecedent_members = set(antecedent)
                # Filtering the itemset (instead of a set difference) keeps a
                # stable item order that does not depend on string hashing.
                consequent = [item for item in itemset if item not in antecedent_members]
                antecedent_support = calculate_support(antecedent, transactions)
                # Skip antecedents that never occur to avoid dividing by zero.
                if antecedent_support > 0:
                    confidence = support / antecedent_support
                    if confidence >= min_confidence:
                        rules.append((antecedent, consequent, confidence))
    return rules

def apriori(transactions, min_support, min_confidence):
    """Mine frequent itemsets and association rules level by level.

    Candidates of size k+1 are built only from items that still occur in a
    frequent k-itemset, and a candidate is counted only if all of its
    k-element subsets are frequent. Both prunings rely on the fact that every
    subset of a frequent itemset is itself frequent, so no frequent itemset
    is lost while far fewer candidates need their support counted.

    Args:
        transactions: Sequence of transactions, each an iterable of hashable items.
        min_support: Lowest support (inclusive) for an itemset to be frequent.
        min_confidence: Lowest confidence (inclusive) for a rule to be kept.

    Returns:
        A tuple ``(all_frequent_itemsets, rules, itemset_count)`` where
        ``all_frequent_itemsets`` is a list of ``(itemset, support)`` tuples,
        ``rules`` is the result of generate_rules, and ``itemset_count`` maps
        each itemset size to the number of frequent itemsets of that size.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result order is reproducible (iteration order of a set of str is not).
    items = list(dict.fromkeys(
        item for transaction in transactions for item in transaction
    ))
    all_frequent_itemsets = []
    itemset_count = {}
    previous_level = None  # frozensets of the frequent itemsets one size smaller
    length = 1

    def has_only_frequent_subsets(candidate):
        # True when removing any single item leaves a known frequent itemset.
        members = frozenset(candidate)
        return all(members - {item} in previous_level for item in members)

    while True:
        candidates = generate_combinations(items, length)
        if previous_level is not None:
            candidates = [
                candidate for candidate in candidates
                if has_only_frequent_subsets(candidate)
            ]
        frequent_itemsets = filter_frequent_itemsets(candidates, transactions, min_support)

        if not frequent_itemsets:
            break

        itemset_count[length] = len(frequent_itemsets)
        all_frequent_itemsets.extend(frequent_itemsets)

        previous_level = {frozenset(itemset) for itemset, _ in frequent_itemsets}
        # Items absent from every frequent itemset of this size cannot appear
        # in a larger frequent itemset, so drop them before the next level.
        surviving = frozenset().union(*previous_level)
        items = [item for item in items if item in surviving]
        length += 1

    rules = generate_rules(all_frequent_itemsets, transactions, min_confidence)

    return all_frequent_itemsets, rules, itemset_count

# Dataset to mine; the smaller test file is kept commented out as an alternative.
#url = 'https://homel.vsb.cz/~pro0199/files/data_association_rules/itemsets_test.dat'
url = 'https://homel.vsb.cz/~pro0199/files/data_association_rules/chess.dat'

# Download and parse the transactions (this performs the network request).
transactions = load_data_from_url(url)
# Thresholds handed to apriori(): minimum support for itemsets and
# minimum confidence for rules.
min_support = 0.9
min_confidence = 0.5

# Mine frequent itemsets, association rules and the per-size itemset counts.
frequent_itemsets, rules, itemset_count = apriori(transactions, min_support, min_confidence)

# Report every frequent itemset together with its support.
print("Frequent sets:")
for itemset, support in frequent_itemsets:
    print(f"Set: {itemset}, Support: {support}")

# Report how many frequent itemsets were found for each itemset size.
print("\nNumber of sets for each size exceeding min_support:")
for length, count in itemset_count.items():
    print(f"{length}-element sets: {count}")

# Report every rule with its confidence rounded to two decimals.
print("\nAssociation rules exceeding min_confidence:")
for antecedent, consequent, confidence in rules:
    print(f"Rule: {antecedent} -> {consequent}, Confidence: {round(confidence, 2)}")


KeyboardInterrupt: 

In [31]:
import numpy as np
import requests

def load_data_from_url(url):
    """Download a whitespace-separated transaction file and parse it.

    Each non-empty line of the response is one transaction; its items are
    the whitespace-separated tokens of that line, decoded to str when the
    response delivers bytes.

    Args:
        url: Address of the data file to fetch.

    Returns:
        A tuple ``(transactions, unique_items)``: the list of transactions
        (each a list of item strings) and the list of distinct items in the
        order they were first seen, so the item order is the same on every run.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    transactions = []
    # A dict used as an ordered set: keys keep first-seen order, whereas the
    # iteration order of a set of str changes between interpreter runs.
    unique_items = {}
    with requests.get(url, stream=True) as response:
        # Fail loudly instead of parsing an HTML error page as transactions.
        response.raise_for_status()
        for line in response.iter_lines(decode_unicode=True):
            if not line:
                continue
            transaction = [
                item.decode('utf-8') if isinstance(item, bytes) else item
                for item in line.split()
            ]
            transactions.append(transaction)
            unique_items.update(dict.fromkeys(transaction))
    return transactions, list(unique_items)

# Build the binary matrix
def create_binary_matrix(transactions, unique_items):
    """Encode transactions as a 0/1 matrix with one row per transaction.

    Args:
        transactions: Sequence of transactions, each an iterable of items.
        unique_items: Sequence of all items; an item's position becomes its column.

    Returns:
        A tuple ``(binary_matrix, item_index)``: an integer array of shape
        ``(len(transactions), len(unique_items))`` holding 1 where the
        transaction contains the item, and the item -> column mapping.

    Raises:
        KeyError: If a transaction contains an item missing from *unique_items*.
    """
    item_index = dict(zip(unique_items, range(len(unique_items))))
    shape = (len(transactions), len(unique_items))
    binary_matrix = np.zeros(shape, dtype=int)

    for row, transaction in enumerate(transactions):
        for item in transaction:
            column = item_index[item]
            binary_matrix[row, column] = 1
    return binary_matrix, item_index

# Compute support using the binary matrix
def calculate_support(itemset_indices, binary_matrix):
    """Return the fraction of rows that have a 1 in every given column.

    Args:
        itemset_indices: Sequence of column indices forming the itemset.
        binary_matrix: 2-D array with one row per transaction.

    Returns:
        The share of rows in which all selected columns equal 1. A matrix
        without rows yields 0.0 instead of a NaN from dividing zero by zero.
    """
    row_count = len(binary_matrix)
    if row_count == 0:
        return 0.0
    # Select the itemset's columns, then keep rows where all of them are 1.
    selected = binary_matrix[:, list(itemset_indices)]
    support_count = np.sum(np.all(selected == 1, axis=1))
    return support_count / row_count

# Generate candidate combinations
def generate_combinations(items, length):
    """Return all *length*-element combinations of *items* as a list of tuples.

    Args:
        items: Iterable of items to combine.
        length: Number of items per combination.

    Returns:
        A list of tuples in the order produced by itertools.combinations.
    """
    from itertools import combinations

    # Materialise the iterator so callers can test the result for emptiness.
    return [*combinations(items, length)]

# Keep only the candidates whose support reaches the threshold
def filter_frequent_itemsets(candidates, binary_matrix, min_support):
    """Pair each candidate with its support and drop the infrequent ones.

    Args:
        candidates: Iterable of candidate itemsets.
        binary_matrix: Matrix passed on to calculate_support.
        min_support: Lowest support (inclusive) a candidate may have to be kept.

    Returns:
        A list of ``(itemset, support)`` tuples in candidate order.
    """
    # Lazily score every candidate exactly once, then filter on the score.
    scored = (
        (candidate, calculate_support(candidate, binary_matrix))
        for candidate in candidates
    )
    return [(candidate, score) for candidate, score in scored if score >= min_support]

# Generate association rules
def generate_rules(frequent_itemsets, binary_matrix, min_confidence, item_index):
    """Derive association rules ``antecedent -> consequent`` from frequent itemsets.

    For every itemset with at least two items, each non-empty proper subset
    is tried as the antecedent and the remaining items form the consequent.
    Confidence is the itemset's support divided by the antecedent's support.

    Args:
        frequent_itemsets: Iterable of ``(itemset, support)`` tuples.
        binary_matrix: Matrix passed on to calculate_support.
        min_confidence: Lowest confidence (inclusive) a rule may have to be kept.
        item_index: Not used here; accepted so existing calls keep working.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples, where the
        antecedent is a combination as returned by generate_combinations and
        the consequent is a list of the remaining items in itemset order.
    """
    rules = []
    for itemset, support in frequent_itemsets:
        # Rules need more than one item; itemsets of every larger size are
        # processed, not just pairs.
        if len(itemset) < 2:
            continue
        for size in range(1, len(itemset)):
            for antecedent in generate_combinations(itemset, size):
                antecedent_members = set(antecedent)
                # Filtering the itemset (instead of a set difference) keeps
                # the consequent in a stable, predictable order.
                consequent = [item for item in itemset if item not in antecedent_members]
                antecedent_support = calculate_support(antecedent, binary_matrix)
                # Skip antecedents that never occur to avoid dividing by zero.
                if antecedent_support > 0:
                    confidence = support / antecedent_support
                    if confidence >= min_confidence:
                        rules.append((antecedent, consequent, confidence))
    return rules

# Apriori algorithm over the binary matrix
def apriori(transactions, unique_items, min_support, min_confidence):
    """Mine frequent itemsets and association rules level by level.

    Items are represented by their position in *unique_items*. Candidates of
    size k+1 are built only from positions that still occur in a frequent
    k-itemset, and a candidate is counted only if all of its k-element
    subsets are frequent. Both prunings rely on the fact that every subset of
    a frequent itemset is itself frequent, so no frequent itemset is lost
    while far fewer candidates need their support counted.

    Args:
        transactions: Sequence of transactions, each an iterable of items.
        unique_items: Sequence of all distinct items.
        min_support: Lowest support (inclusive) for an itemset to be frequent.
        min_confidence: Lowest confidence (inclusive) for a rule to be kept.

    Returns:
        A tuple ``(all_frequent_itemsets, rules, itemset_count, item_index)``:
        the ``(itemset, support)`` tuples found, the result of generate_rules,
        a mapping from itemset size to the number of frequent itemsets of
        that size, and the item -> index mapping from create_binary_matrix.
    """
    binary_matrix, item_index = create_binary_matrix(transactions, unique_items)
    items = list(range(len(unique_items)))
    all_frequent_itemsets = []
    itemset_count = {}
    previous_level = None  # frozensets of the frequent itemsets one size smaller
    length = 1

    def has_only_frequent_subsets(candidate):
        # True when removing any single item leaves a known frequent itemset.
        members = frozenset(candidate)
        return all(members - {item} in previous_level for item in members)

    while True:
        candidates = generate_combinations(items, length)
        if previous_level is not None:
            candidates = [
                candidate for candidate in candidates
                if has_only_frequent_subsets(candidate)
            ]
        frequent_itemsets = filter_frequent_itemsets(candidates, binary_matrix, min_support)

        if not frequent_itemsets:
            break

        itemset_count[length] = len(frequent_itemsets)
        all_frequent_itemsets.extend(frequent_itemsets)

        previous_level = {frozenset(itemset) for itemset, _ in frequent_itemsets}
        # Items absent from every frequent itemset of this size cannot appear
        # in a larger frequent itemset, so drop them before the next level.
        surviving = frozenset().union(*previous_level)
        items = [item for item in items if item in surviving]
        length += 1

    rules = generate_rules(all_frequent_itemsets, binary_matrix, min_confidence, item_index)

    return all_frequent_itemsets, rules, itemset_count, item_index

# Dataset to mine; the smaller test file is kept commented out as an alternative.
#url = 'https://homel.vsb.cz/~pro0199/files/data_association_rules/itemsets_test.dat'
url = 'https://homel.vsb.cz/~pro0199/files/data_association_rules/chess.dat'

# Download and parse the transactions (this performs the network request).
transactions, unique_items = load_data_from_url(url)
# Thresholds handed to apriori(): minimum support for itemsets and
# minimum confidence for rules.
min_support = 0.95
min_confidence = 0.5

# Mine frequent itemsets, rules and per-size counts; item_index is returned
# as well but not used below.
frequent_itemsets, rules, itemset_count, item_index = apriori(transactions, unique_items, min_support, min_confidence)

# Itemsets hold positions into unique_items; map them back to item labels for display.
print("Frequent sets:")
for itemset, support in frequent_itemsets:
    items = [unique_items[idx] for idx in itemset]
    print(f"Set: {items}, Support: {support}")

# Report how many frequent itemsets were found for each itemset size.
print("\nNumber of sets for each size exceeding min_support:")
for length, count in itemset_count.items():
    print(f"{length}-element sets: {count}")

# Report every rule with item labels and its confidence rounded to two decimals.
print("\nAssociation rules exceeding min_confidence:")
for antecedent, consequent, confidence in rules:
    antecedent_items = [unique_items[idx] for idx in antecedent]
    consequent_items = [unique_items[idx] for idx in consequent]
    print(f"Rule: {antecedent_items} -> {consequent_items}, Confidence: {round(confidence, 2)}")


KeyboardInterrupt: 