In [2]:
import requests
import os
from collections import Counter, defaultdict
from itertools import chain, combinations

In [2]:
def download_document(file_name, document_url):

   if os.path.exists(file_name):

       pass

   else:

       response = requests.get(document_url)

       if response.status_code == 200:

           with open(file_name, 'wb') as f:

               f.write(response.content)

       else:

           print(f'Failed to download the document. Status code: {response.status_code}')

In [3]:
file_name = 'orders.txt'

document_url = 'https://drive.google.com/uc?id=1IOPTVq2ooQfZRkF3rAjGkTjRtbotG7FF'

download_document(file_name, document_url)

In [8]:
def read_orders(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        orders = file.read().split('\n\n')
        orders = [order.split('@@@') for order in orders]
    return orders

def count_products(orders):
    product_count = Counter(chain.from_iterable(orders))
    return product_count

def calculate_support(orders, product1, product2):
    support_count = sum(1 for order in orders if product1 in order and product2 in order)
    support = support_count / len(orders)
    return support

def generate_association_rules(orders, min_confidence, min_support):
    product_count = count_products(orders)

    for order in orders:
        for product1, product2 in combinations(order, 2):
            support = calculate_support(orders, product1, product2)
            if support >= min_support:
                confidence1 = support / product_count[product1]
                confidence2 = support / product_count[product2]

                if confidence1 >= min_confidence:
                    print(f"{product1} => {product2} ({confidence1:.2f}% confidence), {int(support*len(orders))} support")
                if confidence2 >= min_confidence:
                    print(f"{product2} => {product1} ({confidence2:.2f}% confidence), {int(support*len(orders))} support")

In [5]:
orders = read_orders('orders.txt')
min_confidence = 0.45
min_support = 0.15

In [None]:
generate_association_rules(orders, min_confidence, min_support)

In [5]:
import requests
import os
from collections import defaultdict, Counter
from itertools import combinations

def download_document(file_name, document_url):
    if os.path.exists(file_name):
        print('File already downloaded.')
    else:
        response = requests.get(document_url)
        if response.status_code == 200:
            with open(file_name, 'wb') as f:
                f.write(response.content)
            print('File downloaded successfully.')
        else:
            print(f'Failed to download the document. Status code: {response.status_code}')

def read_orders(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        orders = file.read().split('\n\n')
        orders = [order.split('@@@') for order in orders]
    return orders

def count_products(orders):
    return Counter(chain.from_iterable(orders))

def get_frequent_itemsets(orders, min_support):
    total_orders = len(orders)
    itemsets = defaultdict(int)
    
    # Count individual items
    for order in orders:
        for item in set(order):
            itemsets[frozenset([item])] += 1
    
    # Filter out non-frequent itemsets
    itemsets = {itemset: count for itemset, count in itemsets.items() if count / total_orders >= min_support}
    
    k = 2
    while True:
        new_combinations = defaultdict(int)
        
        for order in orders:
            for combination in combinations(set(order), k):
                comb_set = frozenset(combination)
                if all(frozenset(subset) in itemsets for subset in combinations(comb_set, k-1)):
                    new_combinations[comb_set] += 1
        
        new_combinations = {itemset: count for itemset, count in new_combinations.items() if count / total_orders >= min_support}
        
        if not new_combinations:
            break
        
        itemsets.update(new_combinations)
        k += 1
    
    return itemsets

def generate_association_rules(orders, min_confidence, min_support):
    itemsets = get_frequent_itemsets(orders, min_support)
    total_orders = len(orders)
    
    rules = []
    
    for itemset in itemsets:
        if len(itemset) > 1:
            for antecedent in combinations(itemset, len(itemset) - 1):
                antecedent = frozenset(antecedent)
                consequent = itemset - antecedent
                support = itemsets[itemset] / total_orders
                confidence = itemsets[itemset] / itemsets[antecedent]
                
                if confidence >= min_confidence:
                    rules.append((antecedent, consequent, confidence, support))
    
    for rule in rules:
        antecedent, consequent, confidence, support = rule
        print(f"{set(antecedent)} => {set(consequent)} ({confidence:.2f}% confidence), {int(support * total_orders)} support")

file_name = 'orders.txt'
document_url = 'https://drive.google.com/uc?id=1IOPTVq2ooQfZRkF3rAjGkTjRtbotG7FF'

download_document(file_name, document_url)

# Зчитування та обробка даних
orders = read_orders(file_name)
min_confidence = 0.45
min_support = 0.15


File downloaded successfully.


In [6]:
# Генерація асоціативних правил
generate_association_rules(orders, min_confidence, min_support)

In [12]:
import requests
import os
from collections import defaultdict, Counter
from itertools import combinations, chain

def download_document(file_name, document_url):
    if os.path.exists(file_name):
        print('File already downloaded.')
    else:
        response = requests.get(document_url)
        if response.status_code == 200:
            with open(file_name, 'wb') as f:
                f.write(response.content)
            print('File downloaded successfully.')
        else:
            print(f'Failed to download the document. Status code: {response.status_code}')

def read_orders(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        orders = file.read().strip().split('\n\n')
        orders = [order.split('@@@') for order in orders]
    return orders

def count_products(orders):
    return Counter(chain.from_iterable(orders))

def get_frequent_itemsets(orders, min_support):
    total_orders = len(orders)
    itemsets = defaultdict(int)
    
    for order in orders:
        for item in set(order):
            itemsets[frozenset([item])] += 1
    
    itemsets = {itemset: count for itemset, count in itemsets.items() if count / total_orders >= min_support}
    
    print(f"Initial frequent itemsets: {itemsets}")

    k = 2
    while True:
        new_combinations = defaultdict(int)
        
        for order in orders:
            for combination in combinations(set(order), k):
                comb_set = frozenset(combination)
                if all(frozenset(subset) in itemsets for subset in combinations(comb_set, k-1)):
                    new_combinations[comb_set] += 1
        
        new_combinations = {itemset: count for itemset, count in new_combinations.items() if count / total_orders >= min_support}
        
        if not new_combinations:
            break
        
        itemsets.update(new_combinations)
        k += 1
    
    print(f"Frequent itemsets: {itemsets}")
    return itemsets

def generate_association_rules(orders, min_confidence, min_support):
    itemsets = get_frequent_itemsets(orders, min_support)
    total_orders = len(orders)
    
    rules = []
    
    for itemset in itemsets:
        if len(itemset) > 1:
            for antecedent in combinations(itemset, len(itemset) - 1):
                antecedent = frozenset(antecedent)
                consequent = itemset - antecedent
                support = itemsets[itemset] / total_orders
                confidence = itemsets[itemset] / itemsets[antecedent]
                
                if confidence >= min_confidence:
                    rules.append((antecedent, consequent, confidence, support))
    
    if not rules:
        print("No association rules found.")
    else:
        for rule in rules:
            antecedent, consequent, confidence, support = rule
            print(f"{set(antecedent)} => {set(consequent)} ({confidence*100:.2f}% confidence), {int(support * total_orders)} support")

file_name = 'orders.txt'
document_url = 'https://drive.google.com/uc?id=1IOPTVq2ooQfZRkF3rAjGkTjRtbotG7FF'

download_document(file_name, document_url)

orders = read_orders(file_name)
min_confidence = 0.45
min_support = 0.15

generate_association_rules(orders, min_confidence, min_support)


File already downloaded.
Initial frequent itemsets: {}
Frequent itemsets: {}
No association rules found.


In [1]:
import requests
import os
from collections import Counter, defaultdict
from itertools import combinations, chain

def download_document(file_name, document_url):
    if os.path.exists(file_name):
        print('File already downloaded.')
    else:
        response = requests.get(document_url)
        if response.status_code == 200:
            with open(file_name, 'wb') as f:
                f.write(response.content)
            print('File downloaded successfully.')
        else:
            print(f'Failed to download the document. Status code: {response.status_code}')

def read_orders(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        orders = file.read().strip().split('\n\n')
        orders = [order.split('@@@') for order in orders]
    return orders

def count_products(orders):
    return Counter(chain.from_iterable(orders))

def count_product_pairs(orders):
    pair_count = defaultdict(int)
    for order in orders:
        for pair in combinations(set(order), 2):
            pair = tuple(sorted(pair))
            pair_count[pair] += 1
    return pair_count

def generate_association_rules(orders, min_confidence, min_support):
    total_orders = len(orders)
    product_count = count_products(orders)
    pair_count = count_product_pairs(orders)
    
    rules = []
    
    for (product1, product2), count in pair_count.items():
        support = count / total_orders
        
        if support >= min_support:
            confidence1 = count / product_count[product1]
            confidence2 = count / product_count[product2]
            
            if confidence1 >= min_confidence:
                rules.append((product1, product2, confidence1, support))
            if confidence2 >= min_confidence:
                rules.append((product2, product1, confidence2, support))
    
    if not rules:
        print("No association rules found.")
    else:
        for product1, product2, confidence, support in rules:
            print(f"{product1} => {product2} ({confidence*100:.2f}% confidence), {int(support * total_orders)} support")

file_name = 'orders.txt'
document_url = 'https://drive.google.com/uc?id=1IOPTVq2ooQfZRkF3rAjGkTjRtbotG7FF'

download_document(file_name, document_url)

# Зчитування та обробка даних
orders = read_orders(file_name)
min_confidence = 0.45
min_support = 0.15

# Генерація асоціативних правил
generate_association_rules(orders, min_confidence, min_support)


File already downloaded.
No association rules found.


In [4]:
import requests
import os
from collections import Counter, defaultdict
from itertools import combinations, chain

def download_document(file_name, document_url):
    if os.path.exists(file_name):
        print('File already downloaded.')
    else:
        response = requests.get(document_url)
        if response.status_code == 200:
            with open(file_name, 'wb') as f:
                f.write(response.content)
            print('File downloaded successfully.')
        else:
            print(f'Failed to download the document. Status code: {response.status_code}')

def read_orders(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        orders = file.read().strip().split('\n\n')
        orders = [order.split('@@@') for order in orders]
    return orders

def count_products(orders):
    return Counter(chain.from_iterable(orders))

def count_product_pairs(orders):
    pair_count = defaultdict(int)
    for order in orders:
        for pair in combinations(set(order), 2):
            pair = tuple(sorted(pair))
            pair_count[pair] += 1
    return pair_count

    
def generate_association_rules(product_pairs, min_support, min_confidence):
    total_orders = len(orders)
    product_count = count_products(orders)
    pair_count = count_product_pairs(orders)
    
    print(f"Total orders: {total_orders}")
    print(f"Number of unique products: {len(product_count)}")
    print(f"Found {len(pair_count)} pairs of products from {total_orders} orders")

    for (x, y), support in product_pairs.items():
        # Обчислення достовірності
        support_x = sum([v for (a, b), v in product_pairs.items() if a == x or b == x])
        support_y = sum([v for (a, b), v in product_pairs.items() if a == y or b == y])
        
        confidence_x_to_y = (support / support_x) * 100 if support_x > 0 else 0
        confidence_y_to_x = (support / support_y) * 100 if support_y > 0 else 0
        
        # Перевірка мінімального порогу підтримки
        if support >= min_support:
            # Перевірка достовірності та виведення правил
            if confidence_x_to_y >= min_confidence:
                print(f"Rule: {x} => {y}, support: {support}, confidence: {confidence_x_to_y:.2f}%")
            if confidence_y_to_x >= min_confidence:
                print(f"Rule: {y} => {x}, support: {support}, confidence: {confidence_y_to_x:.2f}%")

file_name = 'orders.txt'
document_url = 'https://drive.google.com/uc?id=1IOPTVq2ooQfZRkF3rAjGkTjRtbotG7FF'

download_document(file_name, document_url)

# Зчитування та обробка даних
orders = read_orders(file_name)
min_confidence = 45
min_support = 15

# Генерація асоціативних правил
generate_association_rules(orders, min_confidence, min_support)

File already downloaded.
Total orders: 131209
Number of unique products: 39123
Found 5711806 pairs of products from 131209 orders


AttributeError: 'list' object has no attribute 'items'

In [3]:
import requests
import os
from collections import Counter, defaultdict
from itertools import chain, combinations

def download_document(file_name, document_url):
    if os.path.exists(file_name):
        print('File already downloaded.')
    else:
        response = requests.get(document_url)
        if response.status_code == 200:
            with open(file_name, 'wb') as f:
                f.write(response.content)
            print('File downloaded successfully.')
        else:
            print(f'Failed to download the document. Status code: {response.status_code}')

def read_orders(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        orders = file.read().strip().split('\n\n')
        orders = [order.split('@@@') for order in orders]
    return orders

def count_products(orders):
    return Counter(chain.from_iterable(orders))

def count_product_pairs(orders):
    pair_count = defaultdict(int)
    for order in orders:
        for pair in combinations(set(order), 2):
            pair = tuple(sorted(pair))
            pair_count[pair] += 1
    return pair_count

def generate_association_rules(products, product_pairs, min_confidence, min_support, total_orders):
    rules = []
    
    for (product1, product2), pair_count in product_pairs.items():
        support = pair_count / total_orders
        
        if support >= min_support:
            confidence1 = pair_count / products[product1]
            confidence2 = pair_count / products[product2]
            
            if confidence1 >= min_confidence:
                rules.append((product1, product2, confidence1, support))
            if confidence2 >= min_confidence:
                rules.append((product2, product1, confidence2, support))
    
    if not rules:
        print("No association rules found.")
    else:
        for product1, product2, confidence, support in rules:
            print(f"{product1} => {product2} ({confidence*100:.2f}% confidence), {int(support * total_orders)} support")

file_name = 'orders.txt'
document_url = 'https://drive.google.com/uc?id=1IOPTVq2ooQfZRkF3rAjGkTjRtbotG7FF'

download_document(file_name, document_url)

# Зчитування та обробка даних
orders = read_orders(file_name)
min_confidence = 0.45
min_support = 0.15

# Підрахунок частот товарів та пар товарів
products = count_products(orders)
product_pairs = count_product_pairs(orders)
total_orders = len(orders)

print(f"Total orders: {total_orders}")
print(f"Number of unique products: {len(products)}")
print(f"Found {len(product_pairs)} pairs of products from {total_orders} orders")

# Генерація асоціативних правил
generate_association_rules(products, product_pairs, min_confidence, min_support, total_orders)

File already downloaded.
Total orders: 131209
Number of unique products: 39123
Found 5711806 pairs of products from 131209 orders
No association rules found.


In [6]:
import requests
import os
from collections import Counter, defaultdict
from itertools import chain, combinations

def download_document(file_name, document_url):
    if os.path.exists(file_name):
        print('File already downloaded.')
    else:
        response = requests.get(document_url)
        if response.status_code == 200:
            with open(file_name, 'wb') as f:
                f.write(response.content)
            print('File downloaded successfully.')
        else:
            print(f'Failed to download the document. Status code: {response.status_code}')

def read_orders(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        orders = file.read().strip().split('\n\n')
        orders = [order.split('@@@') for order in orders]
    return orders

def count_products(orders):
    return Counter(chain.from_iterable(orders))

def count_product_pairs(orders):
    pair_count = defaultdict(int)
    for order in orders:
        for pair in combinations(set(order), 2):
            pair = tuple(sorted(pair))
            pair_count[pair] += 1
    return pair_count
'''
def generate_association_rules(orders, min_support, min_confidence):
    # Створення словника для підрахунку пар продуктів
    product_pairs = {}

    # Перебір кожного замовлення
    for order in orders:
        # Створення всіх можливих пар продуктів у замовленні
        for pair in combinations(order, 2):
            # Сортування пари для уникнення дублікатів
            pair = tuple(sorted(pair))
            pair_key = f"{pair[0]} <=> {pair[1]}"
            if pair_key not in product_pairs:
                product_pairs[pair_key] = 0
            product_pairs[pair_key] += 1

    # Підрахунок підтримки для кожного окремого продукту
    product_support = {}
    for order in orders:
        for product in set(order):
            if product not in product_support:
                product_support[product] = 0
            product_support[product] += 1

    # Генерація асоціативних правил
    for pair_key, support in product_pairs.items():
        # Перевірка мінімальної підтримки
        if support >= min_support:
            products = pair_key.split(" <=> ")
            x, y = products[0], products[1]

            # Обчислення достовірності
            confidence_x_to_y = (support / product_support[x]) * 100
            confidence_y_to_x = (support / product_support[y]) * 100

            # Виведення правил з перевіркою мінімальної достовірності
            if confidence_x_to_y >= min_confidence:
                print(f"Rule: {x} => {y}, support: {support}, confidence: {confidence_x_to_y:.2f}%")
            if confidence_y_to_x >= min_confidence:
                print(f"Rule: {y} => {x}, support: {support}, confidence: {confidence_y_to_x:.2f}%")
'''
def generate_association_rules(orders, min_support, min_confidence):
    # Створення словника для підрахунку пар продуктів
    product_pairs = {}

    # Перебір кожного замовлення
    for order in orders:
        # Створення всіх можливих пар продуктів у замовленні
        for pair in combinations(order, 2):
            # Сортування пари для уникнення дублікатів
            pair = tuple(sorted(pair))
            pair_key = f"{pair[0]} <=> {pair[1]}"
            if pair_key not in product_pairs:
                product_pairs[pair_key] = 0
            product_pairs[pair_key] += 1

    # Підрахунок підтримки для кожного окремого продукту
    product_support = {}
    for order in orders:
        for product in set(order):
            if product not in product_support:
                product_support[product] = 0
            product_support[product] += 1

    # Лічильник для кількості знайдених асоціацій
    association_count = 0

    # Генерація асоціативних правил
    for pair_key, support in product_pairs.items():
        # Перевірка мінімальної підтримки
        if support >= min_support:
            products = pair_key.split(" <=> ")
            x, y = products[0], products[1]

            # Обчислення достовірності
            confidence_x_to_y = (support / product_support[x]) * 100
            confidence_y_to_x = (support / product_support[y]) * 100

            # Виведення правил з перевіркою мінімальної достовірності
            if confidence_x_to_y >= min_confidence:
                print(f"Rule: {x} => {y}, support: {support}, confidence: {confidence_x_to_y:.2f}%")
                association_count += 1
            if confidence_y_to_x >= min_confidence:
                print(f"Rule: {y} => {x}, support: {support}, confidence: {confidence_y_to_x:.2f}%")
                association_count += 1

    # Виведення кількості знайдених асоціацій
    print(f"Total associations found: {association_count}")

file_name = 'orders.txt'
document_url = 'https://drive.google.com/uc?id=1IOPTVq2ooQfZRkF3rAjGkTjRtbotG7FF'

download_document(file_name, document_url)

# Зчитування та обробка даних
orders = read_orders(file_name)
min_confidence = 45
min_support = 15

# Підрахунок частот товарів та пар товарів
products = count_products(orders)
product_pairs = count_product_pairs(orders)
total_orders = len(orders)

print(f"Total orders: {total_orders}")
print(f"Number of unique products: {len(products)}")
print(f"Found {len(product_pairs)} pairs of products from {total_orders} orders")

generate_association_rules(orders, min_support, min_confidence)

File already downloaded.
Total orders: 131209
Number of unique products: 39123
Found 5711806 pairs of products from 131209 orders
Rule: Natural Uncured Turkey Hot Dog => Banana, support: 21, confidence: 45.65%
Rule: Danimals Strawberry Explosion & Strikin' Strawberry Kiwi Smoothies => Banana, support: 20, confidence: 48.78%
Rule: Hot Kid Organic Baby Mum-Mum Original Rice Rusks => Bag of Organic Bananas, support: 17, confidence: 54.84%
Rule: Almond Milk Peach Yogurt => Almond Milk Blueberry Yogurt, support: 78, confidence: 48.75%
Rule: Grain Free Turkey Formula Cat Food => Grain Free Chicken Formula Cat Food, support: 20, confidence: 60.61%
Rule: Grain Free Turkey & Salmon Formula Cat Food => Grain Free Chicken Formula Cat Food, support: 20, confidence: 47.62%
Rule: Peanut Butter & Coconut Bar => Peanut Butter Bar, support: 17, confidence: 50.00%
Rule: Peaches Large => Banana, support: 25, confidence: 52.08%
Rule: Natural Lemon Flavored Sparkling Water => Lemon Sparkling Water, support