In [None]:
from itertools import chain, combinations

def generate_candidates(prev_candidates, k):
    """Generates k-itemsets candidates from frequent (k-1)-itemsets."""
    candidates = set()

    for itemset1 in prev_candidates:
        for itemset2 in prev_candidates:
            union_set = itemset1.union(itemset2)
            if len(union_set) == k:
                candidates.add(union_set)

    return list(candidates)

def calculate_support(transactions, candidates):
    """Calculates the support of k-itemsets candidates."""
    support_count = {}

    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_count[candidate] = support_count.get(candidate, 0) + 1

    return support_count

def generate_frequent_itemsets(candidates, min_support, transactions):
    """Generates frequent k-itemsets from k-itemsets candidates."""
    frequent_itemsets = []
    support_count = calculate_support(transactions, candidates)

    for itemset, support in support_count.items():
        if support >= min_support:
            frequent_itemsets.append((itemset, support))

    return frequent_itemsets

def apriori(transactions, min_support, max_k):
    """Apriori algorithm implementation."""
    items = [frozenset(i) for i in transactions]
    frequent_itemsets_dict = {}

    for k in range(1, max_k + 1):
        candidates = generate_candidates(items, k)
        
        if not candidates:
            print("No itemsets generated for k =", k)
            break

        frequent_itemsets = generate_frequent_itemsets(candidates, min_support, items)
        frequent_itemsets_dict[k] = frequent_itemsets

    return frequent_itemsets_dict

# Example usage
min_support = 2
max_k = 20  # You can set this to an appropriate maximum value
frequent_itemsets_result = apriori(data['Discretized'], min_support, max_k)

# Print the result
for k, frequent_itemsets in frequent_itemsets_result.items():
    print("Frequent L{}:".format(k))
    for itemset, support in frequent_itemsets:
        print("{}: {}".format(itemset, support))

No itemsets generated for k = 1


In [None]:

import itertools
def generate_association_rules(L, min_confidence, transactions):
    association_rules = []
    def calculate_support(itemset, transactions):
        count = 0
        for transaction in transactions:
            if itemset.issubset(transaction):
                count += 1
        return count
    for itemset in L:
        itemset_list = list(itemset)
        for i in range(1, len(itemset_list)):
            for combination in itertools.combinations(itemset_list, i):
                A = set(combination)
                B = itemset - A
                support_A = calculate_support(A, transactions)
                support_AB = calculate_support(itemset, transactions)
                confidence = support_AB / support_A
                if confidence >= min_confidence:
                    association_rules.append((A, B, confidence))
    return association_rules

min_confidence = 0.6

association_rules = generate_association_rules(frequent_itemsets, min_confidence, data)

print("Règles d'association:")
for rule in association_rules:
    A, B, confidence = rule
    print(f"{list(A)} => {list(B)} - Conf: {confidence}")

NameError: name 'frequent_itemsets' is not defined

In [None]:
data = []

for index, row in grouped.iterrows():
    video_categories = row['Discretized']
    data.append(set(video_categories))
min_support = 7
frequent_itemsets = apriori(data, min_support)
print("Ensembles fréquents:")
for itemset in frequent_itemsets:
    print(f"{list(itemset)} - Support: {calculate_support(data, itemset)}")

TypeError: apriori() missing 1 required positional argument: 'max_k'

In [None]:
from utils import generate_association_rules

In [None]:
min_confidence = 0.6
association_rules = generate_association_rules(frequent_itemsets, min_confidence, data)
pd.DataFrame(association_rules, columns = ["A","B","Confidence","Lift","correlation","cosine similarity"])
#print("Règles d'association:")
#for rule in association_rules:
#    A, B, confidence, lift, correlation, cosine_similarity = rule
#    print(f"{list(A)} => {list(B)} - Conf: {confidence} - Lift: {lift} - Correlation: {correlation} - Cosine Similarity: {cosine_similarity}")
    

NameError: name 'frequent_itemsets' is not defined

In [None]:
transactional_data = df_after.groupby('Soil').agg({
    'Discretized': set,
}).reset_index()
transactional_data.to_csv('Dataset-Exos2_bis.csv', index=False)
transactional_data

Unnamed: 0,Soil,Discretized
0,Clayey,"{3, 9, 8, 6, 4, 7, 5}"
1,alluvial,"{3, 1, 5, 6, 4, 2}"
2,clay loam,"{3, 9, 8, 6, 4, 7, 5}"
3,coastal,"{3, 1, 5, 6, 4, 2}"
4,laterite,"{3, 1, 5, 6, 4, 2}"
5,sandy,"{3, 1, 5, 6, 4, 2}"
6,silty clay,"{3, 9, 8, 6, 4, 7, 5}"


In [None]:


def generate_association_rules(frequent_itemsets_dict):
    """Generate all association rules from frequent itemsets."""
    association_rules = []

    for k, frequent_itemsets in frequent_itemsets_dict.items():
        for itemset, support in frequent_itemsets:
            if len(itemset) > 1:
                # Generate all possible combinations of items in the itemset
                item_combinations = chain(*[combinations(itemset, i) for i in range(1, len(itemset))])

                for antecedent in item_combinations:
                    antecedent = frozenset(antecedent)
                    consequent = itemset - antecedent
                    association_rules.append((antecedent, consequent, support))

    return association_rules

def calculate_confidence(rule, frequent_itemsets_dict):
    """Calculate the confidence of an association rule."""
    antecedent, consequent, support = rule

    k = len(antecedent) + len(consequent)

    frequent_itemsets = frequent_itemsets_dict.get(k, [])
    
    antecedent_support = next((s for items, s in frequent_itemsets if items == antecedent), None)

    if antecedent_support is None:
        return 0.0  # Antecedent not found in frequent itemsets

    confidence = support / antecedent_support
    return confidence

# Example usage
min_support = 2
max_k = 10
frequent_itemsets_result = apriori(data['Discretized'], min_support, max_k)

# Generate association rules
association_rules = generate_association_rules(frequent_itemsets_result)

# Print association rules with confidence
for rule in association_rules:
    antecedent, consequent, support = rule
    confidence = calculate_confidence(rule, frequent_itemsets_result)
    print(f"Rule: {antecedent} => {consequent}, Support: {support}, Confidence: {confidence}")


No itemsets generated for k = 1


In [None]:
min_support = 2
max_k = 3
frequent_itemsets_result = apriori(data, min_support, max_k)

# Print the result
for k, frequent_itemsets in frequent_itemsets_result.items():
    print("Frequent L{}:".format(k))
    for itemset, support in frequent_itemsets:
        print("{}: {}".format(itemset, support))

AttributeError: 'str' object has no attribute 'union'

In [None]:
from itertools import chain, combinations
import ast

def parse_set_string(set_string):
    """Parse string representation of a set into an actual set."""
    return {float(x) for x in set_string.strip('{}').split(', ')}

def generate_candidates(prev_candidates, k):
    """Generates k-itemsets candidates from frequent (k-1)-itemsets."""
    candidates = set()

    for itemset1 in prev_candidates:
        for itemset2 in prev_candidates:
            union_set = itemset1.union(itemset2)
            if len(union_set) == k:
                candidates.add(union_set)

    return list(candidates)

def calculate_support(transactions, candidates):
    """Calculates the support of k-itemsets candidates."""
    support_count = {}

    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_count[candidate] = support_count.get(candidate, 0) + 1

    return support_count

def generate_frequent_itemsets(candidates, min_support, transactions):
    """Generates frequent k-itemsets from k-itemsets candidates."""
    frequent_itemsets = []
    support_count = calculate_support(transactions, candidates)

    for itemset, support in support_count.items():
        if support >= min_support:
            frequent_itemsets.append((itemset, support))

    return frequent_itemsets

def apriori(data, min_support, max_k):
    """Apriori algorithm implementation for DataFrame."""
    data['Discretized'] = data['Discretized'].apply(parse_set_string)
    transactions = data['Discretized'].tolist()
    unique_items = set()
    for transaction in transactions:
        unique_items.update(transaction)

    frequent_itemsets_dict = {}

    for k in range(1, max_k + 1):
        candidates = generate_candidates(unique_items, k)
        
        if not candidates:
            print("No itemsets generated for k =", k)
            break

        frequent_itemsets = generate_frequent_itemsets(candidates, min_support, transactions)
        frequent_itemsets_dict[k] = frequent_itemsets

    return frequent_itemsets_dict

# Example usage
min_support = 2
max_k = 3  # You can set this to an appropriate maximum value
frequent_itemsets_result = apriori(grouped, min_support, max_k)

# Print the result
for k, frequent_itemsets in frequent_itemsets_result.items():
    print("Frequent L{}:".format(k))
    for itemset, support in frequent_itemsets:
        print("{}: {}".format(itemset, support))


AttributeError: 'set' object has no attribute 'strip'