In [3]:
import numpy as np
import pandas as pd
from template import *
from itertools import chain, combinations
from datetime import datetime
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

### Utils

In [7]:
def eclat(db, minsup):
    """Mine all frequent itemsets of ``db`` using a vertical (tid-list) layout.

    Parameters
    ----------
    db : sequence of transactions
        Each transaction is an iterable of hashable, mutually sortable items.
        ``db`` is iterated once here, so any re-iterable or single-pass
        iterable works.
    minsup : int
        Minimum absolute support (number of transactions).

    Returns
    -------
    list of (itemset, support)
        ``itemset`` is a sorted list of items and ``support`` is the number of
        transactions that contain it.

    Side effects
    ------------
    Prints the runtime together with the start and finish time.
    """
    start_time = datetime.now()
    time_format = "%H:%M:%S:%f"

    # Build the vertical layout: item -> list of transaction indices.
    # Iterating over set(transaction) makes sure a transaction that repeats an
    # item is counted only once for that item; otherwise the support of
    # 1-itemsets would be inflated (larger itemsets are intersected as sets).
    tid_lists = {}
    for transaction_index, transaction in enumerate(db):
        for item in set(transaction):
            tid_lists.setdefault(item, []).append(transaction_index)

    # Keep only the frequent single items, in sorted order.
    item_list = [item for item in sorted(tid_lists) if len(tid_lists[item]) >= minsup]
    itemset_supports = [([item], tid_lists[item]) for item in item_list]

    # The same list is deliberately passed as both the accumulator and the
    # current level: supportsNextLevel extends it in place and returns it.
    final_supports_list = supportsNextLevel(itemset_supports, itemset_supports, item_list, minsup)
    final_supports = [(item, len(transactions)) for item, transactions in final_supports_list]

    final_time = datetime.now()
    execution_time = (final_time - start_time)
    print(f'Runtime: {execution_time}\n    Start Time: {start_time.strftime(time_format)}\n    Finish Time: {final_time.strftime(time_format)}')
    return final_supports

    
def supportsNextLevel(total_supports, supports, items, minsup, first_time=True):
    """Recursively extend frequent itemsets, depth first, one prefix at a time.

    ``supports`` holds ``(itemset, tid_list)`` pairs for the current level and
    ``items`` the corresponding itemsets (bare items on the first call, lists
    of items on recursive calls).  Every frequent extension found is appended
    to ``total_supports`` in place.

    Returns ``total_supports`` on the top-level call (``first_time=True``) and
    ``None`` on recursive calls.
    """
    for position, prefix in enumerate(items[:-1]):
        frequent_extensions = []   # (itemset, tid_list) pairs sharing this prefix
        extension_itemsets = []    # just the itemsets, for the recursive call
        for candidate in items[position + 1:]:
            if first_time:
                # Level 1: items are bare values, so wrap them for the lookup.
                tids = findSupport(supports, [prefix], [candidate])
                members = [prefix, candidate]
            else:
                members = [*prefix, *candidate]
                tids = findSupport(supports, prefix, candidate)
            merged = sorted(set(members))
            if len(tids) < minsup:
                continue
            frequent_extensions.append((merged, tids))
            extension_itemsets.append(merged)
        total_supports.extend(frequent_extensions)
        # At least two siblings are needed to build a longer itemset.
        if len(frequent_extensions) > 1:
            supportsNextLevel(total_supports, frequent_extensions, extension_itemsets, minsup, False)
    return total_supports if first_time else None


def findSupport(supports, first_itemset, second_itemset):
    """Return the transactions shared by two itemsets.

    Parameters
    ----------
    supports : iterable of (itemset, tid_list)
        Known itemsets with the transaction indices that contain them.
    first_itemset, second_itemset : list
        Itemsets to look up; each must equal the itemset of some entry.

    Returns
    -------
    list
        Transaction indices present in both tid lists (order unspecified).

    Raises
    ------
    ValueError
        If either itemset is not present in ``supports``.
    """
    first_support = None
    second_support = None
    # Scan the whole list (no early exit) so that, should an itemset appear
    # more than once, the last entry wins.
    for support in supports:
        if first_itemset == support[0]:
            first_support = support[1]
        if second_itemset == support[0]:
            second_support = support[1]

    # Fail with a clear message instead of an UnboundLocalError.
    if first_support is None:
        raise ValueError(f"itemset {first_itemset!r} not found in supports")
    if second_support is None:
        raise ValueError(f"itemset {second_itemset!r} not found in supports")
    return list(set(first_support) & set(second_support))

def getStrongRulesFromFrequentSets(fsets, minconf):
    """Derive the strong association rules from a collection of frequent itemsets.

    Parameters
    ----------
    fsets : iterable of (itemset, support)
        Frequent itemsets with their absolute support.  Every non-empty proper
        subset of an itemset with two or more items must also be present, with
        its items in sorted order (the order ``subsets_non_empty`` produces).
        Items must be hashable.
    minconf : float
        Minimum confidence a rule needs to be reported.

    Returns
    -------
    list of (X, Y, support, confidence)
        One tuple per rule ``X -> Y`` with ``confidence >= minconf``, where
        ``X`` and ``Y`` are lists, ``support`` is the support of ``X ∪ Y`` and
        ``confidence = sup(X ∪ Y) / sup(X)``.

    Raises
    ------
    KeyError
        If a required subset is missing from ``fsets``.
    """
    fsets = list(fsets)

    # Support lookup by itemset.  setdefault keeps the first entry when an
    # itemset is listed more than once.
    support_of = {}
    for fset in fsets:
        support_of.setdefault(tuple(fset[0]), fset[1])

    strong_rules = []
    for fset in fsets:
        itemset, itemset_support = fset[0], fset[1]
        if len(itemset) < 2:
            continue
        candidates = subsets_non_empty(itemset)
        while candidates:
            # Visit antecedents from highest to lowest support; max() returns
            # the first maximum, so ties keep the candidates' list order.
            X = max(candidates, key=lambda subset: support_of[tuple(subset)])
            candidates.remove(X)
            confidence = itemset_support / support_of[tuple(X)]
            if confidence >= minconf:
                Y = [item for item in itemset if item not in X]
                strong_rules.append((X, Y, itemset_support, confidence))
            else:
                # Any W ⊂ X has sup(W) >= sup(X), hence an even lower
                # confidence: drop every remaining proper subset of X.
                antecedent = frozenset(X)
                candidates = [W for W in candidates if not frozenset(W) < antecedent]
    return strong_rules

def subsets_non_empty(Z):
    """Return every non-empty proper subset of ``Z``.

    Each subset is a sorted list; the result is ordered by subset size and
    then lexicographically.  ``Z`` itself is never included, so an empty or
    single-element ``Z`` yields an empty list.
    """
    # Sizes 1 .. len(Z)-1 produce exactly the proper subsets, which also makes
    # the empty-input case well defined (no pop from an empty list).
    proper_subsets = [
        sorted(subset)
        for size in range(1, len(Z))
        for subset in combinations(Z, size)
    ]
    proper_subsets.sort(key=lambda subset: (len(subset), subset))
    return proper_subsets

def getStrongRulesForDatabase(db, minsup, minconf):
    """Mine ``db`` with ``eclat`` and return the strong rules of its frequent itemsets.

    ``minsup`` is forwarded to ``eclat`` and ``minconf`` to
    ``getStrongRulesFromFrequentSets``.
    """
    return getStrongRulesFromFrequentSets(eclat(db, minsup), minconf)

# Exercise 1

# Exercise 2


## Exercise 2.1

In [139]:
# Toy transaction database: each inner list is one transaction.
example_database=[["A","B","D","E"],
                  ["B","C","E"],
                  ["A","B","D","E"],  
                  ["A","B","C","E"],
                  ["A","B","C","D","E"],
                  ["B","C","D"]]
# Antecedent (premise) and consequent (conclusion) of the example rule.
example_premis=["E","B"]
example_conclusion=["D"]
# Metric names understood by getRuleMetric.
metrics = ["sup", "conf", "lift", "leverage", "jaccard", "conviction", "oddsratio", "imp"]


In [140]:
def sup(D, X, Y=None):
    """Absolute support: number of transactions in ``D`` containing every item
    of ``X`` (and of ``Y`` too, when ``Y`` is given).

    ``X`` must be a list (it is copied, never mutated); ``Y`` may be any
    iterable of items.
    """
    required_items = X.copy()
    if Y is not None:
        required_items.extend(Y)
    return sum(
        1
        for transaction in D
        if all(item in transaction for item in required_items)
    )

In [141]:
def conf(D,X,Y):
    """Confidence of the rule ``X -> Y`` in ``D``: sup(XY) / sup(X).

    Raises ``ZeroDivisionError`` when no transaction contains ``X``.
    """
    # Transactions that satisfy the antecedent.
    covering = [t for t in D if all(item in t for item in X)]
    # ...and, among those, the ones that also satisfy the consequent.
    hits = sum(1 for t in covering if all(item in t for item in Y))
    return hits/len(covering)

In [142]:
def lift(D, X, Y=None):
    """Lift of ``X -> Y``: rsup(XY) / (rsup(X) * rsup(Y)).

    When ``Y`` is omitted a copy of ``X`` is used as the consequent.  Returns
    ``None`` if any of the three relative supports is zero.
    """
    if Y is None:
        Y = X.copy()

    has_x = [all(item in t for item in X) for t in D]
    has_y = [all(item in t for item in Y) for t in D]
    total = len(D)

    rsup_x = sum(has_x) / total
    rsup_y = sum(has_y) / total
    rsup_xy = sum(1 for in_x, in_y in zip(has_x, has_y) if in_x and in_y) / total

    if rsup_x == 0 or rsup_y == 0 or rsup_xy == 0:
        return None
    return rsup_xy / (rsup_x * rsup_y)

In [143]:
def leverage(D,X,Y):
    """Leverage of ``X -> Y``: rsup(XY) - rsup(X) * rsup(Y).

    Raises ``ZeroDivisionError`` when ``D`` is empty.
    """
    count_x = count_y = count_xy = 0
    for transaction in D:
        in_x = all(item in transaction for item in X)
        in_y = all(item in transaction for item in Y)
        count_x += in_x
        count_y += in_y
        count_xy += in_x and in_y
    size = len(D)
    rsup_x, rsup_y, rsup_xy = count_x/size, count_y/size, count_xy/size
    return rsup_xy - rsup_x*rsup_y

In [144]:
def jaccard(D,X,Y):
    """Jaccard coefficient of ``X -> Y``:
    rsup(XY) / (rsup(X) + rsup(Y) - rsup(XY)).

    Raises ``ZeroDivisionError`` when ``D`` is empty or when neither ``X`` nor
    ``Y`` occurs in any transaction.
    """
    def contains(transaction, items):
        return all(item in transaction for item in items)

    size = len(D)
    rsup_x = sum(1 for t in D if contains(t, X)) / size
    rsup_y = sum(1 for t in D if contains(t, Y)) / size
    rsup_xy = sum(1 for t in D if contains(t, X) and contains(t, Y)) / size
    return rsup_xy / (rsup_x + rsup_y - rsup_xy)

In [145]:
def conviction(D,X,Y):
    """Conviction of ``X -> Y``: (1 - rsup(Y)) / (1 - conf(X -> Y)).

    Returns ``None`` when the confidence is 1 or more, where the ratio is
    undefined.
    """
    consequent_count = sum(
        1 for transaction in D if all(item in transaction for item in Y)
    )
    rsup_y = consequent_count/len(D)
    rule_confidence = conf(D,X,Y)
    return None if rule_confidence >= 1 else (1-rsup_y)/(1-rule_confidence)

In [146]:
def oddsRatio(D,X,Y):
    """Odds ratio of ``X -> Y`` computed from the 2x2 contingency table.

    Every cell is incremented by one before the ratio is taken, so the result
    is always defined (an empty ``D`` gives 1.0).
    """
    # Contingency table keyed by (transaction has X, transaction has Y).
    table = {
        (True, True): 0,
        (True, False): 0,
        (False, True): 0,
        (False, False): 0,
    }
    for transaction in D:
        has_x = all(item in transaction for item in X)
        has_y = all(item in transaction for item in Y)
        table[has_x, has_y] += 1

    numerator = (table[True, True]+1)*(table[False, False]+1)
    denominator = (table[True, False]+1)*(table[False, True]+1)
    return numerator/denominator

In [147]:
def imp(D,X,Y):
    """Improvement of the rule ``X -> Y``.

    Computed as ``conf(X -> Y)`` minus the best confidence reached by a rule
    whose antecedent is a single item of ``X``.  Returns 0 when ``X`` has
    fewer than two items.
    """
    if len(X) < 2:
        return 0
    conf_xy = conf(D, X, Y)
    # conf expects an itemset as antecedent, so wrap each item in a list.
    # Passing the bare item would make conf iterate over the item itself
    # (characters of a string, or a TypeError for ints).
    best_single_item_conf = max(conf(D, [W], Y) for W in X)
    return conf_xy - best_single_item_conf

In [148]:
def getRuleMetric(D,X,Y,metric):
    """Evaluate the metric named ``metric`` for the rule ``X -> Y`` on ``D``.

    Recognised names: "sup", "conf", "lift", "leverage", "jaccard",
    "conviction", "oddsratio" and "imp".  Any other value yields the string
    "metrica invalida".
    """
    # Ordered dispatch table.  The lambdas defer the lookup of each metric
    # function until it is actually selected.
    dispatch = (
        ("sup", lambda: sup(D,X,Y)),
        ("conf", lambda: conf(D,X,Y)),
        ("lift", lambda: lift(D,X,Y)),
        ("leverage", lambda: leverage(D,X,Y)),
        ("jaccard", lambda: jaccard(D,X,Y)),
        ("conviction", lambda: conviction(D,X,Y)),
        ("oddsratio", lambda: oddsRatio(D,X,Y)),
        ("imp", lambda: imp(D,X,Y)),
    )
    for name, compute in dispatch:
        if metric == name:
            return compute()
    return "metrica invalida"
    
# Report every metric listed in `metrics` for the example rule
# example_premis -> example_conclusion on example_database.
print(f"Premisa: {example_premis}\nConclusión:{example_conclusion}\n")
for metric in metrics:
    result= getRuleMetric(example_database,example_premis,example_conclusion,metric)
    print(f"{metric}: {result}")

Premisa: ['E', 'B']
Conclusión:['D']

sup: 3
conf: 0.6
lift: 0.8999999999999999
leverage: -0.05555555555555558
jaccard: 0.5
conviction: 0.8333333333333334
oddsratio: 0.6666666666666666
imp: -0.06666666666666665


## Exercise 2.2

In [149]:
def filterProductiveRules(D, R):
    """Keep the rules of ``R`` whose improvement on ``D`` is strictly positive.

    Each rule must be a 4-tuple ``(X, Y, sup_XY, conf_X_Y)``; rules are
    returned unchanged and in their original order.
    """
    def is_productive(rule):
        antecedent, consequent, _support, _confidence = rule
        return imp(D, antecedent, consequent) > 0

    return [rule for rule in R if is_productive(rule)]


## Exercise 2.3

In [17]:
# Load the transactions once so that mining, filtering and every metric below
# work on the same in-memory database.
# NOTE(review): read_database is presumably provided by `from template import *`;
# the import cell must have been run in the current kernel.
shop_db = read_database("shop.dat")
R = getStrongRulesForDatabase(shop_db, 500, 0.95)

# filterProductiveRules needs the transactions themselves, not the file name.
productive_rules = filterProductiveRules(shop_db, R)

df = pd.DataFrame(productive_rules, columns=['X', 'Y', 'sup(XY)', 'conf(X -> Y)'])

# NOTE: this rebinds the module-level `metrics` list of metric names defined
# for Exercise 2.1; here it holds the DataFrame column labels to plot.
metrics = ['sup(XY)', 'conf(X -> Y)', 'lift(X -> Y)', 'leverage(X -> Y)', 'jaccard(X -> Y)', 'conviction(X -> Y)', 'oddsRatio(X -> Y)', 'imp(X -> Y)']

# The rule tuples only carry support and confidence; compute the remaining
# metric columns so that every label in `metrics` exists in `df`.
metric_functions = {
    'lift(X -> Y)': lift,
    'leverage(X -> Y)': leverage,
    'jaccard(X -> Y)': jaccard,
    'conviction(X -> Y)': conviction,
    'oddsRatio(X -> Y)': oddsRatio,
    'imp(X -> Y)': imp,
}
for label, metric_function in metric_functions.items():
    values = [metric_function(shop_db, X, Y) for X, Y in zip(df['X'], df['Y'])]
    # dtype=float turns the None returned for undefined metrics into NaN.
    df[label] = pd.Series(values, index=df.index, dtype=float)

# One 3D scatter plot per unordered triple of metrics.
for x_label, y_label, z_label in combinations(metrics, 3):
    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(df[x_label], df[y_label], df[z_label])

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_zlabel(z_label)

    for idx, row in df.iterrows():
        point = (row[x_label], row[y_label], row[z_label])
        # Skip labels for rules with an undefined (NaN) coordinate.
        if any(pd.isna(coordinate) for coordinate in point):
            continue
        ax.text(*point, str(idx))

    plt.title(f"3D Scatter Plot of {x_label}, {y_label}, and {z_label}")
    plt.show()
    # Release the figure: 56 plots are produced and would otherwise stay open.
    plt.close(fig)


NameError: name 'read_database' is not defined