### Implement an Apriori algorithm for generating frequent item sets in transactional data by iteratively finding patterns that occur together.

In [1]:
import pandas as pd
import numpy as np
import math

In [5]:
# Sample market-basket data: one inner list of item labels per transaction.
transactions = [
    ["W1", "W2", "W3", "W4"],
    ["W1", "W2", "W3"],
    ["W3", "W4"],
    ["W1", "W2", "W5"],
    ["W1", "W3", "W4"],
    ["W3", "W4", "W5"],
]
transactions

[['W1', 'W2', 'W3', 'W4'],
 ['W1', 'W2', 'W3'],
 ['W3', 'W4'],
 ['W1', 'W2', 'W5'],
 ['W1', 'W3', 'W4'],
 ['W3', 'W4', 'W5']]

In [23]:
def count_item(trans_items):
    """Count the support of every individual item across the transactions.

    The support count of an item is the number of transactions containing
    it, so an item repeated inside one transaction is counted only once for
    that transaction.

    Args:
        trans_items: Iterable of transactions, each an iterable of hashable,
            mutually sortable items (for example strings).

    Returns:
        pandas.DataFrame with the columns ``item_sets`` (the item) and
        ``supp_count`` (its support count), sorted by ``item_sets``. Index
        labels follow the order in which the items were first encountered.
    """
    support = {}
    for row in trans_items:
        # dict.fromkeys drops repeats within the transaction while keeping
        # first-seen order, so the resulting index stays deterministic
        # (iterating a plain set of strings would not be).
        for item in dict.fromkeys(row):
            support[item] = support.get(item, 0) + 1

    data = pd.DataFrame(
        {
            "item_sets": list(support),
            "supp_count": list(support.values()),
        }
    )
    return data.sort_values("item_sets")

In [8]:
def prune(data, supp):
    """Return the rows of *data* whose ``supp_count`` is at least *supp*."""
    meets_min_support = data["supp_count"] >= supp
    return data[meets_min_support]

In [31]:
def count_itemset(transaction_df, itemsets):
    """Count how many transactions contain each candidate itemset.

    Args:
        transaction_df: Iterable of transactions, each an iterable of
            hashable items. It is materialised once up front, so a one-shot
            iterator is also accepted.
        itemsets: Iterable of candidate itemsets (tuples of items). A plain
            string is treated as a single item rather than being split into
            characters.

    Returns:
        pandas.DataFrame with the columns ``item_sets`` and ``supp_count``.
        Candidates that occur in no transaction are left out, and rows keep
        the order in which the candidates were supplied.
    """
    # Build each transaction's set once instead of once per candidate.
    baskets = [set(row) for row in transaction_df]

    support = {}
    for item_set in itemsets:
        required = {item_set} if isinstance(item_set, str) else set(item_set)
        hits = sum(1 for basket in baskets if required <= basket)
        if hits:
            # Accumulate so that a candidate listed twice keeps adding up,
            # exactly as a per-match increment would.
            support[item_set] = support.get(item_set, 0) + hits

    return pd.DataFrame(
        {
            "item_sets": list(support),
            "supp_count": list(support.values()),
        }
    )

In [33]:
def join(list_of_items):
    """Build candidate (k+1)-itemsets from a sequence of k-itemsets.

    Single items (anything that is not a tuple or list) are paired with every
    later, different item. Tuple/list itemsets are merged with every later
    itemset that shares the same first k-1 elements, appending only the last
    element of the later one so the result has exactly k+1 entries.

    Args:
        list_of_items: Ordered sequence (list, pandas Series, ...) of single
            items or of equally sized tuples.

    Returns:
        List of candidate itemsets in generation order, or ``None`` when no
        candidate can be formed.
    """
    # Copy into a list so slicing is positional whatever container (for
    # example a Series with a non-contiguous index) was passed in.
    items = list(list_of_items)

    itemsets = []
    for position, entry in enumerate(items):
        for item in items[position + 1:]:
            if not isinstance(item, (tuple, list)):
                if entry != item:
                    itemsets.append((entry, item))
            elif entry[:-1] == item[:-1] and entry[-1:] != item[-1:]:
                # Only the final element is new; item[1:] would repeat the
                # shared prefix for itemsets longer than two.
                itemsets.append(entry + item[-1:])

    return itemsets or None

In [67]:
def calculate_conf(value1, value2):
    """Return ``value1 / value2`` as a percentage rounded to two decimals.

    Both arguments are converted with ``int`` before dividing.
    """
    numerator = int(value1)
    denominator = int(value2)
    ratio = numerator / denominator
    return round(ratio * 100, 2)

In [27]:
def apriori(trans_data, supp=3, con=0.5):
    """Find the largest frequent itemsets with a level-wise Apriori search.

    Starting from single items, each pass drops the candidates below the
    minimum support, remembers the survivors, and joins them into candidates
    one item larger. The search stops when no candidate survives or no larger
    candidate can be formed.

    Args:
        trans_data: Transactions as a re-iterable collection of item
            sequences (it is traversed once per level).
        supp: Minimum support count, i.e. the absolute number of
            transactions an itemset must appear in.
        con: Not used by the search; kept so existing callers keep working.

    Returns:
        pandas.DataFrame with the columns ``item_sets`` and ``supp_count``
        holding the frequent itemsets of the last level that had any. The
        frame is empty (same columns) when nothing reaches *supp*.
    """
    df = count_item(trans_data)
    # Start from an empty frame with the same columns so the result has a
    # stable schema even when nothing turns out to be frequent.
    freq = df.iloc[0:0]

    while len(df) != 0:
        df = prune(df, supp)
        if len(df) == 0:
            break

        # Every row left after pruning already satisfies the support bound.
        freq = df

        itemsets = join(df.item_sets)
        if itemsets is None:
            break

        df = count_itemset(trans_data, itemsets)

    # Always hand back the last non-empty frequent level, never the empty
    # candidate table that ended the loop.
    return freq
        

In [93]:
def strong_rules(freq_item_sets, threshold, trans_data=None):
    """List the item-to-item rules whose confidence reaches *threshold*.

    For every itemset row and every ordered pair ``(a, b)`` of its elements,
    the confidence is the row's support count divided by the support count
    of ``a`` alone, expressed as a percentage.

    Args:
        freq_item_sets: DataFrame with the columns ``item_sets`` (tuples of
            items) and ``supp_count``.
        threshold: Minimum confidence, in percent, for a rule to be kept.
        trans_data: Transactions used to look up single-item supports.
            Defaults to the module-level ``transactions``.

    Returns:
        pandas.DataFrame with the columns ``item_set`` (the ordered pair)
        and ``confidence``, restricted to rows with
        ``confidence >= threshold``.

    Raises:
        KeyError: If an item of a frequent itemset never occurs in
            *trans_data*.
    """
    if len(freq_item_sets) == 0:
        # Nothing frequent means no rules; this also covers an empty frame
        # that has no columns at all.
        return pd.DataFrame({"item_set": [], "confidence": []})

    if trans_data is None:
        # Fall back to the notebook-level dataset.
        trans_data = transactions

    # Single-item supports are the same for every rule: compute them once
    # instead of re-scanning the transactions for each pair.
    item_counts = count_item(trans_data)
    item_support = dict(zip(item_counts.item_sets, item_counts.supp_count))

    confidences = {}
    for row, row_support in zip(freq_item_sets.item_sets,
                                freq_item_sets.supp_count):
        if not isinstance(row, (tuple, list)):
            # A lone item cannot be split into antecedent and consequent.
            continue
        # NOTE(review): for itemsets with more than two items the value is
        # supp(row) / supp(a), i.e. the confidence of "a -> rest of row",
        # stored under the key (a, b); later rows overwrite equal keys.
        # Confirm whether pair-level confidence is wanted there instead.
        for i, antecedent in enumerate(row):
            for j, consequent in enumerate(row):
                if i != j:
                    confidences[(antecedent, consequent)] = calculate_conf(
                        row_support, item_support[antecedent]
                    )

    conf_df = pd.DataFrame(
        {
            "item_set": list(confidences),
            "confidence": list(confidences.values()),
        }
    )
    return conf_df[conf_df.confidence >= threshold]

In [94]:
# Mine the frequent itemsets using a minimum support count of 3.
freq_item_sets = apriori(transactions, supp=3)
freq_item_sets

Unnamed: 0,item_sets,supp_count
0,"(W1, W2)",3
1,"(W1, W3)",3
5,"(W3, W4)",4


In [96]:
# Keep only the rules whose confidence is at least 80 percent.
strong_rules(freq_item_sets, threshold=80.0)

Unnamed: 0,item_set,confidence
1,"(W2, W1)",100.0
4,"(W3, W4)",80.0
5,"(W4, W3)",100.0


## -- END --