In [99]:
import os
from typing import List

from apyori import apriori

from helper_functions import load_dataset

### Loading the cleaned dataset

In [100]:
# Load the cleaned spreadsheet and turn it into the list-of-lists shape that
# run_apriori() expects. `transactions` stays None when the file is missing.
transactions = None
try:
    # NOTE(review): load_dataset is a project helper; presumably it returns a
    # pandas DataFrame (the code below relies on .astype/.values) - confirm.
    data = load_dataset('../data/assignment1_income_levels_cleaned.xlsx')
except FileNotFoundError:
    print('File not found')
else:
    # Every value is converted to str before the rows are exported as nested lists.
    transactions = data.astype(str).values.tolist()

## 2) Search for Association Rules 

### Function to run the apriori algorithm

In [ ]:
def _format_metric(value: float, digits: int = 5) -> str:
    """Render a support/confidence/lift value rounded to `digits` decimal places."""
    # Rounding (instead of slicing the repr) keeps values such as 1.2345678e-05
    # from being cut down to the misleading text "1.23456".
    return str(round(value, digits))


def run_apriori(transactions: List[List], min_support: float, min_confidence: float, min_length: int, sorted_by_support: bool=True):
    """
    Run the apriori algorithm and write the results (rules) to a txt file.

    The report goes to ``../output/output_{min_support}_{min_confidence}_{min_length}.txt``
    (relative to the current working directory); the ``../output`` directory is created
    when it does not exist yet. For every reported itemset the file contains an
    ``Items: (...)`` header, one tab-indented ``antecedent => consequent`` line per rule
    with its support, confidence and lift, and a trailing blank line.

    :param transactions: dataset as a list of lists (items are joined into text, so they must be strings)
    :param min_support: minimum support of relations
    :param min_confidence: minimum confidence of relations
    :param min_length: minimum number of items in a rule; enforced by this function,
        and values below 2 behave like 2 because a rule needs an antecedent and a consequent
    :param sorted_by_support: True if the results should be sorted by support (descending), False otherwise
    :return: None - the rules are written to the output file; 'File not found' is printed
        if the output path cannot be opened
    """
    file_name = f'../output/output_{min_support}_{min_confidence}_{min_length}.txt'

    # apyori's apriori() only reads min_support / min_confidence / min_lift / max_length
    # (as of apyori 1.1.x); the min_length keyword is silently ignored, so the length
    # filter is applied below. The argument is still forwarded for compatibility.
    results = list(apriori(transactions, min_support=min_support, min_confidence=min_confidence, min_length=min_length))
    if sorted_by_support:
        results = sorted(results, key=lambda rule: rule.support, reverse=True)

    # A rule needs at least one item on each side, so never report itemsets below size 2.
    smallest_itemset = max(min_length, 2)

    try:
        # Create the output directory up front so a finished run is not lost
        # just because '../output' has not been created yet.
        os.makedirs(os.path.dirname(file_name), exist_ok=True)
        with open(file_name, 'w') as file:
            for rule in results:
                if len(rule.items) < smallest_itemset:
                    continue

                # Items are sorted because frozenset iteration order differs between
                # interpreter runs; sorting keeps the report reproducible.
                file.write(f"Items: ({', '.join(sorted(rule.items))})\n")
                support = _format_metric(rule.support)

                for statistic in rule.ordered_statistics:
                    # skip rules with empty antecedent or consequent
                    if not statistic.items_base or not statistic.items_add:
                        continue

                    antecedent = ', '.join(sorted(statistic.items_base))
                    consequent = ', '.join(sorted(statistic.items_add))
                    confidence = _format_metric(statistic.confidence)
                    lift = _format_metric(statistic.lift)

                    file.write(f"\t{antecedent} => {consequent} (support: {support}, confidence: {confidence}, lift: {lift})\n")
                file.write('\n')

    except FileNotFoundError:
        print('File not found')

### a) Running the apriori algorithm

In [101]:
# Thresholds for experiment a); min_length = 2 asks for rules with at least two items.
min_support, min_confidence, min_length = 0.2, 0.95, 2
sorted_by_support = True

# Writes the rules to ../output/output_0.2_0.95_2.txt
run_apriori(
    transactions,
    min_support=min_support,
    min_confidence=min_confidence,
    min_length=min_length,
    sorted_by_support=sorted_by_support,
)

### b) Running the apriori algorithm