# Apriori algorithm

In [1]:
import pandas as pd
from apyori import apriori

In [2]:
df = pd.read_csv('spending-grouped-newclasses.csv')
df.head()

Unnamed: 0,housing,food,transportation,income,non-essential,health,university,Class
0,709,296,123,1228,435,205,494.916667,6
1,557,365,85,1881,620,221,409.0,6
2,666,220,137,1662,601,135,254.25,4
3,652,289,114,882,540,135,411.25,6
4,825,372,168,1332,434,142,323.916667,5


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 996 entries, 0 to 995
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   housing         996 non-null    int64  
 1   food            996 non-null    int64  
 2   transportation  996 non-null    int64  
 3   income          996 non-null    int64  
 4   non-essential   996 non-null    int64  
 5   health          996 non-null    int64  
 6   university      996 non-null    float64
 7   Class           996 non-null    int64  
dtypes: float64(1), int64(7)
memory usage: 62.4 KB


In [4]:
def convert_to_cat(df, num_rangos):
    df_cat = df.copy()
    
    for col in df_cat.select_dtypes(include=['float64', 'int64']).columns:
        if col == "Class":
            df_cat[col] = df_cat[col].astype(str)
            df_cat[col] = df_cat[col].apply(lambda x: f"{col} / {x}")
        else:
            intervalos = pd.cut(df_cat[col], bins=num_rangos)
            df_cat[col] = intervalos.apply(
                lambda x: f"{col} / {x.left}-{x.right}" if pd.notnull(x) else x
            )
    
    return df_cat


In [5]:
df_t = convert_to_cat(df, 5); 
df_t.head()

Unnamed: 0,housing,food,transportation,income,non-essential,health,university,Class
0,housing / 640.6-760.4,food / 280.0-340.0,transportation / 110.0-140.0,income / 947.0-1334.0,non-essential / 361.0-490.0,health / 199.6-248.8,university / 450.05-500.0,Class / 6
1,housing / 520.8-640.6,food / 340.0-400.0,transportation / 80.0-110.0,income / 1721.0-2108.0,non-essential / 619.0-748.0,health / 199.6-248.8,university / 400.1-450.05,Class / 6
2,housing / 640.6-760.4,food / 160.0-220.0,transportation / 110.0-140.0,income / 1334.0-1721.0,non-essential / 490.0-619.0,health / 101.2-150.4,university / 250.0-300.2,Class / 4
3,housing / 640.6-760.4,food / 280.0-340.0,transportation / 110.0-140.0,income / 558.065-947.0,non-essential / 490.0-619.0,health / 101.2-150.4,university / 400.1-450.05,Class / 6
4,housing / 760.4-880.2,food / 340.0-400.0,transportation / 140.0-170.0,income / 947.0-1334.0,non-essential / 361.0-490.0,health / 101.2-150.4,university / 300.2-350.15,Class / 5


In [6]:
transactions = df_t.values.tolist()
transactions

[['housing / 640.6-760.4',
  'food / 280.0-340.0',
  'transportation / 110.0-140.0',
  'income / 947.0-1334.0',
  'non-essential / 361.0-490.0',
  'health / 199.6-248.8',
  'university / 450.05-500.0',
  'Class / 6'],
 ['housing / 520.8-640.6',
  'food / 340.0-400.0',
  'transportation / 80.0-110.0',
  'income / 1721.0-2108.0',
  'non-essential / 619.0-748.0',
  'health / 199.6-248.8',
  'university / 400.1-450.05',
  'Class / 6'],
 ['housing / 640.6-760.4',
  'food / 160.0-220.0',
  'transportation / 110.0-140.0',
  'income / 1334.0-1721.0',
  'non-essential / 490.0-619.0',
  'health / 101.2-150.4',
  'university / 250.0-300.2',
  'Class / 4'],
 ['housing / 640.6-760.4',
  'food / 280.0-340.0',
  'transportation / 110.0-140.0',
  'income / 558.065-947.0',
  'non-essential / 490.0-619.0',
  'health / 101.2-150.4',
  'university / 400.1-450.05',
  'Class / 6'],
 ['housing / 760.4-880.2',
  'food / 340.0-400.0',
  'transportation / 140.0-170.0',
  'income / 947.0-1334.0',
  'non-essentia

In [7]:
rules = apriori(transactions = transactions, min_support = 0.01, min_cinfidence = 0.2, min_lift = 3, min_length = 2, max_length = 4)

In [8]:
results = list(rules)
results

[RelationRecord(items=frozenset({'housing / 760.4-880.2', 'Class / 1', 'food / 160.0-220.0'}), support=0.02108433734939759, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Class / 1'}), items_add=frozenset({'housing / 760.4-880.2', 'food / 160.0-220.0'}), confidence=0.1381578947368421, lift=3.440131578947368), OrderedStatistic(items_base=frozenset({'housing / 760.4-880.2', 'food / 160.0-220.0'}), items_add=frozenset({'Class / 1'}), confidence=0.5249999999999999, lift=3.4401315789473674)]),
 RelationRecord(items=frozenset({'housing / 880.2-1000.0', 'Class / 1', 'food / 160.0-220.0'}), support=0.01706827309236948, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Class / 1'}), items_add=frozenset({'housing / 880.2-1000.0', 'food / 160.0-220.0'}), confidence=0.1118421052631579, lift=3.1827067669172933), OrderedStatistic(items_base=frozenset({'housing / 880.2-1000.0', 'food / 160.0-220.0'}), items_add=frozenset({'Class / 1'}), confidence=0.4857142857142857, lift=3.182

In [9]:
def inspect(results):
    lhs = []
    rhs = []
    supports = []
    confidences = []
    lifts = [] 

    for result in results:
        lhs_items = tuple(result[2][0][0])      
        rhs_item = tuple(result[2][0][1])
        lhs.append(lhs_items)
        rhs.append(rhs_item)
        supports.append(result[1])
        confidences.append(result[2][0][2])
        lifts.append(result[2][0][3])

    return list(zip(lhs, rhs, supports, confidences, lifts))

resultsinDataFrame = pd.DataFrame(inspect(results), columns=["LeftHand", "RightHand", "Support", "Confidence", "Lift"])
resultsinDataFrame.to_csv("association_rules.csv", index=False)

resultsinDataFrame = resultsinDataFrame[
    resultsinDataFrame['LeftHand'].apply(
        lambda x: isinstance(x, tuple) and len(x) == 1 and 'Class / ' in x[0]
    )
]
resultsinDataFrame.to_csv("association_rules_class.csv", index=False)

# Assosation rules without Classes

In [10]:
df_t.drop(columns=["Class"], inplace=True)
df_t.head()

Unnamed: 0,housing,food,transportation,income,non-essential,health,university
0,housing / 640.6-760.4,food / 280.0-340.0,transportation / 110.0-140.0,income / 947.0-1334.0,non-essential / 361.0-490.0,health / 199.6-248.8,university / 450.05-500.0
1,housing / 520.8-640.6,food / 340.0-400.0,transportation / 80.0-110.0,income / 1721.0-2108.0,non-essential / 619.0-748.0,health / 199.6-248.8,university / 400.1-450.05
2,housing / 640.6-760.4,food / 160.0-220.0,transportation / 110.0-140.0,income / 1334.0-1721.0,non-essential / 490.0-619.0,health / 101.2-150.4,university / 250.0-300.2
3,housing / 640.6-760.4,food / 280.0-340.0,transportation / 110.0-140.0,income / 558.065-947.0,non-essential / 490.0-619.0,health / 101.2-150.4,university / 400.1-450.05
4,housing / 760.4-880.2,food / 340.0-400.0,transportation / 140.0-170.0,income / 947.0-1334.0,non-essential / 361.0-490.0,health / 101.2-150.4,university / 300.2-350.15


In [11]:
transactions = df_t.values.tolist()
rules = apriori(transactions = transactions, min_support = 0.01, min_cinfidence = 0.2, min_lift = 3, min_length = 2, max_length = 4)
results = list(rules)


In [12]:
resultsinDataFrame = pd.DataFrame(inspect(results), columns=["LeftHand", "RightHand", "Support", "Confidence", "Lift"])

resultsinDataFrame.to_csv("association_rules_woclass.csv", index=False)