### Association rules

In [1]:
import pandas as pd

In [2]:
# Read the raw bookstore transactions from CSV into a DataFrame.
books = pd.read_csv(filepath_or_buffer='data/bookstore_transactions.csv')

In [3]:
# Turn each comma-separated 'Transaction' string into a list of item names,
# giving one list per row of `books`.
transactions = [basket.split(',') for basket in books['Transaction']]

# list.count compares whole lists, so this matches only transactions that are
# exactly ['History', 'Bookmark'] — same items, same order, nothing else.
transactions.count(['History', 'Bookmark'])

25

In [4]:
from itertools import permutations

In [10]:
# Flatten the list of transactions into one list holding every purchased item
# (duplicates included).
flattened = [item for transaction in transactions for item in transaction]

# Unique item names. Sorting (instead of relying on set iteration order, which
# can change between kernel sessions because of string hash randomization)
# makes `items` — and therefore the order of `rules` — reproducible on every run.
items = sorted(set(flattened))

# Every ordered pair of distinct items is one candidate rule.
rules = list(permutations(items, 2))

#### Summary

`List of transactions (each a list of items) -> unique items -> 2-item permutations to generate candidate rules`

### Metrics and pruning
**Support: number of transactions that contain a particular itemset / total number of transactions**

In [33]:
from mlxtend.frequent_patterns import association_rules
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder

In [34]:
# Fit the encoder on the transactions, then encode every transaction as one
# row with a column per item name taken from the fitted encoder.
encoder = TransactionEncoder()
onehot = pd.DataFrame(
    encoder.fit(transactions).transform(transactions),
    columns=encoder.columns_,
)

In [35]:
import numpy as np

In [36]:
# Inspect the one-hot encoded transactions (one True/False column per item).
onehot

Unnamed: 0,Biography,Bookmark,Fiction,History,Poetry
0,False,True,False,True,False
1,False,True,False,True,False
2,False,True,True,False,False
3,True,True,False,False,False
4,False,True,False,True,False
...,...,...,...,...,...
94,True,True,False,False,False
95,False,True,False,False,True
96,False,True,False,True,False
97,True,True,False,False,False


In [37]:
# Flag the transactions that contain both Fiction and Bookmark, so the support
# of that two-item set can be read off the same way as for a single item.
onehot['Fiction+Bookmark'] = onehot['Fiction'] & onehot['Bookmark']

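`Calculate support as the column-wise mean of onehot: the mean of a boolean column is the fraction of transactions that contain the item`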

In [38]:
# Column-wise mean: for a True/False column this is the share of transactions
# in which the item (or item combination) appears, i.e. its support.
onehot.mean(axis=0)

Biography           0.404040
Bookmark            1.000000
Fiction             0.252525
History             0.252525
Poetry              0.090909
Fiction+Bookmark    0.252525
dtype: float64

**Confidence and lift**

$\text{Confidence}(X \rightarrow Y) = \frac{\text{Support}(X, Y)}{\text{Support}(X)}$