# Importing the libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Importing the dataset

In [None]:
# Load the raw transactions; the Items column holds each basket as a
# comma-separated string (e.g. "F,G,I,J").
df = pd.read_csv('Transaction.csv')
# Preview the first rows. The bare `df.sample` that used to sit here was an
# attribute access without a call, so it neither sampled nor displayed anything.
df.head(10)

Unnamed: 0,Trans_Id,Items
0,1,"F,G,I,J"
1,2,"G,H,J"
2,3,"G,I,J"
3,4,"G,H,J"
4,5,"G,H,I,J"
5,6,"G,H,I"


# Pre-processing

In [None]:
# Build the sorted vocabulary of distinct item labels across all baskets.
# Whitespace is stripped *before* the emptiness check, so a blank token such
# as the " " in "A, ,B" is discarded instead of becoming an empty-string item.
all_items = sorted({
    token.strip()
    for basket in df['Items'].str.split(',')
    for token in basket
    if token.strip()
})
all_items

['F', 'G', 'H', 'I', 'J']

In [None]:
# Start an empty frame with one column per item label, preceded by the
# transaction identifier; the item columns are filled in by a later cell.
transaction_df = pd.DataFrame(columns=['Trans_Id', *all_items])
# Copying the id column from `df` also gives the frame one row per transaction.
transaction_df['Trans_Id'] = df['Trans_Id']
transaction_df

Unnamed: 0,Trans_Id,F,G,H,I,J
0,1,,,,,
1,2,,,,,
2,3,,,,,
3,4,,,,,
4,5,,,,,
5,6,,,,,


In [None]:
# One-hot encode the baskets: column <item> is True when the transaction
# contains that item and False otherwise.
#
# Tokens are stripped exactly as they were when `all_items` was built, so an
# entry written as "G, H" still matches the label 'H'.
baskets = [{token.strip() for token in raw.split(',')} for raw in df['Items']]
for item in all_items:
    # Assign a whole column per item instead of writing cell by cell with
    # iterrows()/.at; this yields bool-dtype columns directly, the input form
    # mlxtend's frequent-pattern functions document for one-hot data.
    transaction_df[item] = [item in basket for basket in baskets]
transaction_df.head()

Unnamed: 0,Trans_Id,F,G,H,I,J
0,1,True,True,False,True,True
1,2,False,True,True,False,True
2,3,False,True,False,True,True
3,4,False,True,True,False,True
4,5,False,True,True,True,True


In [None]:
# Keep only the one-hot item columns: the transaction id is an identifier,
# not an item, and must not be passed to the itemset miner.
transaction_df.drop(columns=['Trans_Id'], inplace=True)
transaction_df

  and should_run_async(code)


Unnamed: 0,F,G,H,I,J
0,True,True,False,True,True
1,False,True,True,False,True
2,False,True,False,True,True
3,False,True,True,False,True
4,False,True,True,True,True
5,False,True,True,True,False


# Construct model using Association Rules

## Apply FP-growth to determine all the frequent itemsets. Set minimum support value to 50%

In [None]:
# `apriori` stays imported in case later cells rely on it; the mining itself
# uses FP-growth, which is what this step of the task asks for.
from mlxtend.frequent_patterns import apriori, fpgrowth

# Mine every itemset that appears in at least 50% of the transactions.
# fpgrowth takes the same arguments as apriori and reports the same itemsets
# and supports for a given threshold; only the row order may differ.
frequent_itemsets = fpgrowth(transaction_df, min_support=0.50, use_colnames=True)
frequent_itemsets

  and should_run_async(code)


Unnamed: 0,support,itemsets
0,1.0,(G)
1,0.666667,(H)
2,0.666667,(I)
3,0.833333,(J)
4,0.666667,"(G, H)"
5,0.666667,"(I, G)"
6,0.833333,"(G, J)"
7,0.5,"(J, H)"
8,0.5,"(I, J)"
9,0.5,"(G, J, H)"


## Apply the “Create Association Rules” operator. Set the minimum confidence value to 75%.

In [None]:
from mlxtend.frequent_patterns import association_rules

# Keep only the strong rules: those whose confidence is at least 75%.
# The filter must be on "confidence"; filtering on metric="lift" with the same
# 0.75 threshold let through rules with confidence as low as 0.6.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.75)
rules

  and should_run_async(code)
  sqr = _ensure_numeric((avg - values) ** 2)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(G),(H),1.0,0.666667,0.666667,0.666667,1.0,0.0,1.0,0.0
1,(H),(G),0.666667,1.0,0.666667,1.0,1.0,0.0,inf,0.0
2,(I),(G),0.666667,1.0,0.666667,1.0,1.0,0.0,inf,0.0
3,(G),(I),1.0,0.666667,0.666667,0.666667,1.0,0.0,1.0,0.0
4,(G),(J),1.0,0.833333,0.833333,0.833333,1.0,0.0,1.0,0.0
5,(J),(G),0.833333,1.0,0.833333,1.0,1.0,0.0,inf,0.0
6,(J),(H),0.833333,0.666667,0.5,0.6,0.9,-0.055556,0.833333,-0.4
7,(H),(J),0.666667,0.833333,0.5,0.75,0.9,-0.055556,0.666667,-0.25
8,(I),(J),0.666667,0.833333,0.5,0.75,0.9,-0.055556,0.666667,-0.25
9,(J),(I),0.833333,0.666667,0.5,0.6,0.9,-0.055556,0.833333,-0.4


# Model deployment using Association Rules

## List all the frequent itemsets that have been found (with their support)

In [None]:
# Display the frequent itemsets mined earlier, one row per itemset with its support.
frequent_itemsets

  and should_run_async(code)


Unnamed: 0,support,itemsets
0,1.0,(G)
1,0.666667,(H)
2,0.666667,(I)
3,0.833333,(J)
4,0.666667,"(G, H)"
5,0.666667,"(I, G)"
6,0.833333,"(G, J)"
7,0.5,"(J, H)"
8,0.5,"(I, J)"
9,0.5,"(G, J, H)"


## What is the maximal frequent itemset found? Give its support. List all the strong association rules that can be generated from it.

In [None]:
# Show the generated rules ranked from highest to lowest confidence.
# NOTE(review): this only orders the rules; it does not itself pick out the
# maximal frequent itemset or its support - confirm that is answered elsewhere.
rules.sort_values(by=['confidence'], ascending=[False])

  and should_run_async(code)
  sqr = _ensure_numeric((avg - values) ** 2)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
12,"(H, J)",(G),0.5,1.0,0.5,1.0,1.0,0.0,inf,0.0
2,(I),(G),0.666667,1.0,0.666667,1.0,1.0,0.0,inf,0.0
5,(J),(G),0.833333,1.0,0.833333,1.0,1.0,0.0,inf,0.0
17,"(I, J)",(G),0.5,1.0,0.5,1.0,1.0,0.0,inf,0.0
1,(H),(G),0.666667,1.0,0.666667,1.0,1.0,0.0,inf,0.0
4,(G),(J),1.0,0.833333,0.833333,0.833333,1.0,0.0,1.0,0.0
11,"(G, H)",(J),0.666667,0.833333,0.5,0.75,0.9,-0.055556,0.666667,-0.25
19,(I),"(G, J)",0.666667,0.833333,0.5,0.75,0.9,-0.055556,0.666667,-0.25
7,(H),(J),0.666667,0.833333,0.5,0.75,0.9,-0.055556,0.666667,-0.25
8,(I),(J),0.666667,0.833333,0.5,0.75,0.9,-0.055556,0.666667,-0.25
