## Problem Statement: 
You work in XYZ Company as a Python. The company officials want you to write code for a Association Rule Mining Dataset: retail_dataset.csv Tasks to be performed:
- Using pandas import the dataset as dataframe
- Install the mixtend library to use apriory and association rule mining
- Using the apriori algorithm generate a list of item frequently brought together.
- Generate the association rules for the given items from apriori algorithm

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [11]:
data = pd.read_csv('Retail_Final.csv', names=['Products'])

In [12]:
data

Unnamed: 0,Products
0,"Pencils, Markers, Highlighters, Papers"
1,"Markers, Erasers"
2,"Stapler Pins, Papers, Erasers, Card Holders, H..."
3,"Papers, Erasers, Cad Holders"
4,"Markers, Post-it, Erasers"
5,Envelop
6,"Markers, Erasers"
7,"Pencils, Markers, StaplerPins, Post-it, Highli..."
8,"StaplerPins, Post-it, Markers, Erasers"
9,Envelop


## Data Processing

In [15]:
def split_txn(str_):
    return str_.split(',')
data['Txn'] = data['Products'].apply(split_txn)

In [17]:
data = data['Txn'].to_list()

In [18]:
data

[['Pencils', ' Markers', ' Highlighters', ' Papers'],
 ['Markers', ' Erasers'],
 ['Stapler Pins', ' Papers', ' Erasers', ' Card Holders', ' Highlighters'],
 ['Papers', ' Erasers', ' Cad Holders'],
 ['Markers', ' Post-it', ' Erasers'],
 ['Envelop'],
 ['Markers', ' Erasers'],
 ['Pencils',
  ' Markers',
  ' StaplerPins',
  ' Post-it',
  ' Highlighter',
  ' Papers',
  ' Erasers'],
 ['StaplerPins', ' Post-it', ' Markers', ' Erasers'],
 ['Envelop']]

## Encoding the Transaction

In [19]:
!pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.22.0-py2.py3-none-any.whl (1.4 MB)
     ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
     ---- ----------------------------------- 0.2/1.4 MB 3.1 MB/s eta 0:00:01
     --------- ------------------------------ 0.3/1.4 MB 3.9 MB/s eta 0:00:01
     ---------------- ----------------------- 0.6/1.4 MB 4.3 MB/s eta 0:00:01
     ---------------------- ----------------- 0.8/1.4 MB 4.3 MB/s eta 0:00:01
     ------------------------------- -------- 1.1/1.4 MB 4.6 MB/s eta 0:00:01
     ------------------------------------- -- 1.3/1.4 MB 4.7 MB/s eta 0:00:01
     ---------------------------------------  1.4/1.4 MB 4.5 MB/s eta 0:00:01
     ---------------------------------------  1.4/1.4 MB 4.5 MB/s eta 0:00:01
     ---------------------------------------  1.4/1.4 MB 4.5 MB/s eta 0:00:01
     ---------------------------------------- 1.4/1.4 MB 3.2 MB/s eta 0:00:00
Installing collected packages: mlxtend
Successfully installed mlxtend-0

In [20]:
from mlxtend.preprocessing import TransactionEncoder

In [21]:
enc =TransactionEncoder()

In [22]:
enc_data = enc.fit(data).transform(data)

In [23]:
df1 = pd.DataFrame(enc_data, columns = enc.columns_)
df1 = df1.replace(True,1)
df1 = df1.replace(False,0)
df1.head(4)

Unnamed: 0,Cad Holders,Card Holders,Erasers,Highlighter,Highlighters,Markers,Papers,Post-it,StaplerPins,Envelop,Markers.1,Papers.1,Pencils,Stapler Pins,StaplerPins.1
0,0,0,0,0,1,1,1,0,0,0,0,0,1,0,0
1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0
2,0,1,1,0,1,0,1,0,0,0,0,0,0,1,0
3,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0


## Association Rule mining

In [24]:
from mlxtend.frequent_patterns import apriori, association_rules

In [25]:
df3= apriori(df1, min_support = 0.2, use_colnames = True)



In [26]:
df3.sort_values('support', ascending = False)

Unnamed: 0,support,itemsets
0,0.7,( Erasers)
2,0.3,( Markers)
3,0.3,( Papers)
4,0.3,( Post-it)
6,0.3,(Markers)
10,0.3,"( Post-it, Erasers)"
11,0.3,"( Erasers, Markers)"
13,0.2,"( Markers, Papers)"
17,0.2,"( Markers, Post-it, Erasers)"
16,0.2,"( Papers, Pencils)"


In [27]:
df_associations = association_rules(df3, metric = 'confidence', min_threshold = 0.5)

In [28]:
df_associations

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,( Markers),( Erasers),0.3,0.7,0.2,0.666667,0.952381,-0.01,0.9,-0.066667
1,( Papers),( Erasers),0.3,0.7,0.2,0.666667,0.952381,-0.01,0.9,-0.066667
2,( Post-it),( Erasers),0.3,0.7,0.3,1.0,1.428571,0.09,inf,0.428571
3,(Markers),( Erasers),0.3,0.7,0.3,1.0,1.428571,0.09,inf,0.428571
4,( Papers),( Highlighters),0.3,0.2,0.2,0.666667,3.333333,0.14,2.4,1.0
5,( Highlighters),( Papers),0.2,0.3,0.2,1.0,3.333333,0.14,inf,0.875
6,( Markers),( Papers),0.3,0.3,0.2,0.666667,2.222222,0.11,2.1,0.785714
7,( Papers),( Markers),0.3,0.3,0.2,0.666667,2.222222,0.11,2.1,0.785714
8,( Markers),( Post-it),0.3,0.3,0.2,0.666667,2.222222,0.11,2.1,0.785714
9,( Post-it),( Markers),0.3,0.3,0.2,0.666667,2.222222,0.11,2.1,0.785714
