## 1. Dataset Load

### Load the dataset in classic way

In [1]:
dataset = [['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
           ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
           ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
           ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
           ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]

#### Loading the dataset using pandas

In [2]:
import pandas as pd

# create a dataframe from the dictionary
df = pd.DataFrame(dataset)

# write the dataframe to a CSV file
df.to_csv('dataset.csv', index=False, header=None)

#### After that using csv python package to change the csv file to list

In [3]:
import csv

with open('dataset.csv', newline='', encoding='utf-8') as csvfile:
    data = csv.reader(csvfile)
    data_list = list(data)

    print(data_list)

[['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'], ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'], ['Milk', 'Apple', 'Kidney Beans', 'Eggs', '', ''], ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt', ''], ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]


#### copy and past the output above into variable 

In [4]:
dataset = [['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'], ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'], ['Milk', 'Apple', 'Kidney Beans', 'Eggs', '', ''], ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt', ''], ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]

In [5]:
## don't try loading the   .csv    file using   read_csv()   function from pandas 

# import pandas as pd
# dataset = pd.read_csv('dataset.csv')
# print(df)

## 2. Installing mlxtend python package

`mlxtend` (machine learning extensions) is a Python library for a wide range of useful tools for machine learning and data analysis. It is built on top of popular Python libraries such as `NumPy`, `Pandas`, and `scikit-learn`, and provides a collection of functions for tasks such as data preprocessing, feature selection, feature extraction, model evaluation, and model selection.

In [6]:
!pip install mlxtend



## 3. Loading require packages

In [7]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

## 4. Transform the data

changing the dataset to `True` and `False` value using `TransactionEncode()` function this is for correlation purpose

In [8]:
te = TransactionEncoder()
te_try = te.fit(dataset).transform(dataset)

### Generate Dataframe

In [9]:
df = pd.DataFrame(te_try, columns=te.columns_)

In [10]:
df

Unnamed: 0,Unnamed: 1,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,False,True,False,True,True,True,True,False,True
1,False,False,False,True,True,False,True,False,True,True,False,True
2,True,True,False,False,True,False,True,True,False,False,False,False
3,True,False,True,False,False,False,True,True,False,False,True,True
4,False,False,True,False,True,True,True,False,False,True,False,False


## 5. Model Training

### Finding Support

In [11]:
from mlxtend.frequent_patterns import apriori

### Returning this data that minimum support is ( 0.5 )

In [12]:
apriori(df,min_support=0.5)

Unnamed: 0,support,itemsets
0,0.8,(4)
1,1.0,(6)
2,0.6,(7)
3,0.6,(9)
4,0.6,(11)
5,0.8,"(4, 6)"
6,0.6,"(9, 4)"
7,0.6,"(6, 7)"
8,0.6,"(9, 6)"
9,0.6,"(11, 6)"


### Model Training with Name of the Actual Data

In [13]:
apriori(df,min_support=0.5, use_colnames=True)

Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Kidney Beans, Eggs)"
6,0.6,"(Onion, Eggs)"
7,0.6,"(Kidney Beans, Milk)"
8,0.6,"(Onion, Kidney Beans)"
9,0.6,"(Yogurt, Kidney Beans)"


### Calculate the length of Itemset

In [14]:
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.8,(Eggs),1
1,1.0,(Kidney Beans),1
2,0.6,(Milk),1
3,0.6,(Onion),1
4,0.6,(Yogurt),1
5,0.8,"(Kidney Beans, Eggs)",2
6,0.6,"(Onion, Eggs)",2
7,0.6,"(Kidney Beans, Milk)",2
8,0.6,"(Onion, Kidney Beans)",2
9,0.6,"(Yogurt, Kidney Beans)",2


### Length is 2 and Support is >= 0.6

* get back this raw that have more than 2 item with each other 
* also get back this item that have support more than (0.6)

In [15]:
frequent_itemsets[ (frequent_itemsets['length'] == 2) & (frequent_itemsets['support'] >= 0.6) ]

Unnamed: 0,support,itemsets,length
5,0.8,"(Kidney Beans, Eggs)",2
6,0.6,"(Onion, Eggs)",2
7,0.6,"(Kidney Beans, Milk)",2
8,0.6,"(Onion, Kidney Beans)",2
9,0.6,"(Yogurt, Kidney Beans)",2


TB: `assessment` lab 4 for finding correlation between 3 or greater than 3 items that have unless support 0.6 

In [16]:
frequent_itemsets[ (frequent_itemsets['length'] >= 3) & (frequent_itemsets['support'] >= 0.6) ]

Unnamed: 0,support,itemsets,length
10,0.6,"(Onion, Kidney Beans, Eggs)",3


TB: `assessment` lab 4 for finding correlation between less than 3 items that have unless support 0.6 

In [17]:
frequent_itemsets[ (frequent_itemsets['length'] < 3) & (frequent_itemsets['support'] >= 0.6) ]

Unnamed: 0,support,itemsets,length
0,0.8,(Eggs),1
1,1.0,(Kidney Beans),1
2,0.6,(Milk),1
3,0.6,(Onion),1
4,0.6,(Yogurt),1
5,0.8,"(Kidney Beans, Eggs)",2
6,0.6,"(Onion, Eggs)",2
7,0.6,"(Kidney Beans, Milk)",2
8,0.6,"(Onion, Kidney Beans)",2
9,0.6,"(Yogurt, Kidney Beans)",2


## Finding Correlation Between the item

In [18]:
from mlxtend.frequent_patterns import association_rules

In [19]:
resource = association_rules(frequent_itemsets, metric="support", min_threshold=0.7)

In [20]:
resource

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Kidney Beans),(Eggs),1.0,0.8,0.8,0.8,1.0,0.0,1.0
1,(Eggs),(Kidney Beans),0.8,1.0,0.8,1.0,1.0,0.0,inf


In [21]:
resource = association_rules(frequent_itemsets, metric="support", min_threshold=0.8)

In [22]:
resource

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Kidney Beans),(Eggs),1.0,0.8,0.8,0.8,1.0,0.0,1.0
1,(Eggs),(Kidney Beans),0.8,1.0,0.8,1.0,1.0,0.0,inf
