<a href="https://colab.research.google.com/github/SkylarkOff/FP-Growth/blob/main/FP_Growth.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab runtime; later cells read the dataset
# from, and write the rules file to, paths under /content/drive.
# NOTE(review): google.colab is only importable on Colab — this cell will fail
# elsewhere; confirm the notebook is meant to be Colab-only.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install mlxtend

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules



In [None]:
# Raw sales CSV, stored under the Drive mount point.
file_path = '/content/drive/MyDrive/Colab Notebooks/FP-Growth/Supermart Grocery Sales - Retail Analytics Dataset.csv'
df = pd.read_csv(file_path)

# Sanity check: column names first, then the first five rows.
for preview in (df.columns, df.head()):
    print(preview)

  and should_run_async(code)


Index(['Order ID', 'Customer Name', 'Category', 'Sub Category', 'City',
       'Order Date', 'Region', 'Sales', 'Discount', 'Profit', 'State'],
      dtype='object')
  Order ID Customer Name          Category      Sub Category         City  \
0      OD1        Harish      Oil & Masala           Masalas      Vellore   
1      OD2         Sudha         Beverages     Health Drinks  Krishnagiri   
2      OD3       Hussain       Food Grains      Atta & Flour   Perambalur   
3      OD4       Jackson  Fruits & Veggies  Fresh Vegetables   Dharmapuri   
4      OD5       Ridhesh       Food Grains   Organic Staples         Ooty   

   Order Date Region  Sales  Discount  Profit       State  
0  11-08-2017  North   1254      0.12  401.28  Tamil Nadu  
1  11-08-2017  South    749      0.18  149.80  Tamil Nadu  
2  06-12-2017   West   2360      0.21  165.20  Tamil Nadu  
3  10-11-2016  South    896      0.25   89.60  Tamil Nadu  
4  10-11-2016  South   2355      0.26  918.45  Tamil Nadu  


### **Transformasi Data**

In [None]:
# Keep only the order key and the two product-label columns. The copy makes
# data_transformed independent of df.
basket_columns = ['Order ID', 'Category', 'Sub Category']
data_transformed = df.loc[:, basket_columns].copy()

  and should_run_async(code)


In [None]:
def _comma_join(values):
    """Concatenate one order's label values into a single comma-separated string."""
    return ','.join(values)


# One row per order: each label column collapses to a comma-separated string.
# Caveat: a label that itself contains a comma cannot be distinguished from
# two separate labels once joined this way.
orders = data_transformed.groupby('Order ID')
data_grouped = orders.agg(_comma_join).reset_index()

  and should_run_async(code)


In [None]:
# Distinct item labels, read from the per-row values instead of by splitting
# the comma-joined strings in data_grouped. Splitting on ',' breaks any label
# that itself contains a comma into fragments; the previously printed results
# show such a fragment, ' Meat & Fish' (note the leading space), which
# presumably comes from a category like 'Eggs, Meat & Fish' — TODO confirm
# against the raw CSV.
all_categories = set(data_transformed['Category'].dropna())
all_subcategories = set(data_transformed['Sub Category'].dropna())

# Long format: one (order, item) row for every category and every sub-category
# value, so both label columns feed a single item vocabulary.
order_items = data_transformed.melt(
    id_vars='Order ID',
    value_vars=['Category', 'Sub Category'],
    value_name='item',
)

# 0/1 indicator matrix (orders x items).
# - crosstab counts occurrences; gt(0) turns counts into presence flags.
# - A label used both as a category and as a sub-category ends up in ONE
#   column that is 1 when either occurs, instead of one column silently
#   overwriting the other.
# - Columns come out in sorted order, so results are reproducible across
#   kernel restarts (iteration order of a set of strings is not).
# - reindex aligns the rows with data_grouped's order of 'Order ID'.
item_flags = (
    pd.crosstab(order_items['Order ID'], order_items['item'])
    .gt(0)
    .astype(int)
    .reindex(index=data_grouped['Order ID'], fill_value=0)
    .rename_axis(None, axis='columns')
    .reset_index(drop=True)
)

# Rebuild data_grouped from its three base columns plus the indicators, so
# re-running this cell gives the same frame instead of stacking columns.
data_grouped = pd.concat(
    [
        data_grouped[['Order ID', 'Category', 'Sub Category']].reset_index(drop=True),
        item_flags,
    ],
    axis=1,
)

  and should_run_async(code)


In [None]:
# Drop the two comma-joined label columns; what remains is the order key plus
# the per-item indicator columns.
data_final = data_grouped.drop(['Category', 'Sub Category'], axis='columns')

  and should_run_async(code)


In [None]:
# Preview the first rows of the encoded transactions.
print("\nTransformed Data:", data_final.head(), sep="\n")


Transformed Data:
  Order ID  Oil & Masala   Meat & Fish  Eggs  Food Grains  Snacks  Bakery  \
0      OD1             1             0     0            0       0       0   
1     OD10             0             0     0            0       0       1   
2    OD100             1             0     0            0       0       0   
3   OD1000             0             0     0            0       0       1   
4   OD1001             0             0     0            0       1       0   

   Fruits & Veggies  Beverages  Chicken  ...  Cookies  Masalas  Rice  \
0                 0          0        0  ...        0        1     0   
1                 0          0        0  ...        0        0     0   
2                 0          0        0  ...        0        0     0   
3                 0          0        0  ...        0        0     0   
4                 0          0        0  ...        1        0     0   

   Biscuits  Chocolates  Dals & Pulses  Cakes  Mutton  Fish  Atta & Flour  
0        

  and should_run_async(code)


### **Algoritma FP-Growth**

In [None]:
# Minimum share of orders an itemset must appear in to be reported.
MIN_SUPPORT = 0.01

# mlxtend's frequent-pattern functions expect a boolean one-hot frame; passing
# 0/1 integers still works but emits a DeprecationWarning and is slower.
# The order key is not an item, so it is removed before mining.
basket = data_final.drop('Order ID', axis=1).astype(bool)

frequent_itemsets = fpgrowth(basket, min_support=MIN_SUPPORT, use_colnames=True)

print("Frequent Itemsets:")
print(frequent_itemsets)

Frequent Itemsets:
     support                                itemsets
0   0.136182                          (Oil & Masala)
1   0.046328                               (Masalas)
2   0.141385                                (Bakery)
3   0.045227                                 (Cakes)
4   0.044727                                (Spices)
5   0.050230                         (Breads & Buns)
6   0.151491                                (Snacks)
7   0.052031                               (Cookies)
8   0.140084                             (Beverages)
9   0.071943                         (Health Drinks)
10  0.049930                            (Chocolates)
11  0.149089                          ( Meat & Fish)
12  0.036922                                  (Fish)
13  0.141885                      (Fruits & Veggies)
14  0.035421                      (Fresh Vegetables)
15  0.139884                           (Food Grains)
16  0.037222                       (Organic Staples)
17  0.037923               

  and should_run_async(code)


In [None]:
# Same itemsets, ordered from most to least frequent.
itemsets_by_support = frequent_itemsets.sort_values('support', ascending=False)
print(itemsets_by_support)

     support                                itemsets
6   0.151491                                (Snacks)
11  0.149089                          ( Meat & Fish)
13  0.141885                      (Fruits & Veggies)
2   0.141385                                (Bakery)
8   0.140084                             (Beverages)
15  0.139884                           (Food Grains)
0   0.136182                          (Oil & Masala)
9   0.071943                         (Health Drinks)
35  0.071943              (Health Drinks, Beverages)
42  0.068141                (Beverages, Soft Drinks)
19  0.068141                           (Soft Drinks)
34  0.052031                       (Snacks, Cookies)
7   0.052031                               (Cookies)
5   0.050230                         (Breads & Buns)
33  0.050230                 (Breads & Buns, Bakery)
10  0.049930                            (Chocolates)
36  0.049930                    (Snacks, Chocolates)
46  0.049530                       (Snacks, No

  and should_run_async(code)


In [None]:
# Columns shown in the rule summary.
rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']

# Derive rules from the frequent itemsets, keeping those with lift >= 1.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

print("Association Rules:", rules[rule_columns], sep="\n")

Association Rules:
             antecedents           consequents   support  confidence      lift
0         (Oil & Masala)             (Masalas)  0.046328    0.340191  7.343130
1              (Masalas)        (Oil & Masala)  0.046328    1.000000  7.343130
2                (Cakes)              (Bakery)  0.045227    1.000000  7.072895
3               (Bakery)               (Cakes)  0.045227    0.319887  7.072895
4         (Oil & Masala)              (Spices)  0.044727    0.328435  7.343130
5               (Spices)        (Oil & Masala)  0.044727    1.000000  7.343130
6        (Breads & Buns)              (Bakery)  0.050230    1.000000  7.072895
7               (Bakery)       (Breads & Buns)  0.050230    0.355272  7.072895
8               (Snacks)             (Cookies)  0.052031    0.343461  6.601057
9              (Cookies)              (Snacks)  0.052031    1.000000  6.601057
10       (Health Drinks)           (Beverages)  0.071943    1.000000  7.138571
11           (Beverages)       (H

  and should_run_async(code)


In [None]:
# All rules ordered by lift, strongest association first. top_rules keeps the
# full sorted frame so it can still be sliced differently later.
top_rules = rules.sort_values(by='lift', ascending=False)

# The heading promises ten rules, so limit the printed rows to ten
# (previously every rule was printed under this heading).
print("Top 10 Rules by Lift:")
print(top_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head(10))

Top 10 Rules by Lift:
             antecedents           consequents   support  confidence      lift
0         (Oil & Masala)             (Masalas)  0.046328    0.340191  7.343130
41   (Edible Oil & Ghee)        (Oil & Masala)  0.045127    1.000000  7.343130
4         (Oil & Masala)              (Spices)  0.044727    0.328435  7.343130
5               (Spices)        (Oil & Masala)  0.044727    1.000000  7.343130
40        (Oil & Masala)   (Edible Oil & Ghee)  0.045127    0.331374  7.343130
1              (Masalas)        (Oil & Masala)  0.046328    1.000000  7.343130
29         (Food Grains)        (Atta & Flour)  0.035321    0.252504  7.148784
18     (Organic Staples)         (Food Grains)  0.037222    1.000000  7.148784
44                (Rice)         (Food Grains)  0.033020    1.000000  7.148784
28        (Atta & Flour)         (Food Grains)  0.035321    1.000000  7.148784
27         (Food Grains)       (Dals & Pulses)  0.034321    0.245351  7.148784
26       (Dals & Pulses)      

  and should_run_async(code)


In [None]:
# Destination for the exported rules; from here on file_path holds this
# output path rather than the input CSV path.
file_path = '/content/drive/MyDrive/Colab Notebooks/FP-Growth/rules.csv'

# Export only the summary columns, without the positional index.
export_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
rules_export = rules[export_columns]
rules_export.to_csv(file_path, index=False)

print(f"File disimpan di: {file_path}")

File disimpan di: /content/drive/MyDrive/Colab Notebooks/FP-Growth/rules.csv


  and should_run_async(code)
