In [1]:
# Langkah 1: Mengimpor library yang diperlukan
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Langkah 2: Meload dan Mengeksplor data
# Memuat Data
data = pd.read_excel("dataku.xlsx")
data.head()

Unnamed: 0,invoice,id_barang,deskripsi,quantity(KTN),tanggal,pelanggan,kota
0,114555,A23201A,TARO NET SP,1.0,2023-01-14,POJOK SIKAMPUH,CILACAP
1,114556,A23201A,TARO NET SP,6.0,2023-01-14,POJOK SIKAMPUH,CILACAP
2,118586,A23201A,TARO NET SP,2.0,2023-01-18,AYAT PS.MAJENANG,CILACAP
3,119564,A23201A,TARO NET SP,3.0,2023-01-19,ARIS PASAR MAJENANG,CILACAP
4,119572,A23201A,TARO NET SP,10.0,2023-01-19,BAGJA MAJENANG,CILACAP


In [3]:
# Mengeksplor kolom data
data.columns

Index(['invoice', 'id_barang', 'deskripsi', 'quantity(KTN)', 'tanggal',
       'pelanggan', 'kota'],
      dtype='object')

In [4]:
# Mengeksplor berbagai wilayah transaksi
data.kota.unique()

array(['CILACAP'], dtype=object)

In [5]:
# Langkah 3: Membersihkan Data
# Menghapus ruang ekstra dalam deskripsi
data['deskripsi'] = data['deskripsi'].str.strip()

# Menghapus baris tanpa nomor faktur (InvoiceNo)
data.dropna(axis = 0, subset =['invoice'], inplace = True)
data['invoice'] = data['invoice'].astype('str')


In [6]:
# Langkah 4: Memisahkan data sesuai dengan wilayah transaksi
# Transaksi dilakukan di Cilacap
basket_Cilacap = (data[data['kota'] =="CILACAP"]
          .groupby(['pelanggan', 'deskripsi'])['quantity(KTN)']
          .sum().unstack().reset_index().fillna(0)
          .set_index('pelanggan'))
basket_Cilacap

deskripsi,BIHUN SUPERIOR,BIHUNKU SEDUH,KREMEZZ,KREMEZZ KREZZ,MIE AYAM 2 TELOR,MIE KREMEZZ STIKZ,MIE SUPERIOR,SHOOR MP,TARO 3D SP,TARO FRENCH FRIES,TARO NET,TARO NET FAMILY PACK,TARO NET SP,TARO PUFF,TARO PUFF SP,WAFER STICK PIO
pelanggan,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2004 KARANGPUCUNG,0.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0
24 BANTARSARI,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24 PS. ADIPALA,0.0,0.0,0.0,0.0,0.0,1.0,85.0,4.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0
3M BAROKAH JAYA KARANG PUCUNG,50.0,1.0,0.0,1.0,0.0,1.0,231.5,2.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
99 KAWUNGANTEN,0.0,0.0,0.0,3.0,0.0,0.0,50.0,0.0,0.0,0.0,1.0,4.0,20.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YUFIS BINANGUN,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0
YUNI (S)KROYA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0
YUS (S)PS. KESUGIHAN,0.0,0.0,5.0,1.0,0.0,1.0,0.0,5.0,0.0,0.0,0.0,0.0,10.0,0.0,1.0,0.0
YUYUNG LEBENG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [7]:
# Langkah 5: Pengkodean Data dengan "Hot Encoding"
# Mendefinisikan fungsi "Hot Encoding" untuk membuat data sesuai untuk library yang bersangkutan
# Mengubah fungsi hot_encode
def hot_encode(x):
    if x <= 0:
        return 0
    else:
        return 1


In [8]:
# Mengkodekan dataset
basket_encoded = basket_Cilacap.applymap(hot_encode)
basket_Cilacap = basket_encoded
basket_Cilacap

  basket_encoded = basket_Cilacap.applymap(hot_encode)


deskripsi,BIHUN SUPERIOR,BIHUNKU SEDUH,KREMEZZ,KREMEZZ KREZZ,MIE AYAM 2 TELOR,MIE KREMEZZ STIKZ,MIE SUPERIOR,SHOOR MP,TARO 3D SP,TARO FRENCH FRIES,TARO NET,TARO NET FAMILY PACK,TARO NET SP,TARO PUFF,TARO PUFF SP,WAFER STICK PIO
pelanggan,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2004 KARANGPUCUNG,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0
24 BANTARSARI,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0
24 PS. ADIPALA,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,1
3M BAROKAH JAYA KARANG PUCUNG,1,1,0,1,0,1,1,1,0,0,1,0,0,0,0,0
99 KAWUNGANTEN,0,0,0,1,0,0,1,0,0,0,1,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YUFIS BINANGUN,0,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0
YUNI (S)KROYA,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0
YUS (S)PS. KESUGIHAN,0,0,1,1,0,1,0,1,0,0,0,0,1,0,1,0
YUYUNG LEBENG,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0


In [9]:
# Langkah 6: Membangun model dan menganalisis hasilnya
# 1) Cilacap
# Membangun model
frq_items = apriori(basket_Cilacap, min_support = 0.05, use_colnames = True)

# Mengumpulkan aturan yang disimpulkan dalam dataframe
rules = association_rules(frq_items, metric ="lift", min_threshold = 1)
rules = rules.sort_values(['confidence', 'lift'], ascending =[False, False])
#print(rules.head())
rules.head(10)



Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
187,"(TARO FRENCH FRIES, KREMEZZ KREZZ, TARO NET SP)",(SHOOR MP),0.052823,0.460838,0.051002,0.965517,2.095134,0.026659,15.635701,0.551854
75,"(KREMEZZ, TARO NET SP)",(SHOOR MP),0.063752,0.460838,0.060109,0.942857,2.045963,0.03073,9.435337,0.546044
0,(BIHUN SUPERIOR),(MIE SUPERIOR),0.080146,0.282332,0.074681,0.931818,3.30044,0.052054,10.525804,0.75774
80,"(TARO FRENCH FRIES, KREMEZZ KREZZ)",(SHOOR MP),0.078324,0.460838,0.07286,0.930233,2.018568,0.036765,7.72799,0.54748
98,"(TARO PUFF SP, KREMEZZ KREZZ)",(SHOOR MP),0.063752,0.460838,0.056466,0.885714,1.921965,0.027087,4.717668,0.512363
56,"(BIHUNKU SEDUH, KREMEZZ KREZZ)",(SHOOR MP),0.058288,0.460838,0.051002,0.875,1.898715,0.024141,4.313297,0.502625
16,(KREMEZZ),(SHOOR MP),0.08561,0.460838,0.074681,0.87234,1.892944,0.035229,4.223437,0.515888
93,"(KREMEZZ KREZZ, TARO NET SP)",(SHOOR MP),0.120219,0.460838,0.102004,0.848485,1.841179,0.046602,3.55847,0.519299
134,"(TARO FRENCH FRIES, TARO PUFF SP)",(SHOOR MP),0.061931,0.460838,0.051002,0.823529,1.787026,0.022462,3.055252,0.469487
74,"(KREMEZZ, SHOOR MP)",(TARO NET SP),0.074681,0.459016,0.060109,0.804878,1.753484,0.025829,2.772541,0.464388


tambahan

list pelanggan yang sering beli

In [10]:
def pelanggan_dengan_pembelian_lebih_dari(data, batas_pembelian):
    # Menghitung frekuensi pembelian per pelanggan
    frekuensi_pembelian = data.groupby('pelanggan').size().reset_index(name='frekuensi')

    # Memfilter pelanggan yang memiliki frekuensi pembelian lebih dari batas yang ditentukan
    pelanggan_lebih_dari_batas = frekuensi_pembelian[frekuensi_pembelian['frekuensi'] > batas_pembelian]['pelanggan'].tolist()

    return pelanggan_lebih_dari_batas

# Mendapatkan list pelanggan dengan pembelian lebih dari batas yang diinginkan
batas_pembelian = 50  # Ganti dengan angka berapa saja yang diinginkan
list_pelanggan = pelanggan_dengan_pembelian_lebih_dari(data, batas_pembelian)

# Cetak list pelanggan dalam format tabel
print(f"List pelanggan dengan pembelian lebih dari {batas_pembelian} kali:")
print("---------------------------")
print("| No | Nama Pelanggan     |")
print("---------------------------")
for idx, pelanggan in enumerate(list_pelanggan, start=1):
    print(f"| {idx}  | {pelanggan:<20} |")
print("---------------------------")


List pelanggan dengan pembelian lebih dari 50 kali:
---------------------------
| No | Nama Pelanggan     |
---------------------------
| 1  | AGUS (S) PS. GEDE    |
| 2  | ANEKA SNACK PS MAJENANG |
| 3  | BENTO PS. GEDE       |
| 4  | CV. WARIS MAS GANDRUNG |
| 5  | DEDI PS. TANJUNG     |
| 6  | GINO PS TANJUNG      |
| 7  | IMAM SAFII CILACAP   |
| 8  | IRVAN KROYA          |
| 9  | POJOK SIKAMPUH       |
| 10  | RUDI CLP             |
| 11  | SARWODADI MAJENANG   |
| 12  | SMD SNACKING CILACAP 1 |
| 13  | TIGA CINCIN WIDARAPAYUNG |
---------------------------


rekomendasi

In [11]:
from mlxtend.frequent_patterns import apriori, association_rules

def analyze_customer_purchase_patterns(data, customer_name):
    # Filter data berdasarkan nama pelanggan
    customer_data = data[data['pelanggan'] == customer_name]

    # Jika data pelanggan tidak kosong
    if not customer_data.empty:
        # Melakukan one-hot encoding untuk deskripsi pembelian
        encoded_data = pd.get_dummies(customer_data.set_index('invoice')['deskripsi']).groupby(level='invoice').max()

        # Menerapkan algoritma Apriori
        frequent_itemsets = apriori(encoded_data, min_support=0.05, use_colnames=True)

        # Membuat asosiasi aturan
        rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
        rules = rules.sort_values(['confidence', 'lift'], ascending =[False, False])
        top_10_rules = rules.head(10)

        # Menghitung rata-rata pembelian per bulan
        customer_data['tanggal'] = pd.to_datetime(customer_data['tanggal'], format='%d/%m/%Y', errors='coerce')
        customer_data['bulan'] = customer_data['tanggal'].dt.to_period('M')
        purchase_mean_per_month = customer_data.groupby('bulan')['quantity(KTN)'].mean().dropna()

        return top_10_rules, purchase_mean_per_month
    else:
        return None, None

# Menggunakan fungsi untuk menganalisis pola pembelian pelanggan tertentu
nama_pelanggan_terpilih = "POJOK SIKAMPUH"
hasil_pola_pembelian, rata_rata_per_bulan = analyze_customer_purchase_patterns(data, nama_pelanggan_terpilih)

if hasil_pola_pembelian is not None and rata_rata_per_bulan is not None:
    print(f"10 aturan teratas pola pembelian untuk pelanggan {nama_pelanggan_terpilih}:")
    print(hasil_pola_pembelian.to_string())

    print(f"Rata-rata pembelian pelanggan {nama_pelanggan_terpilih} per bulan:")
    print(rata_rata_per_bulan.to_string())
else:
    print(f"Tidak ada data pola pembelian untuk pelanggan {nama_pelanggan_terpilih}.")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customer_data['tanggal'] = pd.to_datetime(customer_data['tanggal'], format='%d/%m/%Y', errors='coerce')


10 aturan teratas pola pembelian untuk pelanggan POJOK SIKAMPUH:
                                      antecedents                                   consequents  antecedent support  consequent support   support  confidence       lift  leverage  conviction  zhangs_metric
362            (TARO FRENCH FRIES, BIHUNKU SEDUH)                 (TARO NET, MIE KREMEZZ STIKZ)            0.052632            0.087719  0.052632         1.0  11.400000  0.048015         inf       0.962963
363                 (TARO FRENCH FRIES, TARO NET)            (BIHUNKU SEDUH, MIE KREMEZZ STIKZ)            0.052632            0.087719  0.052632         1.0  11.400000  0.048015         inf       0.962963
581       (TARO FRENCH FRIES, SHOOR MP, TARO NET)            (BIHUNKU SEDUH, MIE KREMEZZ STIKZ)            0.052632            0.087719  0.052632         1.0  11.400000  0.048015         inf       0.962963
582  (TARO FRENCH FRIES, BIHUNKU SEDUH, SHOOR MP)                 (TARO NET, MIE KREMEZZ STIKZ)            0.05

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customer_data['bulan'] = customer_data['tanggal'].dt.to_period('M')


hasil

In [12]:
 def analyze_customer_purchase_patterns(data, customer_name, start_month, end_month, min_support=0.05):
    customer_data = data[
        (data['pelanggan'] == customer_name) &
        (data['tanggal'].dt.to_period('M').between(start_month, end_month))
    ]

    if not customer_data.empty:
        encoded_data = pd.get_dummies(customer_data.set_index('invoice')['deskripsi']).groupby(level='invoice').max()

        frequent_itemsets = apriori(encoded_data, min_support=min_support, use_colnames=True)
        rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
        rules = rules.sort_values(['confidence', 'lift'], ascending =[False, False])
        # Menyaring 10 aturan teratas berdasarkan confidence dan support
        top_10_rules = rules.nlargest(10, ['confidence', 'support'])

        return top_10_rules
    else:
        return None

# Contoh penggunaan untuk menganalisis pola pembelian dalam rentang bulan untuk pelanggan tertentu
nama_pelanggan_terpilih = "RUDI CLP"
bulan_awal = "2023-03"  # Ubah bulan awal rentang yang ingin dianalisis
bulan_akhir = "2023-09"  # Ubah bulan akhir rentang yang ingin dianalisis
bulan_awal = pd.Period(bulan_awal)
bulan_akhir = pd.Period(bulan_akhir)

hasil_aturan_asosiasi = analyze_customer_purchase_patterns(data, nama_pelanggan_terpilih, bulan_awal, bulan_akhir)

if hasil_aturan_asosiasi is not None:
    print(f"10 Aturan Asosiasi teratas untuk pelanggan {nama_pelanggan_terpilih} pada rentang bulan {bulan_awal} sampai {bulan_akhir}:")
    for index, row in hasil_aturan_asosiasi.iterrows():
        antecedents = row['antecedents']
        consequents = row['consequents']
        confidence = row['confidence']
        support = row['support']

        antecedent_names = ', '.join([str(item) for item in antecedents])
        consequent_names = ', '.join([str(item) for item in consequents])

        print(f"Jika beli {antecedent_names}, maka beli {consequent_names} dengan confidence: {confidence}")
else:
    print(f"Tidak ada data pola pembelian untuk pelanggan {nama_pelanggan_terpilih} pada rentang bulan {bulan_awal} sampai {bulan_akhir}.")



10 Aturan Asosiasi teratas untuk pelanggan RUDI CLP pada rentang bulan 2023-03 sampai 2023-09:
Jika beli KREMEZZ, maka beli SHOOR MP, TARO NET SP dengan confidence: 1.0
Jika beli KREMEZZ, maka beli SHOOR MP dengan confidence: 1.0
Jika beli KREMEZZ, TARO NET SP, maka beli SHOOR MP dengan confidence: 1.0
Jika beli KREMEZZ, maka beli TARO NET SP dengan confidence: 1.0
Jika beli KREMEZZ, SHOOR MP, maka beli TARO NET SP dengan confidence: 1.0
Jika beli SHOOR MP, TARO NET SP, maka beli KREMEZZ dengan confidence: 0.75
Jika beli SHOOR MP, maka beli TARO NET SP dengan confidence: 0.6666666666666666
Jika beli SHOOR MP, maka beli KREMEZZ dengan confidence: 0.5
Jika beli SHOOR MP, maka beli KREMEZZ, TARO NET SP dengan confidence: 0.5
Jika beli TARO NET SP, maka beli SHOOR MP dengan confidence: 0.3333333333333333
