<a href="https://colab.research.google.com/github/RoyalWeaboo/Data-Mining_Tugas-Akhir/blob/main/clustering_perhutani.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import re
from sklearn.cluster import KMeans

In [None]:
# Convert a formatted total-payment string (e.g. 'Rp 1.234.567,00') to an integer
def preprocess_transaction_amount(amount):
    """Convert a formatted rupiah amount string into a whole-rupiah integer.

    Currency symbols, spaces and thousands separators are discarded. A
    trailing decimal part (a '.' or ',' followed by one or two digits, such
    as the ',00' in 'Rp 1.234.567,00') is dropped, not rounded.

    Args:
        amount: Amount text such as 'Rp 1.234.567,00' or 'Rp 1.234.567'.

    Returns:
        The amount in whole rupiah as an int.

    Raises:
        ValueError: If the text contains no digits.
        TypeError: If ``amount`` is not a string (re.sub rejects it).
    """
    # Keep only digits and the two possible separator characters, then drop
    # stray separators at either end (e.g. the dot of an 'Rp.' prefix or a
    # trailing ',' from an 'Rp 1.000,-' style suffix).
    cleaned = re.sub(r'[^\d.,]', '', amount).strip('.,')

    # Remove the decimal part only when one is actually present. Blindly
    # cutting the last two characters would silently divide any amount
    # written without decimals by 100.
    whole_part = re.sub(r'[.,]\d{1,2}$', '', cleaned)

    # Whatever separators remain are thousands separators.
    digits = re.sub(r'\D', '', whole_part)
    if not digits:
        raise ValueError(f"no digits found in amount: {amount!r}")
    return int(digits)

# Load the raw transaction records
data = pd.read_csv('dataset_perhutani.csv', encoding='ISO-8859-1')

# Parse the formatted total-payment strings into integers
data['Amount Req'] = data['Amount Req'].map(preprocess_transaction_amount)

# One group per TPK (Tempat Penimbunan Kayu, timber storage site); the same
# grouping feeds both aggregations below, so their rows line up by TPK.
tpk_groups = data.groupby(['TPK'], as_index=False)

# Sum of all payments made at the same TPK
total_amount_data = tpk_groups['Amount Req'].sum()

# Per-TPK columns used for clustering; MGR is the first value seen per TPK
tpk = total_amount_data['TPK']
transaction_amount = total_amount_data['Amount Req']
mgr = tpk_groups.first()['MGR']

# Integer-encode the MGR and TPK labels (codes plus the code -> label index)
mgr_encoded, mgr_mapping = mgr.factorize()
tpk_encoded, tpk_mapping = tpk.factorize()

print(total_amount_data)

                                 TPK  Amount Req
0             4121101 - TPK CIMANGGU  2281914568
1            4121102 - TPK KALIWANGI    23392372
2                4121104 - TPK LOANO   145520106
3               4121105 - TPK DEMPES   162765744
4                4121107 - TPK TALOK  1534798678
..                               ...         ...
140    4141330 - TPKH HANJUANG TIMUR   427864106
141    4141331 - TPKH HANJUANG BARAT    84987116
142   4141332 - TPKH HANJUANG TENGAH   336128202
143  4141333 - TPKH HANJUANG SELATAN   133272318
144         4141334 - TPKH PASIR AWI   561495444

[145 rows x 2 columns]


In [None]:
# Show the MGR label of each TPK followed by its integer codes
print(mgr, mgr_encoded, sep='\n')

0      MGR PEKALONGAN
1      MGR PEKALONGAN
2      MGR PEKALONGAN
3      MGR PEKALONGAN
4      MGR PEKALONGAN
            ...      
140         MGR BOGOR
141         MGR BOGOR
142         MGR BOGOR
143         MGR BOGOR
144         MGR BOGOR
Name: MGR, Length: 145, dtype: object
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6]


In [None]:
# Show the TPK labels followed by their integer codes
print(tpk, tpk_encoded, sep='\n')

0               4121101 - TPK CIMANGGU
1              4121102 - TPK KALIWANGI
2                  4121104 - TPK LOANO
3                 4121105 - TPK DEMPES
4                  4121107 - TPK TALOK
                    ...               
140      4141330 - TPKH HANJUANG TIMUR
141      4141331 - TPKH HANJUANG BARAT
142     4141332 - TPKH HANJUANG TENGAH
143    4141333 - TPKH HANJUANG SELATAN
144           4141334 - TPKH PASIR AWI
Name: TPK, Length: 145, dtype: object
[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 1

In [None]:
# Assemble the clustering inputs in one frame: the summed payment amount per
# TPK plus the integer-encoded MGR and TPK labels.
features = pd.DataFrame({
    'transaction_amount': transaction_amount,
    'mgr_encoded': mgr_encoded,
    'tpk_encoded': tpk_encoded,
})

print(features)

     transaction_amount  mgr_encoded  tpk_encoded
0            2281914568            0            0
1              23392372            0            1
2             145520106            0            2
3             162765744            0            3
4            1534798678            0            4
..                  ...          ...          ...
140           427864106            6          140
141            84987116            6          141
142           336128202            6          142
143           133272318            6          143
144           561495444            6          144

[145 rows x 3 columns]


In [None]:
# Cluster the TPKs with KMeans
num_clusters = 6
random_state = 42  # fixed seed: KMeans initialisation is random, so labels would otherwise change per run

# Fit on the input columns only. This cell adds 'cluster_label' to `features`
# below; fitting on the whole frame would feed the previous labels back in as
# a fourth feature whenever the cell is re-run.
feature_columns = ['transaction_amount', 'mgr_encoded', 'tpk_encoded']

# NOTE(review): the features are not scaled, so transaction_amount (values in
# the millions to billions) dominates the Euclidean distance over the small
# integer codes. Consider standardising the inputs -- confirm with the analysis owner.
kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=random_state)
kmeans.fit(features[feature_columns])

# Cluster index assigned to each row of `features`
cluster_labels = kmeans.labels_

# Attach the cluster label to the feature frame
features['cluster_label'] = cluster_labels

print(features)

     transaction_amount  mgr_encoded  tpk_encoded  cluster_label
0            2281914568            0            0              3
1              23392372            0            1              0
2             145520106            0            2              0
3             162765744            0            3              0
4            1534798678            0            4              0
..                  ...          ...          ...            ...
140           427864106            6          140              0
141            84987116            6          141              0
142           336128202            6          142              0
143           133272318            6          143              0
144           561495444            6          144              0

[145 rows x 4 columns]


