In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

In [2]:
# Build a synthetic customer table; the fixed seed makes the draws repeatable.
np.random.seed(42)
n_customers = 500

# Draw order matters for reproducibility: gamma, then Poisson, then uniform integers.
total_spend = np.random.gamma(shape=2, scale=500, size=n_customers).round(2)
total_transactions = np.random.poisson(lam=5, size=n_customers)
# randint's upper bound is exclusive, so values fall in 1..364.
days_since_last_purchase = np.random.randint(1, 365, size=n_customers)

customer_data = pd.DataFrame({
    'Customer_ID': np.arange(1, n_customers + 1),
    'Total_Spend': total_spend,
    'Total_Transactions': total_transactions,
    'Days_Since_Last_Purchase': days_since_last_purchase,
})

In [3]:
# Segment customers with K-Means on three behavioural features (k=5, fixed seed).
# NOTE(review): the features are clustered on their raw scales, so the column with
# the largest numeric range likely dominates the distances — consider standardising
# them first (this would change the reported score and segment assignments).
segmentation_features = ['Total_Spend', 'Total_Transactions', 'Days_Since_Last_Purchase']
feature_matrix = customer_data[segmentation_features]

kmeans = KMeans(n_clusters=5, random_state=42)
segment_labels = kmeans.fit_predict(feature_matrix)
customer_data['Segment'] = segment_labels

# Mean silhouette coefficient of the assignment, computed on the same three features.
silhouette_avg = silhouette_score(feature_matrix, customer_data['Segment'])



In [5]:
# Estimate Customer Lifetime Value (CLV).
# Assumptions: a flat average profit margin per transaction and a fixed
# average customer lifespan.
average_profit_margin_per_transaction = 0.10
average_customer_lifespan = 10  # in years

# Keep the multiplication order (spend * margin, then * lifespan) so the
# floating-point results are unchanged.
profit_from_spend = customer_data['Total_Spend'].mul(average_profit_margin_per_transaction)
customer_data['CLV'] = profit_from_spend.mul(average_customer_lifespan)

In [7]:
# Derive a categorical satisfaction label from recency and transaction count.
# Conditions are evaluated in order, so recency takes priority:
#   last purchase fewer than 30 days ago -> 'High'
#   otherwise, more than 5 transactions  -> 'Medium'
#   everything else                      -> 'Low'
recent_buyer = (customer_data['Days_Since_Last_Purchase'] < 30).to_numpy()
frequent_buyer = (customer_data['Total_Transactions'] > 5).to_numpy()
customer_data['Satisfaction_Score'] = np.select(
    [recent_buyer, frequent_buyer],
    ['High', 'Medium'],
    default='Low',
)

In [8]:
# Report the results: clustering quality, mean CLV per segment, and label counts.
mean_clv_by_segment = customer_data.groupby('Segment')['CLV'].mean()
satisfaction_counts = customer_data['Satisfaction_Score'].value_counts()

print(f"Silhouette Score untuk segmentasi: {silhouette_avg:.2f}")
for heading, table in (
    ("Rata-rata CLV per segmen:", mean_clv_by_segment),
    ("Distribusi Skor Kepuasan Pelanggan:", satisfaction_counts),
):
    print(heading)
    print(table)

Silhouette Score untuk segmentasi: 0.46
Rata-rata CLV per segmen:
Segment
0    1423.074653
1     384.652784
2    3014.337647
3    2119.492558
4     871.397485
Name: CLV, dtype: float64
Distribusi Skor Kepuasan Pelanggan:
Satisfaction_Score
Low       284
Medium    170
High       46
Name: count, dtype: int64


Interpretasi:
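Silhouette Score berkisar dari -1 hingga 1; nilai yang mendekati 1 menunjukkan segmentasi yang baik. Skor 0.46 di atas menunjukkan klaster yang cukup terpisah, tetapi belum sepenuhnya bebas dari tumpang tindih antarsegmen.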
CLV yang lebih tinggi menunjukkan pelanggan yang lebih berharga dalam jangka panjang.
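Distribusi skor kepuasan dapat membantu dalam menargetkan upaya peningkatan layanan pelanggan. Perlu diingat bahwa skor ini adalah label heuristik yang diturunkan dari jumlah transaksi dan hari sejak pembelian terakhir, bukan hasil pengukuran kepuasan secara langsung.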