In [30]:
import pandas as pd
import numpy as np
from collections import Counter
import tensorflow as tf
import tensorflow_recommenders as tfrs
from tensorflow.keras.layers import Embedding, Dense, Concatenate, Input, Dropout, Flatten
from tensorflow.keras.models import Model
import os
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TensorBoard


In [2]:
# Load the UMKM, investor and user tables from their CSV files.
umkm_df, investor_df, user_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/umkm_data.csv',
        'data/investor_data.csv',
        'data/users_data.csv',
    )
)

In [3]:
# Attach `lokasi_usaha` and `pertumbuhan_pendapatan` from the user table.
# The columns are dropped first so the cell can be re-run safely: merging a
# second time would otherwise create `_x`/`_y` suffixed duplicates and the
# plain column names used further down would no longer exist.
_umkm_user_cols = ['lokasi_usaha', 'pertumbuhan_pendapatan']
umkm_df = (
    umkm_df
    .drop(columns=_umkm_user_cols, errors='ignore')
    .merge(user_data[['user_id'] + _umkm_user_cols], on='user_id', how='left')
)

In [5]:
# Attach `lokasi_usaha` from the user table.
# The column is dropped first so the cell can be re-run safely: merging a
# second time would otherwise create `lokasi_usaha_x`/`lokasi_usaha_y` and the
# plain `lokasi_usaha` column used further down would no longer exist.
investor_df = (
    investor_df
    .drop(columns=['lokasi_usaha'], errors='ignore')
    .merge(user_data[['user_id', 'lokasi_usaha']], on='user_id', how='left')
)

In [4]:
# Summarise umkm_df: column names, non-null counts and dtypes.
umkm_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100 entries, 0 to 99
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   user_id                 100 non-null    object 
 1   umkm_id                 100 non-null    object 
 2   kategori                100 non-null    object 
 3   model_bisnis            100 non-null    object 
 4   skala                   100 non-null    object 
 5   jangkauan               100 non-null    object 
 6   lokasi_usaha            100 non-null    object 
 7   pertumbuhan_pendapatan  100 non-null    float64
dtypes: float64(1), object(7)
memory usage: 7.0+ KB


In [6]:
# Summarise investor_df: column names, non-null counts and dtypes.
investor_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   user_id       100 non-null    object
 1   investor_id   100 non-null    object
 2   kategori      100 non-null    object
 3   model_bisnis  100 non-null    object
 4   skala         100 non-null    object
 5   jangkauan     100 non-null    object
 6   lokasi_usaha  100 non-null    object
dtypes: object(7)
memory usage: 6.2+ KB


In [8]:
# Preview the first rows of investor_df.
investor_df.head()

Unnamed: 0,user_id,investor_id,kategori,model_bisnis,skala,jangkauan
0,0026147b-6cf0-4f4b-9bc4-fe86f28c3df8,0026147b-6cf0-4f4b-9bc4-fe86f28c3df8,Retail,B2C,Mikro,Internasional
1,025e6cbc-1bd3-466a-8c34-56f3264c8597,025e6cbc-1bd3-466a-8c34-56f3264c8597,Kuliner,Franchise,Mikro,Internasional
2,08544abb-6884-44f8-99dd-058407c95376,08544abb-6884-44f8-99dd-058407c95376,Fashion,B2B,Menengah,Lokal
3,09100a78-776e-4ba2-97e6-1abdd82d0004,09100a78-776e-4ba2-97e6-1abdd82d0004,Retail,Dropship,Mikro,Regional
4,0acf62ae-69fd-40c2-8ab6-9020e82bcf2c,0acf62ae-69fd-40c2-8ab6-9020e82bcf2c,Pendidikan,Reseller,Mikro,Lokal


In [3]:
# Preview the first rows of umkm_df.
umkm_df.head()

Unnamed: 0,user_id,umkm_id,kategori,model_bisnis,skala,jangkauan
0,055fae89-7b46-4d94-8bd8-2f5be657a889,055fae89-7b46-4d94-8bd8-2f5be657a889,Teknologi,B2C,Mikro,Regional
1,0c4bea2b-f22b-4b5c-8770-d9b2e133b485,0c4bea2b-f22b-4b5c-8770-d9b2e133b485,Kriya,Reseller,Menengah,Regional
2,110c07e6-b253-46b6-8b7e-39467db9f10b,110c07e6-b253-46b6-8b7e-39467db9f10b,Pendidikan,Dropship,Kecil,Lokal
3,119d757a-7af9-4e2f-b403-3263458e7169,119d757a-7af9-4e2f-b403-3263458e7169,Teknologi,Dropship,Kecil,Lokal
4,119f101a-7f8c-4d3b-af80-ba4d0975fe74,119f101a-7f8c-4d3b-af80-ba4d0975fe74,Kuliner,Reseller,Menengah,Internasional


In [8]:
# Preprocessing helper: extract the province from `lokasi_usaha`
def extract_provinsi(lokasi):
    """Return the lower-cased province part of a location string.

    The location is assumed to follow the "city, province" format; the text
    after the last comma is taken as the province. A value without a comma is
    returned whole (trimmed and lower-cased).

    Args:
        lokasi: Raw location value; may be a string, ``None`` or NaN.

    Returns:
        The province as a lower-cased string, or ``None`` when the value is
        missing, is not a string, or has no text after the last comma.
    """
    # Non-string values (None, NaN, numbers) carry no usable location text.
    # Checking the type explicitly replaces the former bare `except`, which
    # would also have hidden unrelated errors.
    if not isinstance(lokasi, str):
        return None

    provinsi = lokasi.rsplit(',', 1)[-1].strip().lower()

    # An empty province (e.g. "Bandung,") is treated as missing so that it is
    # dropped by `dropna()` instead of becoming an empty-string vocabulary token.
    return provinsi or None

In [9]:
# Derive the `provinsi` column from each UMKM's location string.
umkm_df['provinsi'] = umkm_df['lokasi_usaha'].map(extract_provinsi)

In [11]:
# Text normaliser: case folding and trimming of surrounding whitespace
def clean_text(text):
    """Normalise a categorical text value.

    Returns ``None`` for missing values (as judged by ``pd.isna``); otherwise
    the value with leading/trailing whitespace removed and lower-cased.
    """
    is_missing = pd.isna(text)
    if not is_missing:
        return text.strip().lower()
    return None

In [13]:
# Show investor_df's column names, non-null counts and dtypes again.
investor_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   user_id       100 non-null    object
 1   investor_id   100 non-null    object
 2   kategori      100 non-null    object
 3   model_bisnis  100 non-null    object
 4   skala         100 non-null    object
 5   jangkauan     100 non-null    object
 6   lokasi_usaha  100 non-null    object
dtypes: object(7)
memory usage: 6.2+ KB


In [14]:
# Preprocessing
# Normalise every categorical column with clean_text. UMKM rows use the derived
# `provinsi` column as their location, investor rows use `lokasi_usaha`.
shared_cols = ['kategori', 'model_bisnis', 'skala', 'jangkauan']
for frame, location_col in ((umkm_df, 'provinsi'), (investor_df, 'lokasi_usaha')):
    for col in shared_cols + [location_col]:
        frame[col] = frame[col].apply(clean_text)

In [21]:
# Build the vocabulary of each categorical feature from the UMKM table:
# the distinct non-null values, in order of first appearance.
kategori_vocab = umkm_df['kategori'].dropna().unique().tolist()
model_bisnis_vocab = umkm_df['model_bisnis'].dropna().unique().tolist()
skala_vocab = umkm_df['skala'].dropna().unique().tolist()
jangkauan_vocab = umkm_df['jangkauan'].dropna().unique().tolist()
lokasi_vocab = umkm_df['provinsi'].dropna().unique().tolist()

In [22]:
# Print two of the vocabularies as a sanity check.
for label, vocab in (
    ("Kategori vocab:", kategori_vocab),
    ("Model bisnis vocab:", model_bisnis_vocab),
):
    print(label, vocab)

Kategori vocab: ['teknologi', 'kriya', 'pendidikan', 'kuliner', 'retail', 'jasa', 'fashion', 'kesehatan', 'agribisnis']
Model bisnis vocab: ['b2c', 'reseller', 'dropship', 'direct-to-consumer', 'b2b', 'franchise']


In [23]:
# StringLookup layers that turn each categorical string into an integer index.
def _build_lookup(vocab):
    """Create a StringLookup over `vocab` with no mask token."""
    return tf.keras.layers.StringLookup(vocabulary=vocab, mask_token=None)


kategori_lookup = _build_lookup(kategori_vocab)
model_bisnis_lookup = _build_lookup(model_bisnis_vocab)
skala_lookup = _build_lookup(skala_vocab)
jangkauan_lookup = _build_lookup(jangkauan_vocab)
lokasi_lookup = _build_lookup(lokasi_vocab)

In [27]:
def encode_single_label(text_series, lookup_layer):
    """Map a single-label text column to integer indices.

    Missing entries are replaced with an empty string and every value is cast
    to ``str`` before the series is passed through ``lookup_layer``; the
    layer's result is returned via its ``.numpy()`` method.
    """
    as_strings = text_series.fillna('').astype(str)
    indices = lookup_layer(as_strings)
    return indices.numpy()

In [28]:
# Encode UMKM features
umkm_encoded = {
    'kategori': encode_single_label(umkm_df['kategori'], lokasi_lookup),
    'model_bisnis': encode_single_label(umkm_df['model_bisnis'], lokasi_lookup),
    'skala': encode_single_label(umkm_df['skala'], lokasi_lookup),
    'jangkauan': encode_single_label(umkm_df['jangkauan'], lokasi_lookup),
    'lokasi_usaha': encode_single_label(umkm_df['provinsi'], lokasi_lookup),
}

In [29]:
# Encode Investor features
investor_encoded = {
    'kategori': encode_single_label(investor_df['kategori'], lokasi_lookup),
    'model_bisnis': encode_single_label(investor_df['model_bisnis'], lokasi_lookup),
    'skala': encode_single_label(investor_df['skala'], lokasi_lookup),
    'jangkauan': encode_single_label(investor_df['jangkauan'], lokasi_lookup),
    'lokasi_usaha': encode_single_label(investor_df['lokasi_usaha'], lokasi_lookup),
}

In [34]:
def create_umkm_encoder():
    """Create UMKM encoder model"""
    kategori_input = Input(shape=(), dtype=tf.int32, name='kategori')
    model_bisnis_input = Input(shape=(), dtype=tf.int32, name='model_bisnis')
    skala_input = Input(shape=(), dtype=tf.int32, name='skala')
    jangkauan_input = Input(shape=(), dtype=tf.int32, name='jangkauan')
    lokasi_input = Input(shape=(), dtype=tf.int32, name='provinsi')
    # pertumbuhan_pendapatan_input = Input(shape=(), dtype=tf.int32, name='pertumbuhan_pendapatan')

    kategori_emb = Flatten()(Embedding(lokasi_lookup.vocabulary_size(), 16)(kategori_input))
    model_bisnis_emb = Flatten()(Embedding(lokasi_lookup.vocabulary_size(), 16)(model_bisnis_input))
    skala_emb = Flatten()(Embedding(lokasi_lookup.vocabulary_size(), 16)(skala_input))
    jangkauan_emb = Flatten()(Embedding(lokasi_lookup.vocabulary_size(), 16)(jangkauan_input))
    lokasi_emb = Flatten()(Embedding(lokasi_lookup.vocabulary_size(), 16)(lokasi_input))
    # pertumbuhan_expanded = tf.expand_dims(pertumbuhan_pendapatan_input, axis=-1)

    features = Concatenate()([
        kategori_emb, model_bisnis_emb, skala_emb, jangkauan_emb, lokasi_emb
    ])

    x = Dense(128, activation='relu')(features)
    x = Dropout(0.2)(x)
    x = Dense(64, activation='relu')(x)
    output = Dense(32, activation='relu', name='umkm_embedding')(x)

    return Model(inputs=[
        kategori_input, model_bisnis_input, skala_input, jangkauan_input, lokasi_input
    ], outputs=output, name='umkm_encoder')

In [35]:
def create_investor_encoder():
    """Create Investor encoder model"""
    kategori_input = Input(shape=(), dtype=tf.int32, name='kategori')
    model_bisnis_input = Input(shape=(), dtype=tf.int32, name='model_bisnis')
    skala_input = Input(shape=(), dtype=tf.int32, name='skala')
    jangkauan_input = Input(shape=(), dtype=tf.int32, name='jangkauan')
    lokasi_input = Input(shape=(), dtype=tf.int32, name='lokasi_usaha')

    kategori_emb = Flatten()(Embedding(lokasi_lookup.vocabulary_size(), 16)(kategori_input))
    model_bisnis_emb = Flatten()(Embedding(lokasi_lookup.vocabulary_size(), 16)(model_bisnis_input))
    skala_emb = Flatten()(Embedding(lokasi_lookup.vocabulary_size(), 16)(skala_input))
    jangkauan_emb = Flatten()(Embedding(lokasi_lookup.vocabulary_size(), 16)(jangkauan_input))
    lokasi_emb = Flatten()(Embedding(lokasi_lookup.vocabulary_size(), 16)(lokasi_input))


    features = Concatenate()([
        kategori_emb, model_bisnis_emb, skala_emb, jangkauan_emb, lokasi_emb
    ])

    x = Dense(128, activation='relu')(features)
    x = Dropout(0.2)(x)
    x = Dense(64, activation='relu')(x)
    output = Dense(32, activation='relu', name='investor_embedding')(x)

    return Model(inputs=[
        kategori_input, model_bisnis_input, skala_input, jangkauan_input, lokasi_input
    ], outputs=output, name='investor_encoder')

In [36]:
# Instantiate both encoder towers (UMKM first, then investor).
umkm_encoder, investor_encoder = create_umkm_encoder(), create_investor_encoder()

print("Models created successfully!")
for summary_line in (
    f"UMKM encoder output shape: {umkm_encoder.output_shape}",
    f"Investor encoder output shape: {investor_encoder.output_shape}",
):
    print(summary_line)


Models created successfully!
UMKM encoder output shape: (None, 32)
Investor encoder output shape: (None, 32)


In [40]:
def create_dataset():
    """Build tf.data datasets of UMKM and investor rows keyed by their UUIDs.

    Returns:
        A ``(umkm_dataset, investor_dataset)`` tuple. Each element is a dict
        holding the row's id string plus its encoded categorical features;
        UMKM elements additionally carry ``pertumbuhan_pendapatan``.

    Raises:
        ValueError: If ``umkm_df`` or ``investor_df`` has no rows.
    """
    if not (len(umkm_df) and len(investor_df)):
        raise ValueError("Empty dataset detected. Check umkm_df and investor_df.")

    feature_keys = ('kategori', 'model_bisnis', 'skala', 'jangkauan', 'lokasi_usaha')

    # The id columns are named 'umkm_id' and 'investor_id'; adjust here if the
    # DataFrames use different names.
    umkm_columns = {'umkm_id': umkm_df['umkm_id'].astype(str).values}
    umkm_columns.update({key: umkm_encoded[key] for key in feature_keys})
    umkm_columns['pertumbuhan_pendapatan'] = umkm_df['pertumbuhan_pendapatan']

    investor_columns = {'investor_id': investor_df['investor_id'].astype(str).values}
    investor_columns.update({key: investor_encoded[key] for key in feature_keys})

    return (
        tf.data.Dataset.from_tensor_slices(umkm_columns),
        tf.data.Dataset.from_tensor_slices(investor_columns),
    )

umkm_ds, investor_ds = create_dataset()
print("Datasets created successfully!")

Datasets created successfully!


In [41]:
# ===============================
# RECOMMENDATION MODEL
# ===============================

class UMKMRecommendationModel(tfrs.Model):
    """Complete UMKM-Investor Recommendation Model"""
    
    def __init__(self, umkm_encoder, investor_encoder, umkm_ds):
        super().__init__()
        self.umkm_encoder = umkm_encoder
        self.investor_encoder = investor_encoder
        
        def map_fn(x):
            umkm_features = [
                x['kategori'],
                x['model_bisnis'],
                x['skala'],
                x['jangkauan'],
                x['lokasi_usaha'],
                # x['pertumbuhan_pendapatan']  # Uncomment if needed
            ]
            umkm_embedding = self.umkm_encoder(umkm_features)
            return umkm_embedding
        
        cached_umkm = umkm_ds.batch(1000).cache()
        self.umkm_candidates = cached_umkm.map(map_fn)
        
        self.retrieval_task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=self.umkm_candidates
            )
        )
    
    def call(self, features):
        """Forward pass"""
        investor_emb = self.investor_encoder([
            features['kategori'],
            features['model_bisnis'],
            features['skala'],
            features['jangkauan'],
            features['lokasi_usaha'],
        ])
        
        umkm_emb = self.umkm_encoder([
            features['kategori'],
            features['model_bisnis'],
            features['skala'],
            features['jangkauan'],
            features['lokasi_usaha'],
      
        ])
        
        return {
            'investor_embedding': investor_emb,
            'umkm_embedding': umkm_emb
        }
    
    def compute_loss(self, features, training=False):
        """Compute retrieval loss"""
        model_output = self(features)
        
        return self.retrieval_task(
            query_embeddings=model_output['investor_embedding'],
            candidate_embeddings=model_output['umkm_embedding']
        )


In [42]:
# ===============================
# TRAINING PREPARATION
# ===============================

def create_training_data():
    """Create training data by combining investor and UMKM data"""
    min_size = min(len(umkm_df), len(investor_df))
    if min_size == 0:
        raise ValueError("No data available for training. Check dataset sizes.")
    
    umkm_sample = umkm_ds.take(min_size)
    investor_sample = investor_ds.take(min_size)
    
    def combine_features(investor_features, umkm_features):
        combined = {}
        combined.update(investor_features)
        combined.update(umkm_features)
        return combined
    
    training_ds = tf.data.Dataset.zip((investor_sample, umkm_sample)).map(combine_features)
    training_ds = training_ds.shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
    
    return training_ds

train_ds = create_training_data()
print("Training data prepared!")

# Build the recommendation model and compile it with Adam (learning rate 0.001).
model = UMKMRecommendationModel(
    umkm_encoder=umkm_encoder,
    investor_encoder=investor_encoder,
    umkm_ds=umkm_ds,
)
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=adam_optimizer)

print("Model compiled and ready for training!")


Training data prepared!
Model compiled and ready for training!


In [44]:
# ===============================
# TRAINING
# ===============================

def train_model(epochs=10, checkpoint_dir='checkpoints', log_dir='logs'):
    """Train the recommendation model with callbacks.

    Uses the notebook-level ``model`` and ``train_ds``. The batches of
    ``train_ds`` are split 80/20 into training and validation sets, and the
    best weights (lowest ``val_loss``) are checkpointed and reloaded at the end.

    Args:
        epochs: Maximum number of epochs; EarlyStopping may end training sooner.
        checkpoint_dir: Directory for the best-weights checkpoint file.
        log_dir: Directory for TensorBoard logs.

    Returns:
        The Keras ``History`` object returned by ``model.fit``.

    Raises:
        ValueError: If the 80% share of batches rounds down to zero.
    """
    print(f"Starting training for {epochs} epochs...")

    # train_ds reshuffles on every iteration, so take()/skip() applied to it
    # directly would draw a different train/validation partition each epoch
    # and leak validation rows into training. Caching and doing one full pass
    # freezes the batch order, keeping the two subsets disjoint.
    frozen_batches = train_ds.cache()
    num_batches = sum(1 for _ in frozen_batches)  # the full pass fills the cache

    train_size = int(0.8 * num_batches)
    if train_size == 0:
        raise ValueError("Training dataset is too small. Increase dataset size or adjust split ratio.")

    # The training batches are fixed; only their order changes between epochs.
    train_data = frozen_batches.take(train_size).shuffle(train_size)
    val_data = frozen_batches.skip(train_size)

    os.makedirs(checkpoint_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    checkpoint_path = os.path.join(checkpoint_dir, 'model_best.weights.h5')

    callbacks = [
        # Keep only the weights of the epoch with the lowest validation loss.
        ModelCheckpoint(
            filepath=checkpoint_path,
            monitor='val_loss',
            save_best_only=True,
            save_weights_only=True,
            verbose=1
        ),
        # Stop after 10 epochs without improvement and roll back to the best epoch.
        EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True,
            verbose=1
        ),
        # Halve the learning rate after 5 stagnant epochs, down to 1e-6.
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-6,
            verbose=1
        ),
        TensorBoard(
            log_dir=log_dir,
            histogram_freq=1,
            write_graph=True,
            write_images=True
        )
    ]

    history = model.fit(
        train_data,
        validation_data=val_data,
        epochs=epochs,
        callbacks=callbacks,
        verbose=1
    )

    # Load best weights after training
    if os.path.exists(checkpoint_path):
        model.load_weights(checkpoint_path)
        print(f"Loaded best weights from {checkpoint_path}")
    else:
        print("No checkpoint found, using final model weights")

    print("Training completed!")
    return history

In [45]:
# ===============================
# RECOMMENDATION FUNCTIONS
# ===============================

def get_recommendations(investor_id, top_k=5):
    """Get top-K UMKM recommendations for an investor using UUID.

    The investor's embedding is compared with the embedding of every UMKM by
    cosine similarity, and the best-scoring UMKMs are returned.

    Args:
        investor_id: UUID string to look up in ``investor_df['investor_id']``.
        top_k: Number of recommendations to return.

    Returns:
        A list of ``(umkm_id, score)`` tuples sorted by descending score; ties
        keep the row order of ``umkm_df``. Returns an empty list (after
        printing a message) when the investor id is not found.
    """
    # Use the row POSITION, not the index label: the encoded feature arrays
    # are plain NumPy arrays, so a non-default DataFrame index would otherwise
    # select the wrong row.
    matches = np.flatnonzero((investor_df['investor_id'] == investor_id).to_numpy())
    if matches.size == 0:
        print(f"Invalid investor_id: {investor_id}. Not found in investor_df.")
        return []
    investor_pos = int(matches[0])  # Take the first match

    feature_keys = ('kategori', 'model_bisnis', 'skala', 'jangkauan', 'lokasi_usaha')

    # Investor embedding: a batch containing the single requested investor.
    investor_emb = investor_encoder(
        [tf.constant([investor_encoded[key][investor_pos]]) for key in feature_keys],
        training=False,
    )

    # Embed all UMKMs in one forward pass instead of one model call per row.
    umkm_embs = umkm_encoder(
        [tf.constant(umkm_encoded[key]) for key in feature_keys],
        training=False,
    )

    # tf.keras.losses.cosine_similarity returns the NEGATIVE cosine similarity,
    # so negate it; the single investor row broadcasts against every UMKM row.
    scores = -tf.keras.losses.cosine_similarity(investor_emb, umkm_embs).numpy()

    # Stable descending sort so equal scores keep their original row order.
    ranked = np.argsort(-scores, kind='stable')[:top_k]

    # Convert row positions back to UMKM UUIDs
    umkm_ids = umkm_df['umkm_id'].to_numpy()
    return [(umkm_ids[pos], scores[pos]) for pos in ranked]

def display_recommendations(investor_id, top_k=5):
    """Display recommendations in a readable format using UUID.

    Prints the investor's own attributes followed by the attributes of each
    recommended UMKM. Prints a single error message and returns when the
    investor id is not present in ``investor_df``.

    Args:
        investor_id: UUID string to look up in ``investor_df['investor_id']``.
        top_k: Number of recommendations to request and display.
    """
    # Validate the id before asking for recommendations so that an unknown id
    # is reported once rather than by both functions.
    investor_matches = investor_df.loc[investor_df['investor_id'] == investor_id]
    if len(investor_matches) == 0:
        print(f"Invalid investor_id: {investor_id}. Not found in investor_df.")
        return
    # Select the row through the boolean mask; mixing index labels with
    # positional `iloc` is only correct for a default RangeIndex.
    investor_row = investor_matches.iloc[0]

    recommendations = get_recommendations(investor_id, top_k)

    print(f"\n=== Recommendations for Investor {investor_id} ===")
    print("Investor preferences:")
    print(f"  Location: {investor_row['lokasi_usaha']}")
    print(f"  Category: {investor_row['kategori']}")
    print(f"  Business model: {investor_row['model_bisnis']}")
    print(f"  Scale: {investor_row['skala']}")
    print(f"  Jangkauan: {investor_row['jangkauan']}")

    print(f"\nTop {top_k} recommended UMKMs:")
    for rank, (umkm_id, score) in enumerate(recommendations, 1):
        umkm_row = umkm_df.loc[umkm_df['umkm_id'] == umkm_id].iloc[0]
        print(f"{rank}. UMKM {umkm_id} (Score: {score:.3f})")
        print(f"   Category: {umkm_row['kategori']}")
        print(f"   Location: {umkm_row['lokasi_usaha']}")
        print(f"   Business model: {umkm_row['model_bisnis']}")
        print(f"   Skala usaha: {umkm_row['skala']}")
        print(f"   Jangkauan: {umkm_row['jangkauan']}")
        print(f"   Pertumbuhan pendapatan: {umkm_row['pertumbuhan_pendapatan']:.2f}%")
        print()

In [46]:
# Run training
# Requests up to 100 epochs; train_model's EarlyStopping callback (patience 10
# on val_loss) may stop the run earlier.
history = train_model(epochs=100, checkpoint_dir='checkpoints', log_dir='logs')



Starting training for 100 epochs...
Epoch 1/100
Epoch 1: val_loss improved from inf to 5.54267, saving model to checkpoints\model_best.weights.h5
Epoch 2/100
Epoch 2: val_loss improved from 5.54267 to 5.53991, saving model to checkpoints\model_best.weights.h5
Epoch 3/100
Epoch 3: val_loss improved from 5.53991 to 5.53368, saving model to checkpoints\model_best.weights.h5
Epoch 4/100
Epoch 4: val_loss improved from 5.53368 to 5.52452, saving model to checkpoints\model_best.weights.h5
Epoch 5/100
Epoch 5: val_loss improved from 5.52452 to 5.52217, saving model to checkpoints\model_best.weights.h5
Epoch 6/100
Epoch 6: val_loss improved from 5.52217 to 5.47745, saving model to checkpoints\model_best.weights.h5
Epoch 7/100
Epoch 7: val_loss improved from 5.47745 to 5.41824, saving model to checkpoints\model_best.weights.h5
Epoch 8/100
Epoch 8: val_loss improved from 5.41824 to 5.29634, saving model to checkpoints\model_best.weights.h5
Epoch 9/100
Epoch 9: val_loss did not improve from 5.296

In [47]:
# Restore the best checkpoint when one exists on disk; otherwise keep the
# weights currently held by the model.
checkpoint_path = 'checkpoints/model_best.weights.h5'
if not os.path.exists(checkpoint_path):
    print("No checkpoint found, using current model weights")
else:
    model.load_weights(checkpoint_path)
    print(f"Loaded best weights from {checkpoint_path}")

# Show the top recommendations for one investor
investor_id = '0026147b-6cf0-4f4b-9bc4-fe86f28c3df8'  # Replace with a valid UUID from investor_df
top_k = 5
display_recommendations(investor_id=investor_id, top_k=top_k)

Loaded best weights from checkpoints/model_best.weights.h5

=== Recommendations for Investor 0026147b-6cf0-4f4b-9bc4-fe86f28c3df8 ===
Investor preferences:
  Location: jawa
  Category: retail
  Category: b2c
  Scale: mikro
  Jangkauan: internasional

Top 5 recommended UMKMs:
1. UMKM 313d2bc3-eaf8-4beb-925d-777095ee9db5 (Score: 0.904)
   Category: teknologi
   Location: Yogyakarta, DI Yogyakarta
   Business model: dropship
   Skala usaha: mikro
   Jangkauan: nasional
   Pertumbuhan pendapatan: 0.23%

2. UMKM 62767b5d-8b8f-4f14-b1e4-e5829b3204ce (Score: 0.904)
   Category: kuliner
   Location: Yogyakarta, DI Yogyakarta
   Business model: b2c
   Skala usaha: kecil
   Jangkauan: lokal
   Pertumbuhan pendapatan: 0.48%

3. UMKM 8c1bd284-e129-4d8d-a610-29ba2806895d (Score: 0.904)
   Category: kuliner
   Location: Yogyakarta, DI Yogyakarta
   Business model: dropship
   Skala usaha: kecil
   Jangkauan: regional
   Pertumbuhan pendapatan: 0.26%

4. UMKM b3174faa-de30-43be-801a-cf97fce55661 (Sco