In [1]:
# Import library
import numpy as np
import pandas as pd

In [2]:
# Demand conditions (states) and the promotion strategies (actions) to choose from
states = [
    "low_demand",
    "medium_demand",
    "high_demand",
]
actions = [
    "discount",
    "bogo",
    "free_shipping",
]

In [3]:
# Reward matrix: reward of each promotion under each demand level
# (the values are hand-defined from business logic)
reward_matrix = {
    "low_demand": {
        "discount": 5,
        "bogo": 2,
        "free_shipping": 3,
    },
    "medium_demand": {
        "discount": 7,
        "bogo": 5,
        "free_shipping": 4,
    },
    "high_demand": {
        "discount": 10,
        "bogo": 8,
        "free_shipping": 6,
    },
}

In [4]:
# Q-table starts at zero: one row per state, one column per action
q_table = np.zeros(shape=(len(states), len(actions)), dtype=float)

In [5]:
# Q-learning hyperparameters
learning_rate = 0.1    # step size applied to each Q-value update
discount_factor = 0.9  # weight of the estimated next-state value
epsilon = 0.2          # exploration probability for the epsilon-greedy policy
episodes = 1_000       # number of training episodes

In [6]:
# Reward lookup helper
def get_reward(state, action):
    """Return the reward for taking ``action`` while in ``state``.

    The pair is looked up in the module-level ``reward_matrix``; an unknown
    state or action raises ``KeyError``.
    """
    rewards_for_state = reward_matrix[state]
    return rewards_for_state[action]

In [7]:
# Q-Learning training loop
# A seeded Generator (instead of the unseeded global np.random state) makes a
# fresh "Restart Kernel -> Run All" reproduce the same Q-table.
SEED = 42
MAX_STEPS_PER_EPISODE = 10  # every episode is truncated after this many steps
rng = np.random.default_rng(SEED)

# NOTE: q_table is updated in place and is not reset here, so re-running this
# cell on its own continues training from the current values.
for _ in range(episodes):
    # Start each episode from a uniformly random state
    current_state_idx = rng.integers(len(states))

    for _ in range(MAX_STEPS_PER_EPISODE):
        # Epsilon-greedy action selection
        if rng.random() < epsilon:
            action_idx = rng.integers(len(actions))  # explore
        else:
            action_idx = np.argmax(q_table[current_state_idx])  # exploit

        # Resolve the state/action names and look up the immediate reward
        current_state = states[current_state_idx]
        action = actions[action_idx]
        reward = get_reward(current_state, action)

        # Simplified transition: the next state is drawn uniformly at random
        # and does not depend on the chosen action
        next_state_idx = rng.integers(len(states))

        # Q-learning update: move Q(s, a) towards the TD target
        # reward + discount_factor * max_a' Q(s', a')
        max_next_q = np.max(q_table[next_state_idx])
        td_target = reward + discount_factor * max_next_q
        q_table[current_state_idx, action_idx] += learning_rate * (
            td_target - q_table[current_state_idx, action_idx]
        )

        # Advance to the next state
        current_state_idx = next_state_idx

In [8]:
# Show the trained Q-table, labelled by state (rows) and action (columns)
q_table_df = pd.DataFrame(data=q_table, index=states, columns=actions)
print("Q-Table setelah pelatihan:", q_table_df, sep="\n")

Q-Table setelah pelatihan:
                discount       bogo  free_shipping
low_demand     69.313701  67.168486      68.728335
medium_demand  71.107282  70.217756      69.267841
high_demand    74.596075  73.148202      71.185888


In [9]:
# Report the greedy (highest Q-value) promotion for every demand state
print("\nStrategi Promosi Optimal Berdasarkan Permintaan:")
for state_idx in range(len(states)):
    state = states[state_idx]
    best_action = actions[np.argmax(q_table[state_idx])]
    print(f"- Jika {state}: Pilih promosi '{best_action}'")


Strategi Promosi Optimal Berdasarkan Permintaan:
- Jika low_demand: Pilih promosi 'discount'
- Jika medium_demand: Pilih promosi 'discount'
- Jika high_demand: Pilih promosi 'discount'


In [10]:
# Pengujian: Menggunakan Q-Table untuk memprediksi strategi terbaik
def test_q_learning(state):
    if state not in states:
        raise ValueError(f"State '{state}' tidak valid. Pilih dari {states}.")
    state_idx = states.index(state)
    best_action_idx = np.argmax(q_table[state_idx])
    best_action = actions[best_action_idx]
    return best_action

In [11]:
# Exercise the lookup helper on every demand level
print("\nHasil Pengujian:")
test_states = ["low_demand", "medium_demand", "high_demand"]
# map() is lazy, so each prediction is computed right before its line is printed
for test_state, predicted_action in zip(test_states, map(test_q_learning, test_states)):
    print(f"- Jika {test_state}: Strategi terbaik adalah '{predicted_action}'")


Hasil Pengujian:
- Jika low_demand: Strategi terbaik adalah 'discount'
- Jika medium_demand: Strategi terbaik adalah 'discount'
- Jika high_demand: Strategi terbaik adalah 'discount'
