In [35]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import joblib
from collections import defaultdict

# Generate dummy data
def generate_dummy_data(num_rows=10000):
    """Build a DataFrame of randomly generated purchase transactions.

    Every row gets a user drawn from 100 synthetic user ids, a sequential
    transaction id (``trans_<row index>``), one to three distinct items, an
    amount between 100000 and 1000000 rounded to two decimals, a timestamp
    at most 365 days before "now", and one of five payment methods.

    Args:
        num_rows: Number of transactions to generate.

    Returns:
        A DataFrame with the columns ``user_id``, ``transaction_id``,
        ``items``, ``payment_amount``, ``date`` and ``payment_method``.
    """
    column_names = ['user_id', 'transaction_id', 'items', 'payment_amount', 'date', 'payment_method']
    user_pool = [f"user_{n}" for n in range(100)]  # 100 unique users
    item_pool = ['buku', 'pulpen', 'air galon', 'snack', 'jaket', 'payung', 'tas']
    method_pool = ['VA Mandiri', 'VA BNI', 'VA BRI', 'VA BSI', 'VA BTN']

    records = []
    for idx in range(num_rows):
        # The order of the random draws is kept fixed so that a seeded
        # `random` module always produces the same rows.
        buyer = random.choice(user_pool)
        basket_size = random.randint(1, 3)
        basket = random.sample(item_pool, basket_size)
        amount = round(random.uniform(100000, 1000000), 2)
        timestamp = datetime.now() - timedelta(days=random.randint(0, 365))
        method = random.choice(method_pool)
        records.append((buyer, f"trans_{idx}", basket, amount, timestamp, method))

    return pd.DataFrame(records, columns=column_names)

# Q-learning agent
class QLearningAgent:
    """Tabular epsilon-greedy Q-learning agent.

    Q-values are stored per state in ``q_table``; each entry is a NumPy
    vector with one value per action. States must be hashable.

    Args:
        state_size: Stored on the instance but not used by any method here.
        action_size: Number of discrete actions (length of each Q-vector).
        learning_rate: Step size of the Q-value update.
        discount_factor: Weight of the best next-state Q-value.
        epsilon: Initial probability of picking a random action.
        epsilon_min: Lower bound applied when decaying ``epsilon``.
        epsilon_decay: Multiplicative decay applied by ``decay_epsilon``.
    """

    def __init__(self, state_size, action_size, learning_rate=0.1, discount_factor=0.95, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.9995):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        # A bound method (not a lambda) is used as the factory so the table
        # can still be pickled together with the agent.
        self.q_table = defaultdict(self.default_q_value)

    def default_q_value(self):
        """Return the all-zero Q-vector used for a state seen for the first time."""
        return np.zeros(self.action_size)

    def get_action(self, state):
        """Choose an action for ``state`` with the epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest stored Q-value. An unknown
        state yields action 0 (the argmax of an all-zero vector) and is NOT
        added to the table, so pure look-ups never grow ``q_table``.
        """
        if np.random.rand() < self.epsilon:
            return np.random.randint(self.action_size)
        # `.get` bypasses the defaultdict factory: reading must not insert.
        q_values = self.q_table.get(state)
        if q_values is None:
            return 0
        return int(np.argmax(q_values))

    def update_q_table(self, state, action, reward, next_state):
        """Apply one Q-learning update for the given transition.

        ``state`` is created in the table if missing. ``next_state`` is only
        read; when it is unknown its best Q-value is taken to be 0, which is
        what a freshly created zero vector would give.
        """
        q_values = self.q_table[state]  # intentional insert for new states
        current_q = q_values[action]
        next_q_values = self.q_table.get(next_state)
        next_max_q = 0.0 if next_q_values is None else np.max(next_q_values)
        new_q = current_q + self.learning_rate * (reward + self.discount_factor * next_max_q - current_q)
        q_values[action] = new_q

    def decay_epsilon(self):
        """Multiply ``epsilon`` by ``epsilon_decay``, never going below ``epsilon_min``."""
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def learn_from_new_data(self, new_data, num_episodes=1000, batch_size=32):
        """Continue training on a preprocessed DataFrame.

        Each episode samples ``batch_size`` rows with replacement, rewards
        the agent with +1 when its action equals the row's
        ``payment_method_encoded`` and -1 otherwise, then decays epsilon.
        Progress is printed every 100 episodes.

        Args:
            new_data: DataFrame holding the columns read by ``get_state``
                plus ``payment_method_encoded``.
            num_episodes: Number of sampled batches to learn from.
            batch_size: Rows sampled per episode.
        """
        if len(new_data) == 0:
            # Nothing to sample from; DataFrame.sample would raise.
            return

        for episode in range(num_episodes):
            batch = new_data.sample(n=batch_size, replace=True)
            for _, row in batch.iterrows():
                state = get_state(row)
                action = self.get_action(state)
                # The "next state" is simply another random transaction.
                next_state = get_state(new_data.iloc[np.random.randint(len(new_data))])
                reward = 1 if action == row['payment_method_encoded'] else -1
                self.update_q_table(state, action, reward, next_state)

            self.decay_epsilon()

            if episode % 100 == 0:
                print(f"Episode: {episode}, Epsilon: {self.epsilon:.4f}")

# Define state
def get_state(row):
    """Turn one preprocessed transaction into a hashable state tuple.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing
            ``user_id_encoded``, ``items_encoded``, ``payment_amount``,
            ``day_of_week`` and ``month``.

    Returns:
        ``(user, items, amount, day_of_week, month)`` where ``items`` has
        been converted to a tuple so the whole state can be a dict key.
    """
    user = row['user_id_encoded']
    basket = tuple(row['items_encoded'])  # sequences/arrays are not hashable
    amount = row['payment_amount']
    weekday = row['day_of_week']
    month = row['month']
    return (user, basket, amount, weekday, month)

# Preprocess data
def preprocess_data(df):
    """Fit the encoders/scaler on ``df`` and add the model feature columns.

    Adds ``user_id_encoded``, ``items_encoded``, ``payment_method_encoded``,
    ``day_of_week`` and ``month`` to ``df`` (in place) and standardises
    ``payment_amount``, ``day_of_week`` and ``month``.

    The item encoder is fitted ONCE on every item that occurs anywhere in
    ``df['items']``. Refitting it per row would give the same item a
    different code depending on which other items share its basket, and
    would leave the returned encoder knowing only the last row's items.

    Args:
        df: DataFrame with ``user_id``, ``items`` (a sequence of item names
            per row), ``payment_method``, ``payment_amount`` and a datetime
            ``date`` column.

    Returns:
        ``(df, le_user, le_item, le_method, scaler)`` - the augmented frame
        and the fitted preprocessors needed to transform later data.
    """
    le_user = LabelEncoder()
    le_item = LabelEncoder()
    le_method = LabelEncoder()
    scaler = StandardScaler()

    df['user_id_encoded'] = le_user.fit_transform(df['user_id'])

    # Learn one global item vocabulary, then encode each basket with it.
    all_items = [item for basket in df['items'] for item in basket]
    le_item.fit(all_items)
    df['items_encoded'] = df['items'].apply(lambda basket: le_item.transform(list(basket)))

    df['payment_method_encoded'] = le_method.fit_transform(df['payment_method'])

    df['day_of_week'] = df['date'].dt.dayofweek
    df['month'] = df['date'].dt.month

    numeric_cols = ['payment_amount', 'day_of_week', 'month']
    df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

    return df, le_user, le_item, le_method, scaler

# Process new data
def process_new_data(new_data, le_user, le_item, le_method, scaler):
    """Apply already-fitted preprocessors to a fresh batch of transactions.

    The feature columns are added to ``new_data`` in place and the same
    frame is returned. Items that are not among ``le_item.classes_`` are
    dropped from a basket before encoding; users and payment methods are
    passed to their encoders unfiltered.

    Args:
        new_data: Raw transactions with ``user_id``, ``items``,
            ``payment_method``, ``payment_amount`` and a datetime ``date``.
        le_user: Fitted encoder for user ids.
        le_item: Fitted encoder for item names.
        le_method: Fitted encoder for payment methods.
        scaler: Fitted scaler for the three numeric feature columns.

    Returns:
        ``new_data`` with the encoded and scaled feature columns.
    """
    numeric_cols = ['payment_amount', 'day_of_week', 'month']

    def encode_basket(basket):
        # Keep only items the encoder knows about, then encode them.
        recognised = [item for item in basket if item in le_item.classes_]
        return le_item.transform(recognised)

    new_data['user_id_encoded'] = le_user.transform(new_data['user_id'])
    new_data['items_encoded'] = new_data['items'].apply(encode_basket)
    new_data['payment_method_encoded'] = le_method.transform(new_data['payment_method'])

    timestamps = new_data['date']
    new_data['day_of_week'] = timestamps.dt.dayofweek
    new_data['month'] = timestamps.dt.month

    new_data[numeric_cols] = scaler.transform(new_data[numeric_cols])
    return new_data

# Train the model
def train_model(df, num_episodes=10000, batch_size=32, action_size=None):
    """Train a new ``QLearningAgent`` on preprocessed transactions.

    Each episode samples ``batch_size`` rows, lets the agent pick a payment
    method for every row, and rewards it with +1 for a match with
    ``payment_method_encoded`` and -1 otherwise. Epsilon decays once per
    episode and progress is printed every 100 episodes.

    Args:
        df: Preprocessed DataFrame holding the columns read by ``get_state``
            plus ``payment_method_encoded``.
        num_episodes: Number of sampled batches to train on.
        batch_size: Rows sampled per episode.
        action_size: Number of payment-method classes. When ``None`` it is
            inferred as ``max(payment_method_encoded) + 1`` so that every
            class present in ``df`` is a selectable action.

    Returns:
        The trained agent.

    Raises:
        ValueError: If ``df`` is empty.
    """
    if len(df) == 0:
        raise ValueError("train_model requires a non-empty DataFrame")

    if action_size is None:
        # Label-encoded classes are 0..k-1, so the largest code + 1 is k.
        action_size = int(df['payment_method_encoded'].max()) + 1

    agent = QLearningAgent(state_size=None, action_size=action_size)

    # Only fall back to sampling with replacement when the frame is too
    # small to supply a full batch of distinct rows.
    needs_replacement = len(df) < batch_size

    for episode in range(num_episodes):
        batch = df.sample(n=batch_size, replace=needs_replacement)
        for _, row in batch.iterrows():
            state = get_state(row)
            action = agent.get_action(state)
            # The "next state" is simply another random transaction.
            next_state = get_state(df.iloc[np.random.randint(len(df))])
            reward = 1 if action == row['payment_method_encoded'] else -1
            agent.update_q_table(state, action, reward, next_state)

        agent.decay_epsilon()

        if episode % 100 == 0:
            print(f"Episode: {episode}, Epsilon: {agent.epsilon:.4f}")

    return agent

# Evaluate the model
def evaluate_model(agent, data):
    """Return the fraction of rows whose payment method the agent predicts.

    Predictions are made greedily: ``agent.epsilon`` is set to 0 for the
    duration of the evaluation (and restored afterwards) so that random
    exploration does not distort the measured accuracy.

    Args:
        agent: Agent exposing ``get_action(state)`` and an ``epsilon``
            attribute.
        data: Preprocessed DataFrame holding the columns read by
            ``get_state`` plus ``payment_method_encoded``.

    Returns:
        Accuracy in ``[0, 1]``; ``0.0`` when ``data`` is empty.
    """
    total = len(data)
    if total == 0:
        return 0.0

    saved_epsilon = agent.epsilon
    agent.epsilon = 0.0  # disable exploration while scoring
    try:
        correct_predictions = sum(
            1
            for _, row in data.iterrows()
            if agent.get_action(get_state(row)) == row['payment_method_encoded']
        )
    finally:
        agent.epsilon = saved_epsilon

    return correct_predictions / total

# Continuous learning
def continuous_learning(agent, le_user, le_item, le_method, scaler):
    """Interactively keep training the agent on freshly generated batches.

    Every round generates 1000 dummy transactions (a stand-in for data that
    would arrive from a real system), preprocesses them with the fitted
    encoders/scaler, trains the agent on them and prints the accuracy
    measured on that same batch. The user is then prompted; entering ``q``
    (any case) ends the loop, anything else starts another round.

    Args:
        agent: Agent to keep training.
        le_user: Fitted encoder for user ids.
        le_item: Fitted encoder for item names.
        le_method: Fitted encoder for payment methods.
        scaler: Fitted scaler for the numeric feature columns.
    """
    keep_going = True
    while keep_going:
        # In a real scenario, this would be new data received from your system
        incoming = process_new_data(generate_dummy_data(1000), le_user, le_item, le_method, scaler)

        print("Received new data. Learning from it...")
        agent.learn_from_new_data(incoming)

        # NOTE: measured on the batch the agent has just learned from.
        batch_accuracy = evaluate_model(agent, incoming)
        print(f"Accuracy on new data: {batch_accuracy:.4f}")

        answer = input("Press Enter to process next batch of data, or 'q' to quit: ")
        keep_going = answer.lower() != 'q'

# Main execution
if __name__ == "__main__":
    # Build the initial data set, show it, and derive the model features.
    df = generate_dummy_data(10000)
    print(df)
    df, le_user, le_item, le_method, scaler = preprocess_data(df)

    # Hold out 20% of the rows for evaluation (fixed seed for repeatability).
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

    # Fit the agent and report how it does on the held-out rows.
    agent = train_model(train_df)
    test_accuracy = evaluate_model(agent, test_df)
    print(f"Accuracy on test data: {test_accuracy:.4f}")

    # Interactive loop; returns once the user enters 'q'.
    continuous_learning(agent, le_user, le_item, le_method, scaler)

    # Persist the agent; a failure here is reported but does not stop the
    # preprocessors below from being written.
    try:
        joblib.dump(agent, 'q_learning_agent.joblib')
    except Exception as e:
        print(f"Error saving agent: {e}")
    else:
        print("Agent saved successfully.")

    # Persist every fitted preprocessor next to the agent.
    for _artifact, _filename in (
        (le_user, 'le_user.joblib'),
        (le_item, 'le_item.joblib'),
        (le_method, 'le_method.joblib'),
        (scaler, 'scaler.joblib'),
    ):
        joblib.dump(_artifact, _filename)
    print("Preprocessors saved successfully.")

# Loading the model (for future use)
def load_model():
    """Load the persisted agent and preprocessors from the working directory.

    Returns:
        ``(agent, le_user, le_item, le_method, scaler)`` read, in that
        order, from ``q_learning_agent.joblib``, ``le_user.joblib``,
        ``le_item.joblib``, ``le_method.joblib`` and ``scaler.joblib``.
    """
    # joblib.load unpickles its input, so only load files you trust.
    artifact_files = (
        'q_learning_agent.joblib',
        'le_user.joblib',
        'le_item.joblib',
        'le_method.joblib',
        'scaler.joblib',
    )
    return tuple(joblib.load(filename) for filename in artifact_files)

      user_id transaction_id                    items  payment_amount  \
0     user_54        trans_0                 [payung]       839023.28   
1     user_82        trans_1          [pulpen, snack]       115433.21   
2     user_94        trans_2  [snack, payung, pulpen]       472886.31   
3     user_98        trans_3      [tas, snack, jaket]       974511.98   
4     user_33        trans_4                 [pulpen]       426191.61   
...       ...            ...                      ...             ...   
9995   user_2     trans_9995                 [pulpen]       229704.51   
9996  user_17     trans_9996                 [pulpen]       427062.18   
9997  user_67     trans_9997          [payung, jaket]       517338.51   
9998  user_94     trans_9998    [jaket, buku, payung]       607032.80   
9999   user_8     trans_9999              [air galon]       548051.20   

                           date payment_method  
0    2024-08-21 22:03:12.598092         VA BTN  
1    2023-12-21 22:03:12.

Press Enter to process next batch of data, or 'q' to quit:  


Received new data. Learning from it...
Episode: 0, Epsilon: 0.0100
Episode: 100, Epsilon: 0.0100
Episode: 200, Epsilon: 0.0100
Episode: 300, Epsilon: 0.0100
Episode: 400, Epsilon: 0.0100
Episode: 500, Epsilon: 0.0100
Episode: 600, Epsilon: 0.0100
Episode: 700, Epsilon: 0.0100
Episode: 800, Epsilon: 0.0100
Episode: 900, Epsilon: 0.0100
Accuracy on new data: 0.6020


Press Enter to process next batch of data, or 'q' to quit:  1


Received new data. Learning from it...
Episode: 0, Epsilon: 0.0100
Episode: 100, Epsilon: 0.0100
Episode: 200, Epsilon: 0.0100
Episode: 300, Epsilon: 0.0100
Episode: 400, Epsilon: 0.0100
Episode: 500, Epsilon: 0.0100
Episode: 600, Epsilon: 0.0100
Episode: 700, Epsilon: 0.0100
Episode: 800, Epsilon: 0.0100
Episode: 900, Epsilon: 0.0100
Accuracy on new data: 0.5950


Press Enter to process next batch of data, or 'q' to quit:  q


Agent saved successfully.
Preprocessors saved successfully.


In [40]:
# Test with single user input
def test_single_input(agent, le_user, le_item, le_method, scaler):
    """Predict and print the payment method for one hard-coded transaction.

    The sample transaction is echoed, converted into a state and passed to
    the agent. The prediction is made greedily: ``agent.epsilon`` is set to
    0 for the look-up and restored afterwards, so the printed result is the
    learned choice rather than an occasional random exploration step.

    Args:
        agent: Trained agent exposing ``get_action(state)`` and ``epsilon``.
        le_user: Fitted encoder for user ids.
        le_item: Fitted encoder for item names.
        le_method: Fitted encoder used to map the action back to a label.
        scaler: Fitted scaler for the numeric feature columns.
    """
    user_input = {
        'user_id': 'user_3',
        'items': ['buku', 'pulpen'],
        'payment_amount': 100000,
        'date': datetime.now()
    }

    print('User ID:', user_input['user_id'])
    print('Items:', user_input['items'])
    print('Payment amount:', user_input['payment_amount'])
    print('Date of transaction:', user_input['date'])

    # NOTE(review): process_single_record is not defined in the visible part
    # of this notebook; it is presumably defined in another cell and is
    # expected to return a state compatible with the agent's Q-table keys.
    state = process_single_record(user_input, le_user, le_item, scaler)

    saved_epsilon = agent.epsilon
    agent.epsilon = 0.0  # disable exploration for the prediction
    try:
        action = agent.get_action(state)
    finally:
        agent.epsilon = saved_epsilon

    predicted_payment_method = le_method.inverse_transform([action])[0]

    print(f"Predicted payment method: {predicted_payment_method}")

# Main execution
if __name__ == "__main__":
    # Restore the agent and the fitted preprocessors saved by the training run.
    (agent, le_user, le_item, le_method, scaler) = load_model()

    # Run the single-transaction prediction demo with the restored objects.
    test_single_input(
        agent=agent,
        le_user=le_user,
        le_item=le_item,
        le_method=le_method,
        scaler=scaler,
    )

User ID: user_3
Items: ['buku', 'pulpen']
Payment smount: 100000
Date of transaction: 2024-09-22 22:06:18.133949
Predicted payment method: VA BNI


