In [1]:
cd /content/drive/My Drive

/content/drive/My Drive


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import random
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

In [3]:
data = pd.read_csv('events.csv') # Read dataset

In [4]:
data = data.sample(frac=0.25) # Decrease dataset
data = data.reset_index(drop=True) # Reset index

In [5]:
data.shape[0]

689025

In [6]:
data.head(10)

Unnamed: 0,timestamp,visitorid,event,itemid,transactionid
0,1441411776933,392721,view,344955,
1,1437511037836,896347,transaction,449391,8717.0
2,1434425891777,802320,view,33947,
3,1432913421666,1195450,view,294654,
4,1432749075468,467340,view,217763,
5,1433206540245,530831,view,340982,
6,1433695722215,1043520,view,322955,
7,1442244250540,1315473,view,55834,
8,1442023391576,498977,view,150445,
9,1432765417679,692829,view,138430,


In [7]:
# Initializing Q-Table
unique_states = data['visitorid'].unique()
unique_actions = data['itemid'].unique()
num_states = len(unique_states)
num_actions = len(unique_actions)
q_table = np.zeros((num_states, num_actions))

# Q-Learning
def train_q_learning_model(data):


    unique_states = data['visitorid'].unique()
    unique_actions = data['itemid'].unique()
    num_states = len(unique_states)
    num_actions = len(unique_actions)
    q_table = np.zeros((num_states, num_actions))


    learning_rate = 0.1
    discount_rate = 0.9

    for visitor_id, visitor_group in data.groupby('visitorid'):
        session = visitor_group['itemid'].values
        for i in range(len(session) - 1):
            state = np.where(unique_states == visitor_id)[0][0]
            action = np.where(unique_actions == session[i + 1])[0][0]
            max_next_q_value = np.max(q_table[state, :])
            new_q_value = (1 - learning_rate) * q_table[state, action] + learning_rate * (session[-1] + discount_rate * max_next_q_value)
            q_table[state, action] = new_q_value

    return q_table

In [8]:
# Predict next item
def predict_next_item(visitor_id, unique_states, unique_actions, q_table):
    state_indices = np.where(unique_states == visitor_id)[0]
    if len(state_indices) > 0:
        state = state_indices[0]
        q_values = q_table[state, :]
        predicted_next_item = unique_actions[np.argmax(q_values)]
        return predicted_next_item
    else:
        return None

# Learing Q-Learning
q_table = train_q_learning_model(data)

# Recommend next item for special user
visitor_id = 550439
predicted_item = predict_next_item(visitor_id, unique_states, unique_actions, q_table)
print("Recommend next item:", predicted_item)

# Predict next 5 items
def recommend_items(visitor_id, unique_states, unique_actions, q_table, num_recommendations=5):
    state_indices = np.where(unique_states == visitor_id)[0]
    if len(state_indices) > 0:
        state = state_indices[0]
        q_values = q_table[state, :]
        top_indices = np.argsort(q_values)[::-1][:num_recommendations]
        recommended_items = unique_actions[top_indices]
        return recommended_items
    else:
        return None


# Recommend 5 items
visitor_id = 550439
recommended_items = recommend_items(visitor_id, unique_states, unique_actions, q_table, num_recommendations=5)
print("Recommend 5 items:", recommended_items)

Recommend next item: 11957
Recommend 5 items: [ 11957 369702 281604 250046 201583]


In [9]:
# Split the dataset into training and test sets
train_data, test_data = train_test_split(data, test_size=0.2)

# Extract unique states and actions from the training set
unique_states_train = train_data['visitorid'].unique()
unique_actions_train = train_data['itemid'].unique()

# Train Q-learning model on the training set
q_table = train_q_learning_model(train_data)

# Define a function to evaluate the model on the test set
def evaluate_model(test_data, unique_states, unique_actions, q_table):
    true_sequences = []
    predicted_sequences = []

    for visitor_id, visitor_group in test_data.groupby('visitorid'):
        # Exclude visitors with less than 5 sessions
        if visitor_group.shape[0] < 5:
            continue

        session = visitor_group['itemid'].values[:-5]  # Exclude the last 5 items as they have no next items
        true_next_items = visitor_group['itemid'].values[-5:]  # The last 5 items are the true next items

        predicted_items = []
        for i in range(5):
            predicted_item = predict_next_item(visitor_id, unique_states, unique_actions, q_table)
            if predicted_item is not None:
                predicted_items.append(predicted_item)

        if len(predicted_items) == 5:  # Ensure the predicted sequence has 5 items
            true_sequences.append(true_next_items)
            predicted_sequences.append(predicted_items)

    true_sequences = np.array(true_sequences)
    predicted_sequences = np.array(predicted_sequences)

    accuracy = accuracy_score(true_sequences.ravel(), predicted_sequences.ravel())
    f1 = f1_score(true_sequences.ravel(), predicted_sequences.ravel(), average='weighted')

    return accuracy, f1

In [10]:
# Evaluate the model on the test set
accuracy, f1 = evaluate_model(test_data, unique_states_train, unique_actions_train, q_table)

print("Accuracy:", accuracy)
print("F1 Score:", f1)

Accuracy: 0.13700305810397553
F1 Score: 0.10268934758950948
