In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np

import matplotlib.pyplot as plt
import numpy as np

In [3]:
df = pd.read_csv("Adaptive_UI_Personalization_Dataset-3.csv")

In [4]:
df.head()

Unnamed: 0,App Name,Usage Time (mins),Topic Keywords,Engagement Level,Device,Time of Day,Previous Question Answered,Suggested Question
0,Instagram,28,Cybersecurity,High,Smartphone,Night,Yes,How do you prevent phishing attacks?
1,YouTube,58,Cybersecurity,Medium,Desktop,Morning,Yes,Have you implemented encryption in your projects?
2,Instagram,21,Cybersecurity,High,Smartphone,Evening,Yes,What is your favorite cybersecurity framework?
3,LinkedIn,54,Cybersecurity,High,Tablet,Evening,No,What tools do you recommend for threat detection?
4,LinkedIn,58,Data Science,Low,Tablet,Morning,No,How do you clean messy datasets?


In [5]:
# Encode categorical variables
encoder = LabelEncoder()

categorical_columns = ["App Name", "Topic Keywords", "Engagement Level", "Device", "Time of Day", "Previous Question Answered"]
for col in categorical_columns:
    df[col] = encoder.fit_transform(df[col])

# Features (State Space)
X = df[["App Name", "Usage Time (mins)", "Topic Keywords", "Engagement Level", "Device", "Time of Day", "Previous Question Answered"]].values

# Actions (Suggested Questions)
actions = list(df["Suggested Question"].unique())
action_mapping = {action: idx for idx, action in enumerate(actions)}
df["Action"] = df["Suggested Question"].map(action_mapping)

# Number of states and actions
n_states = len(X)
n_actions = len(actions)

print(f"Number of States: {n_states}, Number of Actions: {n_actions}")

Number of States: 1000, Number of Actions: 15


In [6]:
class ContextualBandit:
    def __init__(self, n_states, n_actions):
        self.n_states = n_states
        self.n_actions = n_actions
        self.q_table = np.zeros((n_states, n_actions))  # Initialize Q-values

    def choose_action(self, state, epsilon=0.1):
        if np.random.rand() < epsilon:  # Exploration
            return np.random.randint(self.n_actions)
        else:  # Exploitation
            return np.argmax(self.q_table[state])

    def update(self, state, action, reward, alpha=0.1):
        # Q-Learning Update
        self.q_table[state, action] += alpha * (reward - self.q_table[state, action])

# Initialize Bandit
bandit = ContextualBandit(n_states, n_actions)

In [7]:
# Simulated Training
n_episodes = 1000
epsilon = 0.1  # Exploration rate
alpha = 0.1    # Learning rate
rewards = []

for episode in range(n_episodes):
    # Randomly sample a state
    state_idx = np.random.randint(0, n_states)
    action_idx = bandit.choose_action(state_idx, epsilon=epsilon)

    # Simulated Reward: High engagement = +1, Medium = 0, Low = -1
    engagement_level = df.iloc[state_idx]["Engagement Level"]
    reward = 1 if engagement_level > 1 else (0 if engagement_level == 1 else -1)

    # Update the Bandit's Q-values
    bandit.update(state_idx, action_idx, reward, alpha)
    rewards.append(reward)

print(f"Training complete. Average reward: {np.mean(rewards):.2f}")

Training complete. Average reward: -0.01


In [20]:
# Test the trained model on a random state
test_state_idx = np.random.randint(0, n_states)
selected_action_idx = bandit.choose_action(test_state_idx, epsilon=0.0)  # Exploitation

# Retrieve the recommended question
recommended_question = actions[selected_action_idx]

print(f"Test State Index: {test_state_idx}")
print(f"Recommended Question: {recommended_question}")

Test State Index: 219
Recommended Question: Have you implemented encryption in your projects?
