<a href="https://colab.research.google.com/github/Edward-Eughene-Timothy/DQN-RL-for-Hikikomori-Reintegration/blob/testing/Reinforcement_learning_with_Deep_Q_Networks_for_Hikikomori.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the notebook's dependencies into the runtime.
# TensorFlow and Keras are pinned to 2.12.0; gym and keras-rl2 are unpinned,
# so the versions that get installed can differ from one run to the next.
# NOTE(review): the environment below uses the 4-tuple step() / bare-observation
# reset() gym API — consider pinning gym to a release that matches; confirm
# which versions keras-rl2 supports.
!pip install gym
!pip install keras-rl2
!pip install tensorflow==2.12.0
!pip install keras==2.12.0
# Register a global filter that ignores every Python warning.
import warnings
warnings.filterwarnings('ignore')



In [None]:
import gym
import numpy as np
from gym import spaces
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers.legacy import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

class HikikomoriEnv(gym.Env):
    """Toy environment modelling gradual social re-engagement.

    The state is a single integer "social engagement level" in
    ``[0, max_state]``. On every step the agent picks a task difficulty
    (0 = easy, 1 = medium, 2 = hard) and the task succeeds with a
    difficulty-dependent probability (see ``SUCCESS_PROB``). A success raises
    the level by one and pays ``action + 1``; a failure leaves the level
    unchanged and pays ``-1``. The episode ends once the level reaches
    ``max_state``.

    ``reset()`` returns only the observation and ``step()`` returns the
    4-tuple ``(observation, reward, done, info)``.
    """

    # Probability that the simulated user completes a task, keyed by action.
    SUCCESS_PROB = {0: 0.9, 1: 0.6, 2: 0.3}

    def __init__(self, seed=None):
        """Create the environment.

        Args:
            seed: Optional integer seed. When given, the environment draws
                from its own ``np.random.RandomState(seed)`` so episodes are
                repeatable. When ``None`` (the default) it draws from the
                global ``np.random`` stream.
        """
        super(HikikomoriEnv, self).__init__()

        # Define state space (Social Engagement Level: 0 - 10)
        self.state = 0  # Start at lowest social engagement
        self.max_state = 10

        # Both the np.random module and RandomState expose ``rand()``.
        self._rng = np.random if seed is None else np.random.RandomState(seed)

        # Define action space (Task difficulty: 0 = Easy, 1 = Medium, 2 = Hard)
        self.action_space = spaces.Discrete(len(self.SUCCESS_PROB))

        # Observation space (Current engagement level)
        self.observation_space = spaces.Box(low=0, high=self.max_state, shape=(1,), dtype=np.int32)

    def _observation(self):
        """Return the current state as an array in the declared observation dtype."""
        # Cast explicitly: a bare np.array([int]) uses the platform default
        # integer type, which need not match the int32 observation space.
        return np.array([self.state], dtype=self.observation_space.dtype)

    def step(self, action):
        """Apply one task of the chosen difficulty.

        Args:
            action: 0 (Easy Task), 1 (Medium Task) or 2 (Hard Task).

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``observation``
            is a shape-(1,) array holding the engagement level, ``reward`` is
            ``action + 1`` on success or ``-1`` on failure, ``done`` is True
            once the level equals ``max_state``, and ``info`` is an empty dict.

        Raises:
            KeyError: If ``action`` is not a key of ``SUCCESS_PROB``.
        """
        # Simulate user response (higher chance to complete easier tasks)
        completed = self._rng.rand() < self.SUCCESS_PROB[action]

        if completed:
            self.state = min(self.state + 1, self.max_state)  # Increase engagement level
            reward = action + 1  # Reward based on task difficulty
        else:
            reward = -1  # Negative reward for failure

        # The episode is over once the max engagement level is reached
        done = self.state == self.max_state

        return self._observation(), reward, done, {}

    def reset(self):
        """Reset the engagement level to 0 and return the initial observation."""
        self.state = 0
        return self._observation()

    def render(self, mode='human'):
        """Print the current engagement level; ``mode`` is accepted but not used."""
        print(f"Current Social Engagement Level: {self.state}")



In [None]:
# Build Deep Q-Network (DQN) model
def build_model(state_shape, action_size):
    """Assemble the feed-forward Q-network.

    Args:
        state_shape: Shape tuple of a single observation.
        action_size: Number of discrete actions; one linear output per action.

    Returns:
        An uncompiled ``Sequential`` model: Flatten -> Dense(24, relu) ->
        Dense(24, relu) -> Dense(action_size, linear).
    """
    # A leading axis of length 1 is prepended to the observation shape
    # (presumably the agent memory's window_length=1 — confirm).
    windowed_shape = (1,) + state_shape
    network_layers = [
        Flatten(input_shape=windowed_shape),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(action_size, activation='linear'),  # one Q-value per action
    ]
    return Sequential(network_layers)

In [None]:
# Build DQN Agent
def build_agent(model, action_size):
    """Wrap a Q-network in a DQN agent.

    Args:
        model: The Q-network with ``action_size`` outputs.
        action_size: Number of discrete actions available to the agent.

    Returns:
        An uncompiled ``DQNAgent`` using a 50000-entry ``SequentialMemory``
        (window length 1) and a ``BoltzmannQPolicy``.
    """
    replay_memory = SequentialMemory(limit=50000, window_length=1)
    exploration_policy = BoltzmannQPolicy()
    # NOTE(review): target_model_update=1e-2 is presumably a soft-update rate
    # and nb_steps_warmup=10 a count of steps before learning starts —
    # confirm against the keras-rl documentation.
    return DQNAgent(
        model=model,
        nb_actions=action_size,
        memory=replay_memory,
        policy=exploration_policy,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

In [None]:
if __name__ == "__main__":
    # Seed Python's `random`, NumPy and TensorFlow before anything stochastic
    # is built, so that training runs can be repeated. Imported locally so
    # this cell does not depend on a change to the import cell.
    from tensorflow.keras.utils import set_random_seed

    SEED = 42
    set_random_seed(SEED)

    # Build the environment and read the sizes the network needs from it.
    env = HikikomoriEnv()
    state_shape = env.observation_space.shape
    action_size = env.action_space.n
    warnings.filterwarnings('ignore')

    # Q-network -> DQN agent -> compile with Adam, tracking mean absolute error.
    model = build_model(state_shape, action_size)
    dqn = build_agent(model, action_size)
    dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

    print("Training DQN agent...")
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)



Training DQN agent...
Training for 50000 steps ...
Interval 1 (0 steps performed)
597 episodes - episode_reward: 10.333 [-2.000, 21.000] - loss: 0.491 - mae: 4.392 - mean_q: 6.422

Interval 2 (10000 steps performed)
591 episodes - episode_reward: 10.481 [-8.000, 21.000] - loss: 0.426 - mae: 4.623 - mean_q: 6.759

Interval 3 (20000 steps performed)
590 episodes - episode_reward: 10.412 [-6.000, 20.000] - loss: 0.430 - mae: 4.638 - mean_q: 6.773

Interval 4 (30000 steps performed)
594 episodes - episode_reward: 10.781 [-9.000, 21.000] - loss: 0.429 - mae: 4.666 - mean_q: 6.817

Interval 5 (40000 steps performed)
done, took 386.875 seconds


In [None]:
# Evaluate the trained agent over 100 episodes. With visualize=True the
# environment's render() output (the current engagement level) is printed
# as the episodes run.
print("Testing trained agent...")
scores = dqn.test(
    env,
    nb_episodes=100,
    visualize=True,
)
# Mean total reward per test episode.
print(np.mean(scores.history['episode_reward']))

Testing trained agent...
Testing for 100 episodes ...
Current Social Engagement Level: 1
Current Social Engagement Level: 1
Current Social Engagement Level: 2
Current Social Engagement Level: 2
Current Social Engagement Level: 2
Current Social Engagement Level: 2
Current Social Engagement Level: 3
Current Social Engagement Level: 4
Current Social Engagement Level: 4
Current Social Engagement Level: 4
Current Social Engagement Level: 5
Current Social Engagement Level: 6
Current Social Engagement Level: 7
Current Social Engagement Level: 7
Current Social Engagement Level: 8
Current Social Engagement Level: 9
Current Social Engagement Level: 9
Current Social Engagement Level: 9
Current Social Engagement Level: 9
Current Social Engagement Level: 9
Current Social Engagement Level: 10
Episode 1: reward: 9.000, steps: 21
Current Social Engagement Level: 1
Current Social Engagement Level: 2
Current Social Engagement Level: 3
Current Social Engagement Level: 3
Current Social Engagement Level: 3