In [2]:
import numpy as np
import pandas as pd
import random
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError

In [3]:
# Load datasets
places_df = pd.read_csv('places_v6.csv')  
visitors_df = pd.read_csv('Visitors Preference Dataset.xlsx - user_data_version_3_10K_Users.csv')  


In [4]:
# Normalize the ratings in places_df
scaler = StandardScaler()
places_df['rating'] = scaler.fit_transform(places_df[['rating']])

In [5]:
# Create mappings
place_mapping = {place: idx for idx, place in enumerate(places_df['name'].unique())}
reverse_place_mapping = {idx: place for place, idx in place_mapping.items()}

In [7]:
# Example embedding dimension
embedding_dim = 10

In [8]:
# Dummy embeddings 
activity_embeddings = {activity: np.random.rand(embedding_dim) for activity in visitors_df['Preferred Activities'].explode().unique()}
place_embeddings = {place: np.random.rand(embedding_dim) for place in places_df['categoriess'].explode().unique()}

In [9]:
def calculate_similarity(embedding1, embedding2):
    return cosine_similarity([embedding1], [embedding2])[0][0]

In [15]:
class TouristEnvironment:
    def __init__(self, visitors_df, places_df, activity_embeddings, place_embeddings):
        self.visitors_df = visitors_df
        self.places_df = places_df
        self.activity_embeddings = activity_embeddings
        self.place_embeddings = place_embeddings
        self.visitors = visitors_df['User ID'].unique()
        self.places = places_df['name'].unique()
        self.state = random.choice(self.visitors)

    def reset(self):
        self.state = random.choice(self.visitors)
        return self.state

    def step(self, action):
        place_id = action
        visitor_preferences = self.visitors_df[self.visitors_df['User ID'] == self.state]
        
        # Convert place_id to place name
        place_name = reverse_place_mapping.get(place_id, None)
        if place_name is None:
            raise ValueError(f"Place ID {place_id} not found in reverse_place_mapping.")

        # Match place in places_df
        matched_place = self.places_df[self.places_df['name'] == place_name]
        
        if matched_place.empty:
            # Handle the case where the place is not found
            print(f"No place found for ID: {place_id}. Skipping this action.")
            return self.state, 0  

        # Reward based on rating, matching activities, and bucket list presence
        reward = 0
        if matched_place['rating'].values[0] > 0:
            reward += matched_place['rating'].values[0]

        # Compare activity embeddings
        preferred_activities = visitor_preferences['Preferred Activities'].values[0]
        place_categories = matched_place['categoriess'].values[0]
        for activity in preferred_activities:
            activity_embedding = self.activity_embeddings.get(activity, np.zeros(embedding_dim))
            for category in place_categories:
                place_embedding = self.place_embeddings.get(category, np.zeros(embedding_dim))
                reward += calculate_similarity(activity_embedding, place_embedding)

        # Encode the bucket list and check if the recommended place is on it
        bucket_list = visitor_preferences['Bucket list destinations Sri Lanka'].values[0].split(',')
        bucket_list_encoded = [place_mapping.get(place, -1) for place in bucket_list]

        if place_id in bucket_list_encoded:
            reward += 3 

        self.state = random.choice(self.visitors)  
        return self.state, reward


In [16]:
# Define DQN model
def build_dqn(state_size, action_size):
    model = Sequential([
        Dense(128, input_shape=(1,), activation='relu'),
        Dense(128, activation='relu'),
        Dense(action_size, activation='linear')
    ])
    model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.001))
    return model

In [17]:
# Define DQN agent
class DQNAgent:
    def __init__(self, state_size, action_size, discount_factor=0.9, epsilon=0.1):
        self.state_size = state_size
        self.action_size = action_size
        self.model = build_dqn(state_size, action_size)
        self.discount_factor = discount_factor
        self.epsilon = epsilon

    def choose_action(self, state):
        if np.random.rand() < self.epsilon:
            return random.choice(range(self.action_size))
        else:
            q_values = self.model.predict(state, verbose=0)
            return np.argmax(q_values[0])

    def learn(self, state, action, reward, next_state):
        target = reward + self.discount_factor * np.amax(self.model.predict(next_state, verbose=0))
        target_f = self.model.predict(state, verbose=0)
        target_f[0][action] = target
        self.model.fit(state, target_f, epochs=1, verbose=0)

In [18]:
# Initialize environment and agent
env = TouristEnvironment(visitors_df, places_df, activity_embeddings, place_embeddings)
agent = DQNAgent(len(visitors_df['User ID'].unique()), len(places_df['name'].unique()))

# Training the DQN agent
episodes = 800

In [19]:
for episode in range(episodes):
    state = env.reset()

    for _ in range(10):
        action = agent.choose_action(np.array([state]))
        next_state, reward = env.step(action)
        agent.learn(np.array([state]), action, reward, np.array([next_state]))
        state = next_state

# Save the trained model
agent.model.save('dqn_tourist_model_final.h5')
print("Model saved as dqn_tourist_model.h5")



Model saved as dqn_tourist_model.h5


In [20]:
# Recommendation functions
def recommend_place_dqn(agent, visitor_id):
    state = np.array([visitor_id])
    action = agent.choose_action(state)
    recommended_place_id = reverse_place_mapping.get(action, "Unknown Place")
    return recommended_place_id

def recommend_top_n_places(agent, visitor_id, top_n=5):
    state = np.array([visitor_id])
    q_values = agent.model.predict(state, verbose=0)[0]
    top_n_places = np.argsort(q_values)[::-1][:top_n]
    top_n_place_ids = [reverse_place_mapping.get(i, "Unknown Place") for i in top_n_places]
    return top_n_place_ids

In [21]:
# Load the saved model and test recommendations
loaded_model = load_model('dqn_tourist_model_final.h5')
loaded_agent = DQNAgent(len(visitors_df['User ID'].unique()), len(places_df['name'].unique()))
loaded_agent.model = loaded_model

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [49]:
# Example
sample_visitor_id = random.choice(visitors_df['User ID'].unique())
recommended_place = recommend_place_dqn(loaded_agent, sample_visitor_id)
print(f"Recommended place for Visitor {sample_visitor_id}: {recommended_place}")


Recommended place for Visitor 6260: Uduwathura Lake


In [23]:
# Recommend top 5 places for the visitor using the loaded model
top_places = recommend_top_n_places(loaded_agent, sample_visitor_id, top_n=5)
print(f"Top 5 recommended places for Visitor {sample_visitor_id}: {top_places}")

Top 5 recommended places for Visitor 6898: ['Uduwathura Lake', 'Dunhinda Waterfall', 'Belihuloya', 'Bathalagoda Tank', 'Galabedda Biso Pond']
