In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime, timedelta
import random


In [2]:
# Generate dummy user behavior data
def generate_user_data(num_users=100, num_days=30, seed=None, end_date=None):
    """Generate a synthetic table of user/activity interactions.

    Each user gets, independently for every activity, a 70% chance of one
    interaction row with a random rating, duration, date and time of day.

    Parameters
    ----------
    num_users : int
        Number of users to simulate; ids are ``user_0`` .. ``user_{num_users-1}``.
    num_days : int
        Length of the date window in days. Interaction dates are drawn from
        ``[end - num_days, end - 1 day]`` in whole-day steps.
    seed : int or None
        When given, a private ``random.Random(seed)`` generator is used so the
        same seed always yields the same rows. When ``None`` the module-level
        ``random`` functions are used, as before.
    end_date : datetime or None
        End of the date window. Defaults to ``datetime.now()``; pass a fixed
        value (together with ``seed``) for fully reproducible output.

    Returns
    -------
    pandas.DataFrame
        Columns: user_id, category, activity, rating, time_spent, date,
        time_of_day, weekend. The columns are present even when no rows
        were generated.
    """
    # Possible activities, grouped by category
    activities = {
        'amenities': ['pool', 'spa', 'gym', 'tennis_court', 'business_center'],
        'dining': ['main_restaurant', 'cafe', 'bar', 'room_service', 'buffet'],
        'activities': ['city_tour', 'beach_activity', 'cooking_class', 'yoga', 'golf']
    }
    columns = [
        'user_id', 'category', 'activity', 'rating',
        'time_spent', 'date', 'time_of_day', 'weekend',
    ]

    # The `random` module and a `random.Random` instance expose the same
    # random()/randint()/choice() API, so either can serve as the generator.
    rng = random if seed is None else random.Random(seed)

    window_end = datetime.now() if end_date is None else end_date
    start_date = window_end - timedelta(days=num_days)

    user_data = []
    for user_id in range(num_users):
        for category, names in activities.items():
            for activity in names:
                # Not all users interact with all activities (70% chance)
                if rng.random() > 0.3:
                    interaction_date = start_date + timedelta(
                        days=rng.randint(0, num_days - 1)
                    )
                    user_data.append({
                        'user_id': f'user_{user_id}',
                        'category': category,
                        'activity': activity,
                        'rating': rng.randint(1, 5),
                        'time_spent': rng.randint(30, 180),  # minutes
                        'date': interaction_date,
                        'time_of_day': rng.choice(['morning', 'afternoon', 'evening']),
                        # weekday() is 5 for Saturday and 6 for Sunday
                        'weekend': interaction_date.weekday() >= 5
                    })

    # Passing `columns` keeps the schema stable when `user_data` is empty.
    return pd.DataFrame(user_data, columns=columns)

In [3]:
# Build the synthetic interaction table with the default arguments
# (100 users, 30-day window) and display it.
data = generate_user_data()
data

Unnamed: 0,user_id,category,activity,rating,time_spent,date,time_of_day,weekend
0,user_0,amenities,pool,4,112,2024-12-26 15:08:17.217157,morning,False
1,user_0,amenities,spa,5,95,2024-12-14 15:08:17.217157,evening,True
2,user_0,amenities,tennis_court,2,144,2025-01-02 15:08:17.217157,morning,False
3,user_0,amenities,business_center,3,172,2024-12-28 15:08:17.217157,evening,True
4,user_0,dining,main_restaurant,4,98,2024-12-16 15:08:17.217157,afternoon,False
...,...,...,...,...,...,...,...,...
1077,user_99,dining,buffet,3,177,2024-12-26 15:08:17.217157,evening,False
1078,user_99,activities,beach_activity,1,70,2025-01-01 15:08:17.217157,afternoon,False
1079,user_99,activities,cooking_class,1,38,2024-12-10 15:08:17.217157,evening,False
1080,user_99,activities,yoga,2,171,2024-12-25 15:08:17.217157,afternoon,False


In [5]:
# Count how many interaction rows user_0 has for each activity.
data[data['user_id'] == "user_0"]['activity'].value_counts()

activity
pool               1
spa                1
tennis_court       1
business_center    1
main_restaurant    1
bar                1
room_service       1
buffet             1
city_tour          1
cooking_class      1
yoga               1
golf               1
Name: count, dtype: int64

In [6]:
def build_user_profiles(data, user_profiles=None, rating_weight=0.7, time_weight=0.3):
    """Build blended per-user activity profiles and their cosine similarity.

    For every (user, activity) pair the mean rating and the mean time spent
    are computed; pairs with no interaction count as 0. Time spent is scaled
    per activity by that activity's largest per-user mean, then the two
    tables are combined as ``rating * rating_weight + time * time_weight``.

    Parameters
    ----------
    data : pandas.DataFrame
        Interaction rows with ``user_id``, ``activity``, ``rating`` and
        ``time_spent`` columns.
    user_profiles : ignored
        Unused; kept only so existing positional/keyword callers keep
        working. The profiles are always rebuilt from ``data``.
    rating_weight, time_weight : float
        Weights of the rating table and the scaled time table in the blend.
        Ratings are used on their raw scale, so they are not on the same
        range as the scaled time values.

    Returns
    -------
    (similarity_matrix, user_profiles)
        ``similarity_matrix`` is the result of ``cosine_similarity`` on the
        blended profile rows; ``user_profiles`` is the blended DataFrame
        indexed by ``user_id`` with one column per activity.
    """
    # Average rating per activity for each user
    rating_profile = data.pivot_table(
        index='user_id',
        columns='activity',
        values='rating',
        aggfunc='mean'
    ).fillna(0)

    # Average time spent per activity for each user
    time_spent_profile = data.pivot_table(
        index='user_id',
        columns='activity',
        values='time_spent',
        aggfunc='mean'
    ).fillna(0)

    # Scale each activity column by its maximum. A column whose maximum is 0
    # would otherwise produce 0/0 = NaN, so divide such columns by 1 instead.
    column_max = time_spent_profile.max()
    column_max = column_max.mask(column_max == 0, 1)
    time_spent_profile = time_spent_profile / column_max

    # Weighted blend. `fill_value=0` keeps a label that appears in only one of
    # the two tables from turning into NaN; the final fillna covers labels
    # missing from both after alignment.
    user_profiles = (rating_profile * rating_weight).add(
        time_spent_profile * time_weight, fill_value=0
    ).fillna(0)

    # Pairwise similarity between user profile rows
    similarity_matrix = cosine_similarity(user_profiles)
    return similarity_matrix, user_profiles


In [7]:
# Compute the user-to-user similarity matrix and the blended profiles
# from the interaction table, then display the matrix.
similarity_matrix, user_profiles = build_user_profiles(data)
similarity_matrix

array([[1.        , 0.72596693, 0.66007571, ..., 0.61482444, 0.57096075,
        0.78470739],
       [0.72596693, 1.        , 0.72187741, ..., 0.66619173, 0.75896116,
        0.56850886],
       [0.66007571, 0.72187741, 1.        , ..., 0.61793502, 0.85663392,
        0.56187224],
       ...,
       [0.61482444, 0.66619173, 0.61793502, ..., 1.        , 0.65114206,
        0.54030342],
       [0.57096075, 0.75896116, 0.85663392, ..., 0.65114206, 1.        ,
        0.43043968],
       [0.78470739, 0.56850886, 0.56187224, ..., 0.54030342, 0.43043968,
        1.        ]])

In [9]:
# Display the blended profile table (one row per user, one column per activity).
user_profiles

activity,bar,beach_activity,buffet,business_center,cafe,city_tour,cooking_class,golf,gym,main_restaurant,pool,room_service,spa,tennis_court,yoga
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
user_0,2.193855,0.000000,2.181667,2.386667,0.000000,0.854190,3.779775,3.096648,0.000000,2.965169,2.986667,0.947368,3.661932,1.641341,2.896000
user_1,0.000000,3.722905,1.631667,3.050000,2.276966,2.294413,2.938202,0.000000,2.214607,2.155618,1.631667,1.633333,3.776136,0.788827,2.947429
user_10,0.000000,3.026257,2.945000,0.886667,0.000000,3.595531,1.555056,2.895531,2.206180,0.000000,3.771667,2.887719,2.950000,2.279330,0.000000
user_11,0.000000,2.341341,2.175000,0.000000,3.688764,0.000000,1.487640,0.000000,2.374719,0.902247,0.000000,2.312281,3.581818,1.572626,0.986286
user_12,0.000000,3.736313,0.756667,1.620000,1.627528,1.698324,0.000000,2.853631,2.325843,3.724157,3.663333,3.063158,1.567045,0.000000,3.554857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
user_95,0.974860,3.595531,2.173333,0.970000,0.000000,0.000000,0.000000,0.956425,0.000000,0.000000,1.450000,0.000000,3.590341,1.582682,1.681143
user_96,2.967598,2.218994,2.945000,3.018333,1.686517,3.664246,2.305618,1.624581,0.000000,3.628090,0.000000,0.000000,2.353977,3.553631,0.000000
user_97,2.882123,0.000000,0.000000,1.455000,2.995506,1.674860,1.644382,1.554190,3.623034,0.897191,0.961667,2.275439,1.505682,2.193855,0.883429
user_98,0.000000,3.069832,1.465000,0.936667,2.177528,2.992737,0.000000,2.394972,3.044382,0.767416,3.710000,0.000000,2.319886,0.981564,0.770286


In [10]:
def get_similar_users(data, user_id, n=5, similarity_matrix=None, user_profiles=None):
    """Find the users whose behavior profiles are most similar to ``user_id``.

    Parameters
    ----------
    data : pandas.DataFrame
        Interaction rows; only used when ``similarity_matrix`` or
        ``user_profiles`` has to be computed.
    user_id : str
        Label of the user to look up in the profile index.
    n : int
        Maximum number of similar users to return.
    similarity_matrix : array-like or None
        Precomputed square user-to-user similarity matrix. Computed from
        ``data`` when omitted.
    user_profiles : pandas.DataFrame or None
        Profile table whose index gives the user label for each matrix row.
        Computed from ``data`` when omitted, so a caller may pass only the
        matrix.

    Returns
    -------
    pandas.Index
        Up to ``n`` user labels ordered from most to least similar, never
        including ``user_id`` itself.

    Raises
    ------
    KeyError
        If ``user_id`` is not in the profile index.
    ValueError
        If the matrix is not 2-D or its row count differs from the number
        of profiles.
    """
    # Fill in whichever piece the caller did not supply. Previously the
    # profiles were only bound when the matrix was missing, so passing a
    # matrix raised UnboundLocalError.
    if similarity_matrix is None or user_profiles is None:
        computed_matrix, computed_profiles = build_user_profiles(data)
        if similarity_matrix is None:
            similarity_matrix = computed_matrix
        if user_profiles is None:
            user_profiles = computed_profiles

    similarity_matrix = np.asarray(similarity_matrix)
    if similarity_matrix.ndim != 2 or similarity_matrix.shape[0] != len(user_profiles.index):
        raise ValueError(
            "similarity_matrix must be 2-D with one row per entry in user_profiles"
        )

    user_idx = user_profiles.index.get_loc(user_id)
    user_similarities = similarity_matrix[user_idx]

    # Rank everyone from most to least similar, then drop the user by position.
    # Slicing off the first entry is not safe: with ties or an all-zero
    # profile the user is not guaranteed to rank first against themselves.
    ranked = user_similarities.argsort()[::-1]
    ranked = ranked[ranked != user_idx]

    similar_user_indices = ranked[:max(n, 0)]
    similar_users = user_profiles.index[similar_user_indices]

    return similar_users

In [11]:
def get_recommendations(data, user_id, category=None, n=5):
    """Recommend activities to ``user_id`` based on what similar users rated.

    The interactions of the users returned by ``get_similar_users`` are
    averaged per activity (rating and time spent), ordered by mean rating
    from highest to lowest, and activities that ``user_id`` already has a
    row for are removed. When ``category`` is truthy, only the neighbours'
    rows in that category are considered. At most ``n`` rows are returned,
    indexed by activity with ``rating`` and ``time_spent`` columns.
    """
    neighbours = get_similar_users(data, user_id)

    # Interactions recorded by the neighbouring users
    is_neighbour_row = data['user_id'].isin(neighbours)
    neighbour_rows = data[is_neighbour_row]

    if category:
        in_category = neighbour_rows['category'] == category
        neighbour_rows = neighbour_rows[in_category]

    # Mean rating and mean time spent per activity, best rated first
    per_activity_means = {'rating': 'mean', 'time_spent': 'mean'}
    ranked = neighbour_rows.groupby('activity').agg(per_activity_means)
    ranked = ranked.sort_values('rating', ascending=False)

    # Keep only activities the user has no interaction with yet
    own_rows = data[data['user_id'] == user_id]
    already_tried = set(own_rows['activity'])
    is_tried = ranked.index.isin(already_tried)
    untried = ranked[~is_tried]

    return untried.head(n)

In [13]:
# Recommend up to 5 activities user_0 has not tried, across all categories
# (defaults: category=None, n=5).
new_activities = get_recommendations(data,"user_0")
new_activities

Unnamed: 0_level_0,rating,time_spent
activity,Unnamed: 1_level_1,Unnamed: 2_level_1
cafe,4.0,93.0
beach_activity,2.0,49.0
gym,1.5,80.0


In [14]:
# Same query for user_1; this rebinds `new_activities`, replacing the previous result.
new_activities = get_recommendations(data,"user_1")
new_activities

Unnamed: 0_level_0,rating,time_spent
activity,Unnamed: 1_level_1,Unnamed: 2_level_1
golf,2.5,135.0


In [15]:
# Look up the users most similar to user_46 (default n=5); profiles and
# similarities are rebuilt from `data` because no matrix is passed.
similar_users = get_similar_users(data,"user_46")
similar_users

Index(['user_24', 'user_65', 'user_60', 'user_97', 'user_71'], dtype='object', name='user_id')