In [1]:
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
user_behavior = pd.read_excel('User_Behavior.xlsx')
user_behavior.head()

Unnamed: 0,ShopID,UserID,Booking_count,Average_Booking_cost
0,10,70,3,4210
1,10,95,1,2010
2,17,92,5,7113
3,12,78,7,13868
4,14,61,2,4894


In [3]:
shop_data = pd.read_excel('Shop_Data.xlsx')
shop_data.head()

Unnamed: 0,ShopID,ShopName,Country,Location_name,Location_long,Location_lat,Description,Shop_categories_main,Shop_categories_sub,Available_package_types,Total_Paid_Ads_Count_Current_Month,Total_Reviews_Count_Current_Month,Total_Bookings_Count_Current_Month
0,1,Shop 1,Sri Lanka,Colombo,80.040023,7.726465,A top-rated beauty and wellness shop.,{'Health & Medical'},"{'Salon', 'Spa'}",{'Skin Care'},47,67,74
1,2,Shop 2,Sri Lanka,Jaffna,80.394852,8.3456,Known for luxury spa and salon services.,{'Beauty & Wellness'},"{'Therapy', 'Fitness'}",{'Bridal Services'},2,50,139
2,3,Shop 3,Sri Lanka,Galle,80.220322,6.43905,Specializes in bridal and event styling.,{'Beauty & Wellness'},"{'Salon', 'Spa'}",{'Bridal Services'},26,66,189
3,4,Shop 4,Sri Lanka,Jaffna,80.78911,6.462746,Provides affordable and quality care.,{'Health & Medical'},{'Bridal Services'},{'Skin Care'},27,1,96
4,5,Shop 5,Sri Lanka,Kandy,80.847519,8.877918,Known for luxury spa and salon services.,{'Health & Medical'},"{'Salon', 'Spa'}",{'Fitness Packages'},48,1,88


In [4]:
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()

main_shop_cat = pd.DataFrame(mlb.fit_transform(shop_data['Shop_categories_main']), columns=mlb.classes_, index=shop_data.index)
sub_shop_cat = pd.DataFrame(mlb.fit_transform(shop_data['Shop_categories_sub']), columns=mlb.classes_, index=shop_data.index)
packages_types = pd.DataFrame(mlb.fit_transform(shop_data['Available_package_types']), columns=mlb.classes_, index=shop_data.index)

# Add binarized data to user_data
shop_data = pd.concat([shop_data, main_shop_cat, sub_shop_cat,packages_types], axis=1)

In [5]:
shop_data = shop_data.drop(['Shop_categories_main', 'Shop_categories_sub', 'Available_package_types'], axis=1)

In [6]:
shop_data.shape

(100, 89)

In [7]:
print("Duplicate columns in X:", shop_data.columns[shop_data.columns.duplicated()].tolist())

Duplicate columns in X: [' ', "'", 'B', 'S', 'a', 'c', 'd', 'e', 'h', 'i', 'l', 'n', 'o', 'p', 'r', 's', 't', 'v', 'y', '{', '}', ' ', "'", ',', 'B', 'C', 'F', 'H', 'S', 'a', 'c', 'd', 'e', 'i', 'l', 'n', 'r', 's', 't', 'v', '{', '}']


In [8]:
shop_data = shop_data.loc[:, ~shop_data.columns.duplicated()]

In [9]:
print("Duplicate columns in X:", shop_data.columns[shop_data.columns.duplicated()].tolist())

Duplicate columns in X: []


In [10]:
shop_data.shape

(100, 47)

In [11]:
shop_data.head()

Unnamed: 0,ShopID,ShopName,Country,Location_name,Location_long,Location_lat,Description,Total_Paid_Ads_Count_Current_Month,Total_Reviews_Count_Current_Month,Total_Bookings_Count_Current_Month,...,},",",C,F,T,m,N,P,g,k
0,1,Shop 1,Sri Lanka,Colombo,80.040023,7.726465,A top-rated beauty and wellness shop.,47,67,74,...,1,1,0,0,0,0,0,0,0,1
1,2,Shop 2,Sri Lanka,Jaffna,80.394852,8.3456,Known for luxury spa and salon services.,2,50,139,...,1,1,0,1,1,0,0,0,0,0
2,3,Shop 3,Sri Lanka,Galle,80.220322,6.43905,Specializes in bridal and event styling.,26,66,189,...,1,1,0,0,0,0,0,0,0,0
3,4,Shop 4,Sri Lanka,Jaffna,80.78911,6.462746,Provides affordable and quality care.,27,1,96,...,1,0,0,0,0,0,0,0,0,1
4,5,Shop 5,Sri Lanka,Kandy,80.847519,8.877918,Known for luxury spa and salon services.,48,1,88,...,1,1,0,0,0,0,0,1,1,1


In [12]:
n = len(user_behavior)
n_shops = len(user_behavior['ShopID'].unique())
n_books = len(user_behavior['Booking_count'].unique())

print(f"Number of unique shopId's: {n_shops}")
print(f"Number of unique users: {n_books}")
print(f"Average bookings per user: {round(n/n_books, 2)}")
print(f"Average shops per user: {round(n/n_shops, 2)}")

Number of unique shopId's: 20
Number of unique users: 19
Average bookings per user: 5.26
Average shops per user: 5.0


In [13]:
# User Rating Frequency

user_freq = user_behavior[['UserID', 'ShopID']].groupby('UserID').count().reset_index()
user_freq.columns = ['UserID', 'Booking_count']

print(user_freq.head())

   UserID  Booking_count
0       1              1
1       2              1
2       3              1
3       4              2
4       5              1


In [14]:
# Now, we create user-item matrix using scipy csr matrix
from scipy.sparse import csr_matrix

def create_matrix(df):
    
    N = len(df['UserID'].unique())
    M = len(df['ShopID'].unique())
    
    # Map Ids to indices
    user_mapper = dict(zip(np.unique(df["UserID"]), list(range(N))))
    shop_mapper = dict(zip(np.unique(df["ShopID"]), list(range(M))))
    
    # Map indices to IDs
    user_inv_mapper = dict(zip(list(range(N)), np.unique(df["UserID"])))
    shop_inv_mapper = dict(zip(list(range(M)), np.unique(df["ShopID"])))
    
    user_index = [user_mapper[i] for i in df['UserID']]
    shop_index = [shop_mapper[i] for i in df['ShopID']]

    X = csr_matrix((df["Booking_count"], (shop_index, user_index)), shape=(M, N))
    
    return X, user_mapper, shop_mapper, user_inv_mapper, shop_inv_mapper
    
X, user_mapper, shop_mapper, user_inv_mapper, shop_inv_mapper = create_matrix(user_behavior)

In [15]:
# new matrix with country

from scipy.sparse import csr_matrix
import numpy as np

def create_matrix(df, country=None):
    """
    Create user-item matrix with an optional country filter.
    """
    if country and shop_data is not None:
        # Filter shops by country
        valid_shop_ids = shop_data[shop_data['Country'] == country]['ShopID']
        df = df[df['ShopID'].isin(valid_shop_ids)]

    N = len(df['UserID'].unique())
    M = len(df['ShopID'].unique())
    
    # Map IDs to indices
    user_mapper = dict(zip(np.unique(df["UserID"]), list(range(N))))
    shop_mapper = dict(zip(np.unique(df["ShopID"]), list(range(M))))
    
    # Map indices to IDs
    user_inv_mapper = dict(zip(list(range(N)), np.unique(df["UserID"])))
    shop_inv_mapper = dict(zip(list(range(M)), np.unique(df["ShopID"])))
    
    user_index = [user_mapper[i] for i in df['UserID']]
    shop_index = [shop_mapper[i] for i in df['ShopID']]

    X = csr_matrix((df["Booking_count"], (shop_index, user_index)), shape=(M, N))
    
    return X, user_mapper, shop_mapper, user_inv_mapper, shop_inv_mapper


In [16]:
X, user_mapper, shop_mapper, user_inv_mapper, shop_inv_mapper = create_matrix(user_behavior,"Sri Lanka")

In [17]:
"""
Find similar shops using KNN
"""
from sklearn.neighbors import NearestNeighbors
def find_similar_shops(shop_id, X, k, metric='cosine', show_distance=False):
    
    neighbour_ids = []
    
    shop_ind = shop_mapper[shop_id]
    shop_vec = X[shop_ind]
    k+=1
    kNN = NearestNeighbors(n_neighbors=k, algorithm="brute", metric=metric)
    kNN.fit(X)
    shop_vec = shop_vec.reshape(1,-1)
    neighbour = kNN.kneighbors(shop_vec, return_distance=show_distance)
    for i in range(0,k):
        n = neighbour.item(i)
        neighbour_ids.append(shop_inv_mapper[n])
    neighbour_ids.pop(0)
    return neighbour_ids


shop_name = dict(zip(shop_data['ShopID'], shop_data['ShopName']))

shop_id = 10

similar_ids = find_similar_shops(shop_id, X, k=10)
movie_title = shop_name[shop_id]

print(f"Since you watched {movie_title}")
print(similar_ids)

Since you watched Shop 10
[8, 4, 13, 19, 18, 17, 16, 15, 14, 1]


### Shops Recommendation with respect to Users Preference

In [18]:
def recommend_shops_for_user(user_id, X, user_mapper, shop_mapper, shop_inv_mapper, k=10):
    # Filter the user behavior data for the given user_id
    df1 = user_behavior[user_behavior['UserID'] == user_id]
    
    if df1.empty:
        print(f"User with ID {user_id} does not exist.")
        return []

    # Find the shop ID with the highest booking count for the user
    shop_id = df1[df1['Booking_count'] == max(df1['Booking_count'])]['ShopID'].iloc[0]

    # Create a mapping of shop IDs to shop names (optional for debugging)
    shop_name_mapping = dict(zip(shop_data['ShopID'], shop_data['ShopName']))

    # Get the name of the shop/movie (optional for debugging)
    shop_name = shop_name_mapping.get(shop_id, "Movie not found")

    if shop_name == "Movie not found":
        print(f"Movie with ID {shop_id} not found.")
        return []

    # Find similar shops/movies by IDs
    similar_ids = find_similar_shops(shop_id, X, k)

    print(f"Since you watched {shop_name} (ID: {shop_id}), you might also like these IDs:")
    # Return the IDs of similar movies or shops
    return similar_ids


In [19]:
user_id = 60  # Replace with the desired user ID
recommend_shops_for_user(user_id, X, user_mapper, shop_mapper, shop_inv_mapper, k=5)

Since you watched Shop 3 (ID: 3), you might also like these IDs:


[9, 1, 15, 18, 13]