In [1]:
import pandas as pd

In [2]:
auto_bus = pd.read_csv("Automotive business")
auto_rv = pd.read_csv("Automotive review")

In [3]:
# must run

import ast
auto_bus["categories"] = auto_bus["categories"].apply(ast.literal_eval)

In [8]:
!pip install fuzzywuzzy

Collecting fuzzywuzzy
  Obtaining dependency information for fuzzywuzzy from https://files.pythonhosted.org/packages/43/ff/74f23998ad2f93b945c0309f825be92e04e0348e062026998b5eefef4c33/fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


---

In [9]:
from fuzzywuzzy import process
class collaborative_filtering:
    def __init__(self, business_data, review_data):
        self.review_data = review_data
        self.business_data = business_data
        
#         import ast
#         self.business_data['categories'] = self.business_data['categories'].apply(ast.literal_eval)

    def create_utility_matrix(self):
        from scipy.sparse import csr_matrix
        import numpy as np

        M = self.review_data['user_id'].nunique()
        N = self.review_data['business_id'].nunique()

        user_mapper = dict(zip(np.unique(self.review_data["user_id"]), list(range(M))))
        business_mapper = dict(zip(np.unique(self.review_data["business_id"]), list(range(N))))

        user_inv_mapper = dict(zip(list(range(M)), np.unique(self.review_data["user_id"])))
        business_inv_mapper = dict(zip(list(range(N)), np.unique(self.review_data["business_id"])))

        user_index = [user_mapper[i] for i in self.review_data['user_id']]
        item_index = [business_mapper[i] for i in self.review_data['business_id']]

        X = csr_matrix((self.review_data["stars"], (user_index, item_index)), shape=(M, N))

        return X, user_mapper, business_mapper, user_inv_mapper, business_inv_mapper

    def evaluating_sparsity(self, X):
        n_total = X.shape[0] * X.shape[1]
        n_ratings = X.nnz
        sparsity = n_ratings / n_total
        print(f"Matrix sparsity: {round(sparsity * 100, 2)}%")

    def find_similar_business(self, business_id, X, business_mapper, business_inv_mapper, metric_in, k=None):
        from sklearn.neighbors import NearestNeighbors
        import numpy as np

        X = X.T
        neighbour_ids = []

        business_ind = business_mapper[business_id]
        business_vec = X[business_ind]
        if isinstance(business_vec, (np.ndarray)):
            business_vec = business_vec.reshape(1, -1)
        kNN = NearestNeighbors(n_neighbors=k + 1, algorithm="brute", metric=metric_in)
        kNN.fit(X)
        neighbour = kNN.kneighbors(business_vec, return_distance=False)
        for i in range(0, k):
            n = neighbour.item(i)
            neighbour_ids.append(business_inv_mapper[n])
        neighbour_ids.pop(0)
        return neighbour_ids

    def business_finder(self, name):
        from fuzzywuzzy import process

        all_names = self.business_data.loc[:, 'name'].tolist()
        closest_match = process.extractOne(name, all_names)
        return closest_match[0]

    def automate_work(self, name):
        real_name = self.business_finder(name)
        filt = self.business_data.loc[:, "name"] == real_name
        business_id = self.business_data.loc[filt, "business_id"].values[0]

        business_titles = dict(zip(self.business_data['business_id'], self.business_data['name']))
        X, user_mapper, business_mapper, user_inv_mapper, business_inv_mapper = self.create_utility_matrix()
        similar_business = self.find_similar_business(business_id, X, business_mapper, business_inv_mapper, metric_in="cosine", k=10)

        business_title = business_titles[business_id]

        print(f"Because you searched {business_title}:")
        for i in similar_business:
            print(business_titles[i])
        
        print("\n")
        about_business = pd.DataFrame(columns=list(self.business_data.columns))
        count = 0
        for i in similar_business:
            filt = self.business_data.loc[:, "business_id"] == i
            about_business.loc[count] = self.business_data.loc[filt].iloc[0]
            count += 1
        
        return about_business




In [10]:
rc = collaborative_filtering(business_data=auto_bus , review_data=auto_rv)

In [15]:
rc.automate_work("prime")

Because you searched Prime Auto Glass:
Discount Tire
Sinclair Choice
AMP Performance
World Famous Glasshouse Carwash
Discount Tire Store - Henderson
Arizona Lucky Star Transmissions
Cactus Auto Repair
Budget Rent-A-Car - Chandler
Discount Tire




Unnamed: 0,business_id,full_address,hours,open,categories,city,review_count,name,neighborhoods,longitude,state,stars,latitude,attributes,type
0,UAR9XXVVTQ5shxvU80C1dA,"1130 W Elliot Rd\nTempe, AZ 85284","{'Monday': {'close': '18:00', 'open': '08:00'}...",True,"[Tires, Automotive, Wheel & Rim Repair, Auto P...",Tempe,21,Discount Tire,[],-111.956978,AZ,4.0,33.349571,{},business
1,duGNV-2mgDkfW3HNTRz-Sg,"8095 S Rainbow Blvd\nSouthwest\nLas Vegas, NV ...","{'Monday': {'close': '18:00', 'open': '08:00'}...",True,"[Car Wash, Automotive, Gas & Service Stations,...",Las Vegas,29,Sinclair Choice,['Southwest'],-115.243519,NV,3.5,36.041531,{},business
2,dwp3aysZ7Mocgv9wI3TxGw,"4346 E University Dr\nPhoenix, AZ 85034",{},True,"[Auto Repair, Automotive]",Phoenix,3,AMP Performance,[],-111.987313,AZ,3.5,33.422074,{},business
3,dpDZsDi7tM50n6tzv1_VRA,"1550 W Oakey Blvd\nLas Vegas, NV 89102","{'Monday': {'close': '18:00', 'open': '09:30'}...",True,"[Car Wash, Automotive]",Las Vegas,5,World Famous Glasshouse Carwash,[],-115.161287,NV,4.0,36.152791,{},business
4,dqAi0vPIDcy6JcszeafrBw,"4661 E Sunset Rd\nHenderson, NV 89014","{'Monday': {'close': '18:00', 'open': '08:00'}...",True,"[Tires, Automotive, Wheel & Rim Repair]",Henderson,23,Discount Tire Store - Henderson,[],-115.072189,NV,4.5,36.070379,{},business
5,dqfFjm-GyiN_Ni53tK_SvQ,"424 W Guadalupe Rd\nMesa, AZ 85210",{},True,"[Auto Repair, Automotive]",Mesa,3,Arizona Lucky Star Transmissions,[],-111.842963,AZ,3.5,33.36478,{},business
6,dsaE8qzCFAlMTJ64o77JKA,"748 E Dunlap\nPhoenix, AZ 85020","{'Tuesday': {'close': '17:00', 'open': '08:00'...",True,"[Auto Repair, Automotive, Oil Change Stations]",Phoenix,5,Cactus Auto Repair,[],-112.062851,AZ,3.5,33.568131,{},business
7,dh5dG1OBJ6Hy4RopyOmfZQ,"2950 S Alma School Rd\nChandler, AZ 85286",{},True,"[Hotels & Travel, Car Rental]",Chandler,4,Budget Rent-A-Car - Chandler,[],-111.858859,AZ,4.0,33.262483,{},business
8,e0wJd1deF4Wl-etBC7nBgQ,"13213 W McDowell Rd\nGoodyear, AZ 85338","{'Monday': {'close': '18:00', 'open': '08:00'}...",True,"[Tires, Automotive, Wheel & Rim Repair, Auto P...",Goodyear,17,Discount Tire,[],-112.343287,AZ,4.0,33.463982,{},business


In [88]:
auto_bus.columns

Index(['business_id', 'full_address', 'hours', 'open', 'categories', 'city',
       'review_count', 'name', 'neighborhoods', 'longitude', 'state', 'stars',
       'latitude', 'attributes', 'type'],
      dtype='object')

In [16]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from fuzzywuzzy import process

class ContentBased:
    def __init__(self, business_data):
        self.business_data = business_data
        
        import pandas as pd
        from sklearn.metrics.pairwise import cosine_similarity
        from fuzzywuzzy import process
    
    def transformer_matrix(self):
        # Collect all unique categories
        categories = set()
        for i in range(len(self.business_data)):
            for j in self.business_data.loc[i, "categories"]:
                categories.add(j)
        
        # Create the transformation matrix by dropping non-category columns
        for g in categories:
            self.business_data[g] = self.business_data.categories.transform(lambda x: int(g in x))
        
        
        drop_columns = ['business_id', 'full_address', 'hours', 'open', 'categories', 'city',
       'review_count', 'name', 'neighborhoods', 'longitude', 'state', 'stars',
       'latitude', 'attributes', 'type']
        business_categories_t = self.business_data.drop(columns=drop_columns)
        
        return business_categories_t
    
    def cosine_model(self, business_categories_t):
        cosine_sim = cosine_similarity(business_categories_t, business_categories_t)
        return cosine_sim
    
    def business_finder(self, name):
        all_names = self.business_data['name'].tolist()
        closest_match = process.extractOne(name, all_names)
        return closest_match[0]
    
    def get_content_based_recommendations(self, title_string, cosine_sim, n_recommendations=10):
        business_idx = dict(zip(self.business_data['name'], list(self.business_data.index)))
        title = self.business_finder(title_string)
        idx = business_idx[title]
        
        sim_scores = list(enumerate(cosine_sim[idx]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:(n_recommendations+1)]
        similar_businesses = [i[0] for i in sim_scores]
        
        print(f"Because you searched {title}:")
        print("\n")
        
        list_of_business_related = self.business_data['name'].iloc[similar_businesses].tolist()
        for i in list_of_business_related:
            print(i)
            
        return list_of_business_related
        
        
    def automate_work_cb(self, name):
        business_categories_t = self.transformer_matrix()
        cosine_sim = self.cosine_model(business_categories_t)
        list_of_business_related = self.get_content_based_recommendations(name, cosine_sim)
        
        about_business = pd.DataFrame(columns=list(self.business_data.columns))
        for count, business_name in enumerate(list_of_business_related):
            # Filter the business_data to find the matching business
            filt = self.business_data['name'] == business_name
            if not self.business_data.loc[filt].empty:
                about_business.loc[count] = self.business_data.loc[filt].iloc[0]
        
        return about_business


In [17]:
cb = ContentBased(auto_bus)

In [19]:
cb.automate_work_cb("hyundai")

Because you searched Earnhardt Avondale Hyundai:


Auto-Europa
Big Bell Road Kia
Bell Lexus North Scottsdale
Arrowhead Cadillac
Porsche North Scottsdale
Audi North Scottsdale
Big Two Mitsubishi
Superstition Springs Lexus
Biddulph Mazda
Autonation Honda East Las Vegas


Unnamed: 0,business_id,full_address,hours,open,categories,city,review_count,name,neighborhoods,longitude,...,Boat Repair,Automotive,Wholesale Stores,Carpet Cleaning,Gas & Service Stations,RV Dealers,Boating,Parking,Motorcycle Gear,Professional Services
0,3jBkt2zeO1t0mUUBJMzTJg,"1901 E Indian School Rd\nPhoenix, AZ 85016",{},True,"[Auto Repair, Automotive, Car Dealers]",Phoenix,5,Auto-Europa,[],-112.040837,...,0,1,0,0,0,0,0,0,0,0
1,MNMThMYpXa_UXZHQEEYS1g,"2121 E Bell Rd\nPhoenix, AZ 85022","{'Monday': {'close': '20:00', 'open': '09:00'}...",False,"[Auto Repair, Automotive, Car Dealers]",Phoenix,6,Big Bell Road Kia,[],-112.036759,...,0,1,0,0,0,0,0,0,0,0
2,1pYcTdg1n7_S867aR4SAvw,"18555 N Scottsdale Rd\nScottsdale, AZ 85255","{'Monday': {'close': '20:00', 'open': '08:00'}...",True,"[Auto Repair, Automotive, Car Dealers]",Scottsdale,15,Bell Lexus North Scottsdale,[],-111.924552,...,0,1,0,0,0,0,0,0,0,0
3,XwAqViYLsjOpxM-h57rTEA,"8310 W Bell Rd\nGlendale, AZ 85308","{'Monday': {'close': '20:00', 'open': '08:00'}...",True,"[Auto Repair, Automotive, Car Dealers]",Glendale,17,Arrowhead Cadillac,[],-112.231972,...,0,1,0,0,0,0,0,0,0,0
4,K9B7eIUDM2i1pvj38oE3pg,"18000 N. Scottsdale Road\nPhoenix, AZ 85054","{'Monday': {'close': '19:00', 'open': '08:00'}...",True,"[Auto Repair, Automotive, Car Dealers]",Phoenix,16,Porsche North Scottsdale,[],-111.926386,...,0,1,0,0,0,0,0,0,0,0
5,rCVaj4xZKS7Af89p2JjcpQ,"18088 N. Scottsdale Road\nPhoenix, AZ 85054","{'Monday': {'close': '20:00', 'open': '08:00'}...",True,"[Auto Repair, Automotive, Car Dealers]",Phoenix,48,Audi North Scottsdale,[],-111.925964,...,0,1,0,0,0,0,0,0,0,0
6,MQQGfrno3ily0SgCd67JFw,"6222 E Auto Park Dr\nMesa, AZ 85206","{'Monday': {'close': '21:00', 'open': '08:00'}...",True,"[Auto Repair, Automotive, Car Dealers]",Mesa,7,Big Two Mitsubishi,[],-111.69593,...,0,1,0,0,0,0,0,0,0,0
7,vCHHSc3yXsWHiJYuZOb3cw,"6206 E Test Dr\nMesa, AZ 85206","{'Monday': {'close': '19:00', 'open': '07:00'}...",True,"[Auto Repair, Automotive, Car Dealers]",Mesa,14,Superstition Springs Lexus,[],-111.698011,...,0,1,0,0,0,0,0,0,0,0
8,_SNUn-zCwX_r-xaztpSjpg,"8424 W Bell Rd\nPeoria, AZ 85382","{'Monday': {'close': '21:00', 'open': '08:00'}...",False,"[Auto Repair, Automotive, Car Dealers]",Peoria,7,Biddulph Mazda,[],-112.243053,...,0,1,0,0,0,0,0,0,0,0
9,j1vRNq9WcwPBTLz67EOhMw,"1700 E Sahara Ave\nDowntown\nLas Vegas, NV 89104","{'Monday': {'close': '21:00', 'open': '08:00'}...",True,"[Auto Repair, Automotive, Car Dealers]",Las Vegas,42,Autonation Honda East Las Vegas,['Downtown'],-115.127109,...,0,1,0,0,0,0,0,0,0,0


# END

In [18]:
list(auto_bus.iloc[0:5 , :].loc[:,"hours"])

["{'Monday': {'close': '20:00', 'open': '08:30'}, 'Tuesday': {'close': '18:00', 'open': '08:30'}, 'Friday': {'close': '18:00', 'open': '08:30'}, 'Wednesday': {'close': '18:00', 'open': '08:30'}, 'Thursday': {'close': '18:00', 'open': '08:30'}, 'Saturday': {'close': '16:00', 'open': '08:30'}}",
 "{'Tuesday': {'close': '19:00', 'open': '07:00'}, 'Friday': {'close': '19:00', 'open': '07:00'}, 'Thursday': {'close': '19:00', 'open': '07:00'}, 'Monday': {'close': '19:00', 'open': '07:00'}, 'Wednesday': {'close': '19:00', 'open': '07:00'}}",
 "{'Friday': {'close': '12:30', 'open': '07:30'}, 'Tuesday': {'close': '17:30', 'open': '07:30'}, 'Monday': {'close': '17:30', 'open': '07:30'}, 'Wednesday': {'close': '17:30', 'open': '07:30'}, 'Thursday': {'close': '17:30', 'open': '07:30'}}",
 '{}',
 "{'Monday': {'close': '20:00', 'open': '08:30'}, 'Tuesday': {'close': '18:00', 'open': '08:30'}, 'Friday': {'close': '18:00', 'open': '08:30'}, 'Wednesday': {'close': '18:00', 'open': '08:30'}, 'Thursday':

In [19]:
import ast

# Sample data in the hours column
hours_data = [
    "{'Monday': {'close': '20:00', 'open': '08:30'}, 'Tuesday': {'close': '18:00', 'open': '08:30'}, 'Friday': {'close': '18:00', 'open': '08:30'}, 'Wednesday': {'close': '18:00', 'open': '08:30'}, 'Thursday': {'close': '18:00', 'open': '08:30'}, 'Saturday': {'close': '16:00', 'open': '08:30'}}",
    "{'Tuesday': {'close': '19:00', 'open': '07:00'}, 'Friday': {'close': '19:00', 'open': '07:00'}, 'Thursday': {'close': '19:00', 'open': '07:00'}, 'Monday': {'close': '19:00', 'open': '07:00'}, 'Wednesday': {'close': '19:00', 'open': '07:00'}}",
    "{'Friday': {'close': '12:30', 'open': '07:30'}, 'Tuesday': {'close': '17:30', 'open': '07:30'}, 'Monday': {'close': '17:30', 'open': '07:30'}, 'Wednesday': {'close': '17:30', 'open': '07:30'}, 'Thursday': {'close': '17:30', 'open': '07:30'}}",
    '{}',
    "{'Monday': {'close': '20:00', 'open': '08:30'}, 'Tuesday': {'close': '18:00', 'open': '08:30'}, 'Friday': {'close': '18:00', 'open': '08:30'}, 'Wednesday': {'close': '18:00', 'open': '08:30'}, 'Thursday': {'close': '20:00', 'open': '08:30'}, 'Saturday': {'close': '16:00', 'open': '08:30'}}"
]

# Convert each string to a dictionary
hours_data_dicts = [ast.literal_eval(item) for item in hours_data]

# Display the result
for hours_dict in hours_data_dicts:
    print(hours_dict)


{'Monday': {'close': '20:00', 'open': '08:30'}, 'Tuesday': {'close': '18:00', 'open': '08:30'}, 'Friday': {'close': '18:00', 'open': '08:30'}, 'Wednesday': {'close': '18:00', 'open': '08:30'}, 'Thursday': {'close': '18:00', 'open': '08:30'}, 'Saturday': {'close': '16:00', 'open': '08:30'}}
{'Tuesday': {'close': '19:00', 'open': '07:00'}, 'Friday': {'close': '19:00', 'open': '07:00'}, 'Thursday': {'close': '19:00', 'open': '07:00'}, 'Monday': {'close': '19:00', 'open': '07:00'}, 'Wednesday': {'close': '19:00', 'open': '07:00'}}
{'Friday': {'close': '12:30', 'open': '07:30'}, 'Tuesday': {'close': '17:30', 'open': '07:30'}, 'Monday': {'close': '17:30', 'open': '07:30'}, 'Wednesday': {'close': '17:30', 'open': '07:30'}, 'Thursday': {'close': '17:30', 'open': '07:30'}}
{}
{'Monday': {'close': '20:00', 'open': '08:30'}, 'Tuesday': {'close': '18:00', 'open': '08:30'}, 'Friday': {'close': '18:00', 'open': '08:30'}, 'Wednesday': {'close': '18:00', 'open': '08:30'}, 'Thursday': {'close': '20:00'

In [20]:
import copy

data = copy.deepcopy(auto_bus)

In [22]:
data["hours"] = data["hours"].apply(ast.literal_eval)

In [26]:
data.loc[:,["business_id" , "name" , "hours"]]

Unnamed: 0,business_id,name,hours
0,9brW55Vzyi3ItboHWfoepg,Zimbrick Acura,"{'Monday': {'close': '20:00', 'open': '08:30'}..."
1,LOp0ciu7XhZwTqZUWG8qCw,Bennett's Auto Repair LLC,"{'Tuesday': {'close': '19:00', 'open': '07:00'..."
2,O16EckCLtNoA49hewGht_w,Dunn's Import Inc,"{'Friday': {'close': '12:30', 'open': '07:30'}..."
3,qNBvH111qgJgLYUScbph7g,Magic Wash Inc,{}
4,uyp8kpTXKfKXvAB8HdJSAA,Zimbrick Volkswagen,"{'Monday': {'close': '20:00', 'open': '08:30'}..."
...,...,...,...
2197,e-1HA74HRThULkQIuz1t3A,Surfer Express Car Wash,{}
2198,TFdUHR5EnAPR_vgZaZo3wA,Clean Freak Car Wash,"{'Monday': {'close': '19:00', 'open': '07:00'}..."
2199,-A-Dy_u25DTds1bdERFu5w,BMW Only,"{'Monday': {'close': '18:00', 'open': '09:00'}..."
2200,2yWjRrN3vzAoESuYkMI6Wg,Smartfilm,"{'Monday': {'close': '17:00', 'open': '08:00'}..."


In [None]:
# class collaborative_filtering:
#     def __init__(self , business_data , review_data):
#         self.review_data = review_data
#         self.business_data = business_data
    
    
#     def create_utility_matrix(self):
#         from scipy.sparse import csr_matrix
#         import numpy as np

#         """
#         Generates a sparse matrix from starss dataframe. --> stars data only ==> collaborative filtering

#         Args:
#             self.review_data: pandas dataframe containing 3 columns (user_id, business_id, stars)

#         Returns:
#             X: sparse matrix
#             user_mapper: dict that maps user id's to user indices
#             user_inv_mapper: dict that maps user indices to user id's
#             business_mapper: dict that maps business id's to business indices
#             business_inv_mapper: dict that maps business indices to business id's
#         """
#         M = self.review_data['user_id'].nunique()
#         N = self.review_data['business_id'].nunique()

#         user_mapper = dict(zip(np.unique(self.review_data["user_id"]), list(range(M))))
#         business_mapper = dict(zip(np.unique(self.review_data["business_id"]), list(range(N))))

#         user_inv_mapper = dict(zip(list(range(M)), np.unique(self.review_data["user_id"])))
#         business_inv_mapper = dict(zip(list(range(N)), np.unique(self.review_data["business_id"])))

#         user_index = [user_mapper[i] for i in self.review_data['user_id']]
#         item_index = [business_mapper[i] for i in self.review_data['business_id']]

#         X = csr_matrix((self.review_data["stars"], (user_index,item_index)), shape=(M,N))

#         return X, user_mapper, business_mapper, user_inv_mapper, business_inv_mapper
    
    
#     def evaluating_sparsity(self , X):
#         n_total = X.shape[0]*X.shape[1]
#         n_ratings = X.nnz
#         sparsity = n_ratings/n_total
#         print(f"Matrix sparsity: {round(sparsity*100,2)}%")
        
    

#     def find_similar_business(business_id, X, business_mapper, business_inv_mapper, metric_in, k = None):
        
#         from sklearn.neighbors import NearestNeighbors
        
#         """
#         Finds k-nearest neighbours for a given business id.

#         Args:
#             business_id: id of the business of interest
#             X: user-item utility matrix
#             k: number of similar businesss to retrieve
#             metric: distance metric for kNN calculations

#         Output: returns list of k similar business ID's
#         """
#         X = X.T
#         neighbour_ids = []

#         business_ind = business_mapper[business_id]
#         business_vec = X[business_ind]
#         if isinstance(business_vec, (np.ndarray)):
#             business_vec = business_vec.reshape(1,-1)
#         # use k+1 since kNN output includes the businessId of interest
#         kNN = NearestNeighbors(n_neighbors=k+1, algorithm="brute", metric=metric_in)
#         kNN.fit(X)
#         neighbour = kNN.kneighbors(business_vec, return_distance=False)
#         for i in range(0,k):
#             n = neighbour.item(i)
#             neighbour_ids.append(business_inv_mapper[n])
#         neighbour_ids.pop(0)
#         return neighbour_ids
    

#     def business_finder(self , name):
#         from fuzzywuzzy import process
        
#         all_names = self.business_data.loc[:,'name'].tolist()
#         closest_match = process.extractOne(name,all_names)
#         return closest_match[0]
    
#     def automate_work(self , name):
#         real_name = self.business_finder(name)
#         filt = self.business_data.loc[:,"name"] == real_name
#         business_id = self.business_data.loc[filt , "business_id"]
        
#         business_titles = dict(zip(self.business_data['business_id'], self.business_data['name']))
#         X, user_mapper, business_mapper, user_inv_mapper, business_inv_mapper = self.create_utility_matrix()
#         similar_business = self.find_similar_business(business_id, X, business_mapper, business_inv_mapper, metric_in = "cosine", k=10)
        
#         business_title = business_titles[business_id]

#         print(f"Because you searched {business_title}:")
#         for i in similar_business:
#             print(business_titles[i])

In [14]:

class collaborative_filtering:
    """
    A class for implementing collaborative filtering based recommendation system using business and review data.

    Attributes
    ----------
    business_data : DataFrame
        A pandas DataFrame containing business information including business IDs and names.
    review_data : DataFrame
        A pandas DataFrame containing review information including user IDs, business IDs, and ratings.

    Methods
    -------
    create_utility_matrix():
        Creates a user-item utility matrix from the review data and returns the matrix along with mappers for users and businesses.
    
    evaluating_sparsity(X):
        Evaluates and prints the sparsity of the utility matrix X.
    
    find_similar_business(business_id, X, business_mapper, business_inv_mapper, metric_in, k=None):
        Finds and returns a list of business IDs similar to the given business ID based on the specified metric.
    
    business_finder(name):
        Finds and returns the closest matching business name for the provided input using fuzzy string matching.
    
    automate_work(name):
        Automates the process of finding similar businesses to the one provided by name and returns a DataFrame with detailed information about these similar businesses.
    """

    
    def __init__(self, business_data, review_data):
        self.review_data = review_data
        self.business_data = business_data
        
        import pandas as pd
        import ast
        
    def create_utility_matrix(self):
        """
        Creates a user-item utility matrix from the review data.

        Returns
        -------
        X : csr_matrix
            A sparse matrix representing user ratings for businesses.
        user_mapper : dict
            A dictionary mapping user IDs to matrix indices.
        business_mapper : dict
            A dictionary mapping business IDs to matrix indices.
        user_inv_mapper : dict
            A dictionary mapping matrix indices back to user IDs.
        business_inv_mapper : dict
            A dictionary mapping matrix indices back to business IDs.
        """
        from scipy.sparse import csr_matrix
        import numpy as np

        M = self.review_data['user_id'].nunique()
        N = self.review_data['business_id'].nunique()

        user_mapper = dict(zip(np.unique(self.review_data["user_id"]), list(range(M))))
        business_mapper = dict(zip(np.unique(self.review_data["business_id"]), list(range(N))))

        user_inv_mapper = dict(zip(list(range(M)), np.unique(self.review_data["user_id"])))
        business_inv_mapper = dict(zip(list(range(N)), np.unique(self.review_data["business_id"])))

        user_index = [user_mapper[i] for i in self.review_data['user_id']]
        item_index = [business_mapper[i] for i in self.review_data['business_id']]

        X = csr_matrix((self.review_data["stars"], (user_index, item_index)), shape=(M, N))

        return X, user_mapper, business_mapper, user_inv_mapper, business_inv_mapper

    def evaluating_sparsity(self, X):
        """
        Evaluates and prints the sparsity of the utility matrix X.

        Parameters
        ----------
        X : csr_matrix
            The utility matrix to evaluate sparsity for.
        """
        n_total = X.shape[0] * X.shape[1]
        n_ratings = X.nnz
        sparsity = n_ratings / n_total
        print(f"Matrix sparsity: {round(sparsity * 100, 2)}%")

    def find_similar_business(self, business_id, X, business_mapper, business_inv_mapper, metric_in, k=None):
        """
        Finds and returns a list of business IDs similar to the given business ID based on the specified metric.

        Parameters
        ----------
        business_id : str
            The ID of the business to find similarities for.
        X : csr_matrix
            The utility matrix with user ratings.
        business_mapper : dict
            A dictionary mapping business IDs to matrix indices.
        business_inv_mapper : dict
            A dictionary mapping matrix indices back to business IDs.
        metric_in : str
            The distance metric to use for finding similarities.
        k : int, optional
            The number of similar businesses to find (default is None).

        Returns
        -------
        neighbour_ids : list
            A list of similar business IDs.
        """
        from sklearn.neighbors import NearestNeighbors
        import numpy as np

        X = X.T
        neighbour_ids = []

        business_ind = business_mapper[business_id]
        business_vec = X[business_ind]
        if isinstance(business_vec, (np.ndarray)):
            business_vec = business_vec.reshape(1, -1)
        kNN = NearestNeighbors(n_neighbors=k + 1, algorithm="brute", metric=metric_in)
        kNN.fit(X)
        neighbour = kNN.kneighbors(business_vec, return_distance=False)
        for i in range(0, k):
            n = neighbour.item(i)
            neighbour_ids.append(business_inv_mapper[n])
        neighbour_ids.pop(0)
        return neighbour_ids

    def business_finder(self, name):
        """
        Finds and returns the closest matching business name for the provided input using fuzzy string matching.

        Parameters
        ----------
        name : str
            The name of the business to find.

        Returns
        -------
        closest_match : str
            The closest matching business name.
        """
        from fuzzywuzzy import process

        all_names = self.business_data.loc[:, 'name'].tolist()
        closest_match = process.extractOne(name, all_names)
        return closest_match[0]

    def automate_work(self, name):
        """
        Automates the process of finding similar businesses to the one provided by name.

        Parameters
        ----------
        name : str
            The name of the business to find similar businesses for.

        Returns
        -------
        about_business : DataFrame
            A pandas DataFrame containing detailed information about the similar businesses.
        """
        real_name = self.business_finder(name)
        filt = self.business_data.loc[:, "name"] == real_name
        business_id = self.business_data.loc[filt, "business_id"].values[0]

        business_titles = dict(zip(self.business_data['business_id'], self.business_data['name']))
        X, user_mapper, business_mapper, user_inv_mapper, business_inv_mapper = self.create_utility_matrix()
        similar_business = self.find_similar_business(business_id, X, business_mapper, business_inv_mapper, metric_in="cosine", k=10)

        business_title = business_titles[business_id]

        print(f"Because you searched {business_title}:")
        for i in similar_business:
            print(business_titles[i])
        
        print("\n")
        about_business = pd.DataFrame(columns=list(self.business_data.columns))
        count = 0
        for i in similar_business:
            filt = self.business_data.loc[:, "business_id"] == i
            about_business.loc[count] = self.business_data.loc[filt].iloc[0]
            count += 1
        
        return about_business



class ContentBased:
    """
    A class for implementing content-based recommendation system using business data.

    Attributes
    ----------
    business_data : DataFrame
        A pandas DataFrame containing business information including business IDs, names, and categories.

    Methods
    -------
    transformer_matrix():
        Transforms the business data into a matrix where each row represents a business and each column represents a category, with binary values indicating the presence of a category.
    
    cosine_model(business_categories_t):
        Computes the cosine similarity matrix for the transformed business data.
    
    business_finder(name):
        Finds and returns the closest matching business name for the provided input using fuzzy string matching.
    
    get_content_based_recommendations(title_string, cosine_sim, n_recommendations=10):
        Generates and prints a list of business names that are similar to the given business name based on content similarity.
    
    automate_work_cb(name):
        Automates the process of finding content-based similar businesses to the one provided by name and returns a DataFrame with detailed information about these similar businesses.
    """
    
    
    def __init__(self, business_data):
        """
        Initializes the ContentBased class with business data.

        Parameters
        ----------
        business_data : DataFrame
            A pandas DataFrame containing business information.
        """
        self.business_data = business_data
        
        import pandas as pd
        from sklearn.metrics.pairwise import cosine_similarity
        from fuzzywuzzy import process
    
    def transformer_matrix(self):
        """
        Transforms the business data into a binary matrix representing categories for each business.

        Returns
        -------
        business_categories_t : DataFrame
            A transformed DataFrame where each row represents a business and each column represents a category with binary values.
        """
        # Collect all unique categories
        categories = set()
        for i in range(len(self.business_data)):
            for j in self.business_data.loc[i, "categories"]:
                categories.add(j)
        
        # Create the transformation matrix by dropping non-category columns
        for g in categories:
            self.business_data[g] = self.business_data.categories.transform(lambda x: int(g in x))
        
        
        drop_columns = ['business_id', 'full_address', 'hours', 'open', 'categories', 'city',
       'review_count', 'name', 'neighborhoods', 'longitude', 'state', 'stars',
       'latitude', 'attributes', 'type']
        business_categories_t = self.business_data.drop(columns=drop_columns)
        
        return business_categories_t
    
    def cosine_model(self, business_categories_t):
        """
        Computes the cosine similarity matrix for the transformed business data.

        Parameters
        ----------
        business_categories_t : DataFrame
            A transformed DataFrame where each row represents a business and each column represents a category.

        Returns
        -------
        cosine_sim : ndarray
            A cosine similarity matrix.
        """
        cosine_sim = cosine_similarity(business_categories_t, business_categories_t)
        return cosine_sim
    
    def business_finder(self, name):
        """
        Finds and returns the closest matching business name for the provided input using fuzzy string matching.

        Parameters
        ----------
        name : str
            The name of the business to find.

        Returns
        -------
        closest_match : str
            The closest matching business name.
        """
        all_names = self.business_data['name'].tolist()
        closest_match = process.extractOne(name, all_names)
        return closest_match[0]
    
    def get_content_based_recommendations(self, title_string, cosine_sim, n_recommendations=10):
        """
        Generates and prints a list of business names that are similar to the given business name based on content similarity.

        Parameters
        ----------
        title_string : str
            The name of the business to find similar businesses for.
        cosine_sim : ndarray
            A cosine similarity matrix.
        n_recommendations : int, optional
            The number of similar businesses to find (default is 10).

        Returns
        -------
        list_of_business_related : list
            A list of similar business names.
        """
        business_idx = dict(zip(self.business_data['name'], list(self.business_data.index)))
        title = self.business_finder(title_string)
        idx = business_idx[title]
        
        sim_scores = list(enumerate(cosine_sim[idx]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:(n_recommendations+1)]
        similar_businesses = [i[0] for i in sim_scores]
        
        print(f"Because you searched {title}:")
        print("\n")
        
        list_of_business_related = self.business_data['name'].iloc[similar_businesses].tolist()
        for i in list_of_business_related:
            print(i)
            
        return list_of_business_related
        
        
    def automate_work_cb(self, name):
        """
        Automates the process of finding content-based similar businesses to the one provided by name.

        Parameters
        ----------
        name : str
            The name of the business to find similar businesses for.

        Returns
        -------
        about_business : DataFrame
            A pandas DataFrame containing detailed information about the similar businesses.
        """
        business_categories_t = self.transformer_matrix()
        cosine_sim = self.cosine_model(business_categories_t)
        list_of_business_related = self.get_content_based_recommendations(name, cosine_sim)
        
        about_business = pd.DataFrame(columns=list(self.business_data.columns))
        for count, business_name in enumerate(list_of_business_related):
            # Filter the business_data to find the matching business
            filt = self.business_data['name'] == business_name
            if not self.business_data.loc[filt].empty:
                about_business.loc[count] = self.business_data.loc[filt].iloc[0]
        
        return about_business


In [15]:
import pandas as pd
import numpy as np
import ast
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity
from fuzzywuzzy import process

In [12]:
cb = ContentBased(auto_bus)

In [13]:
cb.automate_work_cb("spa")

Because you searched Cobblestone Auto Spa:


Francis & Sons Car Wash
Fabulous Freddy's
Premier Auto Wash
Francis and Sons Car Wash
Cobblestone Auto Spa
Cobblestone Auto Spa
Lindstrom Family Auto Wash
Dolphin Car Wash
Francis & Sons Car Wash
Tnt Car Wash


Unnamed: 0,business_id,full_address,hours,open,categories,city,review_count,name,neighborhoods,longitude,...,Parking,Food,Car Share Services,Motorcycle Gear,Towing,Casinos,Local Services,Bike Rentals,Fashion,Computers
0,DiL46Xy45XVDSZF3-krjpA,"17115 E Shea Blvd\nFountain Hills, AZ 85268",{},True,"[Car Wash, Automotive, Auto Detailing]",Fountain Hills,12,Francis & Sons Car Wash,[],-111.712063,...,0,0,0,0,0,0,0,0,0,0
1,R_r61_57_RyNpIxSx8obxw,"1101 S Fort Apache Rd\nWestside\nLas Vegas, NV...","{'Monday': {'close': '20:00', 'open': '07:30'}...",True,"[Car Wash, Automotive, Gas & Service Stations,...",Las Vegas,48,Fabulous Freddy's,['Westside'],-115.292337,...,0,0,0,0,0,0,0,0,0,0
2,f9uze4iu5_ZHylYiyYNStg,4975 E Charleston Blvd\nSunrise\nSunrise Manor...,{},True,"[Car Wash, Automotive, Auto Detailing, Oil Cha...",Sunrise Manor,3,Premier Auto Wash,['Sunrise'],-115.066078,...,0,0,0,0,0,0,0,0,0,0
3,2Lfphj6nhZ0uRN9Ajt4wtA,"5020 E Main St\nMesa, AZ 85205","{'Monday': {'close': '18:00', 'open': '07:30'}...",True,"[Car Wash, Automotive, Auto Detailing, Auto Gl...",Mesa,10,Francis and Sons Car Wash,[],-111.723622,...,0,0,0,0,0,0,0,0,0,0
4,hwFmL7-t06MtGvXGYMRo2g,"3739 E Bell Rd\nPhoenix, AZ 85032","{'Monday': {'close': '18:00', 'open': '07:00'}...",True,"[Car Wash, Automotive, Gas & Service Stations,...",Phoenix,42,Cobblestone Auto Spa,[],-112.001055,...,0,0,0,0,0,0,0,0,0,0
5,hwFmL7-t06MtGvXGYMRo2g,"3739 E Bell Rd\nPhoenix, AZ 85032","{'Monday': {'close': '18:00', 'open': '07:00'}...",True,"[Car Wash, Automotive, Gas & Service Stations,...",Phoenix,42,Cobblestone Auto Spa,[],-112.001055,...,0,0,0,0,0,0,0,0,0,0
6,9vZ2G7kW_8APo3snijBNaA,"3003 E Indian School Rd\nPhoenix, AZ 85016","{'Monday': {'close': '17:00', 'open': '08:00'}...",True,"[Car Wash, Automotive, Auto Detailing]",Phoenix,34,Lindstrom Family Auto Wash,[],-112.016976,...,0,0,0,0,0,0,0,0,0,0
7,uU_GiwzWf314dy0WY28Mgg,"2039 S Power Rd\nMesa, AZ 85209",{},True,"[Car Wash, Automotive, Auto Detailing]",Mesa,9,Dolphin Car Wash,[],-111.687799,...,0,0,0,0,0,0,0,0,0,0
8,DiL46Xy45XVDSZF3-krjpA,"17115 E Shea Blvd\nFountain Hills, AZ 85268",{},True,"[Car Wash, Automotive, Auto Detailing]",Fountain Hills,12,Francis & Sons Car Wash,[],-111.712063,...,0,0,0,0,0,0,0,0,0,0
9,UPU4ynb-TVLHFoqFaC74cA,"17070 N 134th Dr\nSurprise, AZ 85374","{'Monday': {'close': '17:00', 'open': '07:30'}...",True,"[Car Wash, Automotive, Auto Detailing]",Surprise,26,Tnt Car Wash,[],-112.347841,...,0,0,0,0,0,0,0,0,0,0


In [16]:
cf = collaborative_filtering(business_data= auto_bus , review_data=auto_rv)

In [17]:
cf.automate_work("spa")

Because you searched Cobblestone Auto Spa:
The Carwasher
Midas Auto Service
German Motor Works
Fletcher's Tire & Auto Service, Inc
Ken's Transmission
AZ Auto Air
Westy Motorsports
Superpumper
Pep Boys Automotive Supercenters




Unnamed: 0,business_id,full_address,hours,open,categories,city,review_count,name,neighborhoods,longitude,...,Parking,Food,Car Share Services,Motorcycle Gear,Towing,Casinos,Local Services,Bike Rentals,Fashion,Computers
0,HhBORuXtFp-2IWQ640Fg0w,"2011 E Cactus Rd\nPhoenix, AZ 85022","{'Monday': {'close': '18:00', 'open': '08:00'}...",True,"[Car Wash, Automotive, Auto Detailing]",Phoenix,7,The Carwasher,[],-112.036353,...,0,0,0,0,0,0,0,0,0,0
1,R5Z0wHmsEa03h9Lw_WS7ZA,"4223 E Bell Road\nPhoenix, AZ 85032","{'Monday': {'close': '18:00', 'open': '07:30'}...",True,"[Auto Repair, Automotive]",Phoenix,4,Midas Auto Service,[],-111.98951,...,0,0,0,0,0,0,0,0,0,0
2,_XtJUUgt7CpXlb_u5l2MWg,"7255 E Adobe Dr\nScottsdale, AZ 85255","{'Tuesday': {'close': '17:00', 'open': '07:30'...",True,"[Auto Repair, Automotive]",Scottsdale,3,German Motor Works,[],-111.924033,...,0,0,0,0,0,0,0,0,0,0
3,Yb6Sy5Vc5YO07lrkhcwnpg,"4719 E. Cactus Rd\nPhoenix, AZ 85032",{},True,"[Auto Repair, Automotive]",Phoenix,14,"Fletcher's Tire & Auto Service, Inc",[],-111.977936,...,0,0,0,0,0,0,0,0,0,0
4,VBKa5uOmDBzfyWy-SsoaKw,"2630 E Bell Rd\nPhoenix, AZ 85032","{'Monday': {'close': '17:30', 'open': '07:30'}...",True,"[Auto Repair, Automotive]",Phoenix,7,Ken's Transmission,[],-112.025158,...,0,0,0,0,0,0,0,0,0,0
5,ISHaYZ4DnLwzjHYW3922NA,"1900 N McClintock Dr\nTempe, AZ 85281","{'Monday': {'close': '17:00', 'open': '08:00'}...",False,"[Auto Repair, Automotive]",Tempe,4,AZ Auto Air,[],-111.909552,...,0,0,0,0,0,0,0,0,0,0
6,cCe7VWVF1c8lMvhzsiGBOA,"5556 NW Grand Ave\nGlendale, AZ 85301","{'Monday': {'close': '18:00', 'open': '09:00'}...",False,"[Automotive, Motorcycle Dealers]",Glendale,5,Westy Motorsports,[],-112.1784,...,0,0,0,0,0,0,0,0,0,0
7,DYIB5861KIUfH5soVkP1Lg,"15752 N Frank Lloyd Wright B\nScottsdale, AZ 8...",{},True,"[Automotive, Gas & Service Stations]",Scottsdale,3,Superpumper,[],-111.886767,...,0,0,0,0,0,0,0,0,0,0
8,rtzWtF_2D73oHP1WoqGaAg,"2754 E Bell Rd\nPhoenix, AZ 85032","{'Monday': {'close': '21:00', 'open': '08:00'}...",True,"[Auto Repair, Automotive, Tires, Auto Parts & ...",Phoenix,13,Pep Boys Automotive Supercenters,[],-112.023303,...,0,0,0,0,0,0,0,0,0,0
