### Importing necessary libraries

In [None]:
from surprise import Dataset
from surprise import Reader
from surprise import SVD  # SVD stands for Singular Value Decomposition, also an MF-based method like ALS
from surprise.model_selection import train_test_split
from surprise import accuracy
import pandas as pd
import numpy as np

### Loading Business, Review csv files and Merging them into new business_reviews.csv file

In [None]:
# Reviews: keep only the three columns the recommender needs (user, item, rating).
df_review = pd.read_csv(
    '/content/drive/Shareddrives/228 Travel Recommendation/Dataset/yelp_academic_dataset_review.csv',
    usecols=['user_id', 'business_id', 'stars'],
)

# Businesses: keep only the id and the human-readable name.
df_business = pd.read_csv(
    '/content/drive/Shareddrives/228 Travel Recommendation/Dataset/yelp_academic_dataset_business.csv',
    usecols=['business_id', 'name'],
)

# Attach the business name to every review; the inner join keeps only reviews
# whose business_id is present in the business table.
merged_df = df_review.merge(df_business, how='inner', on='business_id')

# Persist the joined table (without the index column) as business_reviews.csv.
merged_df.to_csv(
    '/content/drive/Shareddrives/228 Travel Recommendation/Dataset/business_reviews.csv',
    index=False,
)

In [None]:
# Load the merged business_reviews.csv into a pandas DataFrame.
# `data` is the frame used by the rest of the notebook (columns are listed a few cells below).
file_path = '/content/drive/Shareddrives/228 Travel Recommendation/Dataset/business_reviews.csv'
data = pd.read_csv(file_path)

In [None]:
# Preview the first rows. `head` must be *called*: the bare attribute `data.head`
# only displays the bound method's repr (which dumps the whole frame as text).
data.head()

<bound method NDFrame.head of                         user_id             business_id  stars  \
0        mh_-eMZ6K5RLWhZyISBhwA  XQfwVwDr-v0ZS3_CbbE5Xw    3.0   
1        Iaee7y6zdSB3B-kRCo4z1w  XQfwVwDr-v0ZS3_CbbE5Xw    2.0   
2        ejFxLGqQcWNLdNByJlIhnQ  XQfwVwDr-v0ZS3_CbbE5Xw    4.0   
3        f7xa0p_1V9lx53iIGN5Sug  XQfwVwDr-v0ZS3_CbbE5Xw    3.0   
4        dCooFVCk8M1nVaQqcfTL3Q  XQfwVwDr-v0ZS3_CbbE5Xw    2.0   
...                         ...                     ...    ...   
6990275  xHu1jmrnv4DdJMuC8IxeRg  vI4vyi1dfG93oAiSRFDymA    1.0   
6990276  aYveEctPYcZiubXyEgLhTA  vI4vyi1dfG93oAiSRFDymA    5.0   
6990277  oz-So7Kwo5tW51HrT-BgIg  vI4vyi1dfG93oAiSRFDymA    1.0   
6990278  09zj3b4tM-xJjozvtk34wQ  vI4vyi1dfG93oAiSRFDymA    1.0   
6990279  hyfUdXDmgqA4GI3S11I69w  vI4vyi1dfG93oAiSRFDymA    5.0   

                                     name  
0            Turning Point of North Wales  
1            Turning Point of North Wales  
2            Turning Point of North Wales  
3

In [None]:
# Show the column labels of the loaded frame as a quick sanity check.
print(data.columns)

Index(['user_id', 'business_id', 'stars', 'name'], dtype='object')


### Defining a Reader object specifying the rating scale

In [None]:
# Reader configured with the rating bounds; it is passed to Dataset.load_from_df in the next cell.
reader = Reader(rating_scale=(1, 5))  # star ratings in this dataset range from 1 to 5

In [None]:
# Build the Surprise dataset from the DataFrame.
# The columns are selected in (user, item, rating) order and `reader` supplies the rating scale.
data_loading = Dataset.load_from_df(data[['user_id', 'business_id','stars']], reader)

### Splitting the dataset and Training the SVD model

In [None]:
# Split the data into training and testing sets (80% / 20%).
# A fixed random_state makes the shuffle reproducible, so the metrics and
# recommendations below do not change between "Restart & Run All" executions.
trainset, testset = train_test_split(data_loading, test_size=0.2, random_state=42)  # You can adjust the test_size

In [None]:
# Define the SVD model. The latent factors are initialised randomly, so the
# seed is fixed to make training (and therefore every prediction) reproducible.
model = SVD(random_state=42)

# Train the model on the training set.
model.fit(trainset)

# Predict a rating for every (user, business) pair held out in the test set.
predictions = model.test(testset)

### Performing Cosine Similarity

In [None]:
# Collect the predicted rating (est) and the actual rating (r_ui) of every
# test prediction into two parallel lists.
predicted_ratings = []
actual_ratings = []
for prediction in predictions:
    predicted_ratings.append(prediction.est)
    actual_ratings.append(prediction.r_ui)

In [None]:
# Cosine similarity between the predicted ratings and the actual ratings.

from sklearn.metrics.pairwise import cosine_similarity

# Turn each list into a single-row 2-D array of shape (1, n_predictions),
# i.e. one sample whose features are the individual ratings.
predicted_ratings_array = np.asarray(predicted_ratings).reshape(1, -1)
actual_ratings_array = np.asarray(actual_ratings).reshape(1, -1)

# One row against one row -> a 1x1 similarity matrix.
similarity_score = cosine_similarity(predicted_ratings_array, actual_ratings_array)

In [None]:
# similarity_score is a 2-D result computed from two single-row arrays; [0, 0] extracts its only entry.
print(f"Cosine Similarity Score: {similarity_score[0, 0]}")

Cosine Similarity Score: 0.9476924933511831


### Making Recommendations for a specific user

In [None]:
# User to generate recommendations for. This must be a value from the
# `user_id` column. The id used previously ('vI4vyi1dfG93oAiSRFDymA') appears
# in the `business_id` column of the data preview, so it matched no reviews and
# the "recommendations" were not based on any rating history.
# NOTE(review): this id is taken from the first row of the data preview; swap in
# any other user_id present in the dataset as needed.
user_id = 'mh_-eMZ6K5RLWhZyISBhwA'
user_recommendations = []

In [None]:
# Recommend businesses the selected user has not rated yet.
# `data` holds the same (user_id, business_id, stars) rows that `data_loading`
# was built from, so it is queried directly instead of rebuilding the Surprise
# dataset a second time.

#Fetching businesses that the user has already rated
rated_items = data.loc[data['user_id'] == user_id, 'business_id']
rated_item_ids = set(rated_items)  # set -> O(1) membership test inside the loop below

if not rated_item_ids:
    # Typical cause: a business_id (or a mistyped id) was supplied as the user id.
    print(f"Warning: user {user_id} has no ratings in the data; check that this is a user_id and not a business_id.")

#Predicting a rating for every business the user has not rated yet
user_recommendations = [
    (business_id, model.predict(user_id, business_id).est)
    for business_id in data['business_id'].unique()
    if business_id not in rated_item_ids
]

#Sorting the recommendations by predicted rating in descending order
user_recommendations.sort(key=lambda x: x[1], reverse=True)

#Building the business_id -> name lookup once (first name seen per business),
#instead of scanning the whole DataFrame for every printed recommendation
business_names = data.drop_duplicates(subset='business_id').set_index('business_id')['name']

#Displaying top 20 recommendations for the selected user
top_n = 20
print(f"Top {top_n} Recommendations for User {user_id}:")
for idx, (business_id, predicted_rating) in enumerate(user_recommendations[:top_n], 1):
    business_name = business_names.loc[business_id]
    print(f"{idx}. Business ID: {business_id}, Business Name: {business_name}, Predicted Rating: {predicted_rating}")

Top 20 Recommendations for User vI4vyi1dfG93oAiSRFDymA:
1. Business ID: NDwoKO79_T49UEKVDlHd3A, Business Name: Sustainable Wine Tours, Predicted Rating: 4.9310282546320146
2. Business ID: B2Tuf5M1wQhdwAKnD-w7Yw, Business Name: New Orleans Airboat Tours, Predicted Rating: 4.928910449664647
3. Business ID: STEG37SqBC3PkwY4wgSoPg, Business Name: Taylor Home Solutions, Predicted Rating: 4.924875280294676
4. Business ID: QNilrbTi8912ye2ztnBMpA, Business Name: DeeTours of Santa Barbara, Predicted Rating: 4.92315855547764
5. Business ID: TDEV16C4GhK5wyhL-5V7ww, Business Name: Flambeaux Bicycle Tours, Predicted Rating: 4.922757893571927
6. Business ID: 0IjDqJexP6jTH4F_Kg4mrQ, Business Name: A New Twist Balloons and Face Painting, Predicted Rating: 4.922038445634181
7. Business ID: ez4kMLP6OJEIaMbMrrGRdA, Business Name: New Orleans Secrets Tours, Predicted Rating: 4.921074405323677
8. Business ID: im3hUe2nigm2Xm-Z1SNXIg, Business Name: B & B Heating and Air, Predicted Rating: 4.916877307732881
