In [1]:
# IMPORTING NECESSARY LIBRARIES FOR DATA HANDLING, DIMENSIONALITY REDUCTION,
# SIMILARITY COMPUTATION, AND NEAREST NEIGHBOR SEARCH
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
import numpy as np

In [3]:
# Read the tourism dataset from its CSV file into the DataFrame `df`.
df = pd.read_csv(filepath_or_buffer=r"D:\1final ds\final_ds2.csv")

In [None]:
# Preview the first five rows of the loaded dataset.
df.head(n=5)

In [None]:
# Count the missing values in each column of the dataset.
df.isnull().sum()

In [6]:
# Count fully duplicated rows; the first occurrence of each row is not counted.
df.duplicated(keep="first").sum()

np.int64(0)

In [7]:
# Build the user x attraction rating matrix: one row per UserId, one column per
# AttractionId, cell = Rating. Repeated (user, attraction) pairs are averaged
# (pivot_table's default aggregation) and pairs with no rating are filled with 0.
user_attraction_matrix = pd.pivot_table(
    df,
    values="Rating",
    index="UserId",
    columns="AttractionId",
    aggfunc="mean",
    fill_value=0,
)

In [8]:
# Reduce the user-attraction matrix with truncated SVD, keeping at most 20
# components (or as many as there are attraction columns, if fewer).
# TruncatedSVD's default solver is randomized, so the seed is fixed to make the
# reduced matrix - and everything derived from it (KNN neighbours, metrics,
# saved artefacts) - reproducible across kernel restarts.
n_components = min(20, user_attraction_matrix.shape[1])
svd = TruncatedSVD(n_components=n_components, random_state=42)
user_attraction_matrix_reduced = svd.fit_transform(user_attraction_matrix)

In [None]:
# Fit a nearest-neighbours index on the SVD-reduced user vectors, using cosine
# distance and a default query size of 5 neighbours.
knn_model = NearestNeighbors(metric="cosine", algorithm="auto", n_neighbors=5)
knn_model.fit(user_attraction_matrix_reduced)

In [10]:
# Recommend attractions for a user from the ratings of their nearest neighbours.
# Relies on the notebook-level objects `user_attraction_matrix`,
# `user_attraction_matrix_reduced` and `knn_model` built in the earlier cells.

def recommend_attractions(user_id, num_recommendations=5, n_neighbors=5):
    """Suggest unseen attractions for ``user_id`` based on similar users.

    The user's reduced vector is looked up in ``knn_model``; the ratings that
    the returned neighbours gave to attractions the user has a 0 entry for are
    summed per attraction, and the highest-scoring attractions are returned.

    Parameters
    ----------
    user_id
        Label of the user in ``user_attraction_matrix.index``.
    num_recommendations : int, default 5
        Maximum number of attraction ids to return.
    n_neighbors : int, default 5
        Number of neighbours requested from ``knn_model`` (this count includes
        the user themself, so at most ``n_neighbors - 1`` other users are used).
        It is capped at the number of rows in ``user_attraction_matrix_reduced``.

    Returns
    -------
    list or str
        Attraction ids ordered by descending summed neighbour rating. Only
        attractions with a positive summed rating are included, so the list may
        be shorter than ``num_recommendations``. A message string is returned
        instead when ``user_id`` is unknown or nothing can be recommended.
    """
    if user_id not in user_attraction_matrix.index:
        return "User ID not found! Try with a different ID."

    user_idx = user_attraction_matrix.index.get_loc(user_id)

    # Asking for more neighbours than there are fitted rows makes kneighbors
    # raise, so cap the request (and never ask for fewer than one).
    n_fitted = len(user_attraction_matrix_reduced)
    k = max(1, min(int(n_neighbors), n_fitted))
    _, indices = knn_model.kneighbors(
        [user_attraction_matrix_reduced[user_idx]], n_neighbors=k
    )

    # Remove the query user by row position rather than assuming they come
    # first: with tied distances (e.g. identical rating vectors) another user
    # can be listed ahead of them.
    neighbor_positions = [
        int(pos) for pos in indices.ravel() if int(pos) != user_idx
    ][: k - 1]

    # A 0 entry in the matrix marks an attraction the user has no rating for.
    user_ratings = user_attraction_matrix.loc[user_id]
    unseen_attractions = user_ratings[user_ratings == 0].index

    # Sum the neighbours' ratings per unseen attraction in one vectorised step.
    neighbor_ratings = user_attraction_matrix.iloc[neighbor_positions]
    summed_scores = neighbor_ratings.loc[:, unseen_attractions].sum(axis=0)

    # Keep only attractions at least one neighbour actually rated; a total of 0
    # carries no signal and would otherwise be returned in arbitrary column order.
    attraction_scores = summed_scores[summed_scores > 0].to_dict()

    recommended_attractions = sorted(
        attraction_scores, key=attraction_scores.get, reverse=True
    )[:num_recommendations]

    return recommended_attractions if recommended_attractions else "No new recommendations found."


In [None]:
# Produce recommendations for one example user and print them.
user_id = 13
recommended = recommend_attractions(user_id=user_id)
print(f"Recommended Attractions for User {user_id}: {recommended}")

In [12]:
# IMPORTING METRICS TO EVALUATE MODEL PERFORMANCE, INCLUDING MSE, MAE, AND R-SQUARED SCORE  
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Measure how well the truncated SVD reproduces the user-attraction matrix:
# map the reduced matrix back to attraction space, compare it with the original
# matrix, and report RMSE, MSE, MAE and the R² score.
original = user_attraction_matrix.to_numpy()
reconstructed_matrix = svd.inverse_transform(user_attraction_matrix_reduced)
reconstructed = reconstructed_matrix

# RMSE is the square root of the MSE, so the MSE is computed once and reused.
mse = mean_squared_error(original, reconstructed)
rmse = np.sqrt(mse)
mae = mean_absolute_error(original, reconstructed)
r2 = r2_score(original, reconstructed)

print(f"Reconstruction RMSE: {rmse:.4f}")
print(f"Reconstruction MSE: {mse:.4f}")
print(f"Reconstruction MAE: {mae:.4f}")
print(f"Reconstruction R² Score: {r2:.4f}")

In [14]:
# IMPORTING JOBLIB FOR SAVING AND LOADING MACHINE LEARNING MODELS EFFICIENTLY  
import joblib

In [None]:
# Persist the fitted SVD model to disk with joblib so it can be reloaded later.
import joblib

joblib.dump(value=svd, filename=r"D:\1final ds\svd.plk")

In [None]:
# Persist the SVD-reduced user-attraction matrix to disk with joblib.
joblib.dump(value=user_attraction_matrix_reduced, filename=r"D:\1final ds\user_att.plk")

In [None]:
# Persist the fitted nearest-neighbours model to disk with joblib.
joblib.dump(value=knn_model, filename=r"D:\1final ds\knn.plk")


In [None]:
# Persist the full (unreduced) user-attraction matrix to disk with joblib.
joblib.dump(value=user_attraction_matrix, filename=r"D:\1final ds\useratt2.plk")
