In [66]:
#IMPORT LIBRARIES FOR DATA HANDLING, DIMENSIONALITY REDUCTION (SVD) AND NEAREST-NEIGHBOUR SEARCH
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
import numpy as np

In [67]:
# LOAD THE TOURISM DATASET FROM THE LOCAL CSV FILE INTO A DATAFRAME
df = pd.read_csv(filepath_or_buffer=r"D:\TRANSACTION PROJECT\Full Tourism Data.csv")

In [None]:
# PREVIEW THE FIRST FIVE ROWS OF THE DATAFRAME
df.head(n=5)

In [None]:
# COUNT MISSING VALUES IN EACH COLUMN
df.isnull().sum()

In [None]:
# COUNT FULLY DUPLICATED ROWS (EVERY OCCURRENCE AFTER THE FIRST IS COUNTED)
df.duplicated(keep="first").sum()

In [None]:
# BUILD THE USER x ATTRACTION RATING MATRIX
# Rows are UserId, columns are AttractionId. Repeated (user, attraction) pairs
# are averaged (pivot_table's default aggregation) and pairs with no rating
# are filled with 0.
user_attraction_matrix = pd.pivot_table(
    df,
    values="Rating",
    index="UserId",
    columns="AttractionId",
    aggfunc="mean",
    fill_value=0,
)

In [71]:
# APPLY TRUNCATED SVD FOR DIMENSIONALITY REDUCTION
# Cap the component count at the number of attraction columns so the fit does
# not fail when the matrix has fewer than 20 attractions.
n_components = min(20, user_attraction_matrix.shape[1])
# Fix the seed: TruncatedSVD's default solver is randomized, so without a
# random_state the reduced matrix (and everything derived from it: neighbours,
# recommendations, saved artefacts) changes from run to run.
svd = TruncatedSVD(n_components=n_components, random_state=42)
user_attraction_matrix_reduced = svd.fit_transform(user_attraction_matrix)

In [None]:
# FIT A COSINE-DISTANCE NEAREST-NEIGHBOUR INDEX ON THE SVD-REDUCED USER VECTORS
# (original note: KNN is used here to avoid a memory error)
knn_model = NearestNeighbors(algorithm="auto", metric="cosine", n_neighbors=5).fit(
    user_attraction_matrix_reduced
)
# Show the fitted estimator as the cell output.
knn_model

In [74]:
# FUNCTION TO RECOMMEND ATTRACTIONS BASED ON SIMILAR USERS
def recommend_attractions(user_id, num_recommendations=5, n_neighbors=5):
    """Recommend unrated attractions for a user, ranked by similar users' ratings.

    Relies on three notebook-level objects: ``user_attraction_matrix`` (the
    user x attraction rating pivot, where 0 marks "no rating"),
    ``user_attraction_matrix_reduced`` (its SVD projection) and ``knn_model``
    (the nearest-neighbour index fitted on that projection).

    Parameters
    ----------
    user_id
        Label of the user in ``user_attraction_matrix.index``.
    num_recommendations : int, default 5
        Maximum number of attractions to return.
    n_neighbors : int, default 5
        Number of neighbours requested from ``knn_model``. The query user
        itself takes one slot, so at most ``n_neighbors - 1`` similar users
        contribute to the scores.

    Returns
    -------
    list or str
        Attraction labels ordered by the summed rating of the similar users
        (highest first), or a message string when the user is unknown or no
        similar user has rated any attraction the user has not rated.
    """
    if user_id not in user_attraction_matrix.index:
        return "User ID not found! Try with a different ID."

    # FIND THE NEAREST NEIGHBORS (SIMILAR USERS)
    user_idx = user_attraction_matrix.index.get_loc(user_id)
    # kneighbors raises ValueError when asked for more neighbours than fitted
    # samples, so clamp the request to the number of users available.
    n_samples = user_attraction_matrix_reduced.shape[0]
    k = min(n_neighbors, n_samples)
    _, indices = knn_model.kneighbors(
        [user_attraction_matrix_reduced[user_idx]], n_neighbors=k
    )

    # GET SIMILAR USERS
    # Remove the query user by position instead of assuming it is returned
    # first: users with identical reduced vectors tie at distance 0, and the
    # query user is then not guaranteed to be at index 0 of the result.
    neighbor_idx = indices.flatten()
    neighbor_idx = neighbor_idx[neighbor_idx != user_idx][: max(k - 1, 0)]
    similar_users = user_attraction_matrix.index[neighbor_idx]

    # FIND ATTRACTIONS NOT YET RATED BY THE CURRENT USER
    user_ratings = user_attraction_matrix.loc[user_id]
    unseen_attractions = user_ratings[user_ratings == 0].index

    # SUM THE SIMILAR USERS' RATINGS PER UNSEEN ATTRACTION (one vectorised step)
    scores = user_attraction_matrix.loc[similar_users, unseen_attractions].sum(axis=0)
    # A total of 0 means no similar user rated the attraction (0 is the pivot's
    # fill value), so there is no evidence for recommending it.
    # NOTE(review): assumes genuine ratings are strictly positive - confirm.
    scores = scores[scores > 0]

    # SORT ATTRACTIONS BASED ON HIGHEST SCORES
    # A stable sort keeps the ordering of equally scored attractions deterministic.
    ranked = scores.sort_values(ascending=False, kind="stable")
    recommended_attractions = ranked.index[:num_recommendations].tolist()

    return recommended_attractions if recommended_attractions else "No new recommendations found."


In [None]:
# TRY THE RECOMMENDER ON ONE SAMPLE USER
user_id = 13
recommended = recommend_attractions(user_id=user_id)

# SHOW WHAT CAME BACK (A LIST OF ATTRACTION LABELS OR A MESSAGE STRING)
print(f"Recommended Attractions for User {user_id}: {recommended}")

In [None]:
#IMPORTING LIBRARIES TO EVALUATE THE MODEL
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
#EVALUATE HOW WELL THE SVD COMPONENTS RECONSTRUCT THE USER-ATTRACTION MATRIX
# NOTE: these are reconstruction errors over the whole zero-filled matrix
# (cells with no rating count as 0), not prediction accuracy on held-out ratings.
reconstructed_matrix = svd.inverse_transform(user_attraction_matrix_reduced)

original = user_attraction_matrix.to_numpy()
reconstructed = reconstructed_matrix

# Compute MSE once and derive RMSE from it instead of running the metric twice.
mse = mean_squared_error(original, reconstructed)
rmse = np.sqrt(mse)
mae = mean_absolute_error(original, reconstructed)
r2 = r2_score(original, reconstructed)

print(f"Reconstruction RMSE: {rmse:.4f}")
print(f"Reconstruction MSE: {mse:.4f}")
print(f"Reconstruction MAE: {mae:.4f}")
print(f"Reconstruction R² Score: {r2:.4f}")

In [None]:
#IMPORT JOBLIB TO SAVE THE MODELS
import joblib

In [None]:
# PERSIST THE FITTED SVD TRANSFORMER TO DISK
joblib.dump(value=svd, filename=r"D:\TRANSACTION PROJECT\svd1 for recommend.pkl")

In [None]:
# PERSIST THE SVD-REDUCED USER VECTORS TO DISK
joblib.dump(
    value=user_attraction_matrix_reduced,
    filename=r"D:\TRANSACTION PROJECT\user_attraction_matrix_reduced for recommend.pkl",
)

In [None]:
# PERSIST THE FITTED NEAREST-NEIGHBOUR MODEL TO DISK
joblib.dump(value=knn_model, filename=r"D:\TRANSACTION PROJECT\knn_model1 for recommend.pkl")


In [None]:
# PERSIST THE USER x ATTRACTION RATING MATRIX TO DISK
joblib.dump(
    value=user_attraction_matrix,
    filename=r"D:\TRANSACTION PROJECT\user_attraction_matrix1 for recommend.pkl",
)
