In [44]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


from sklearn.neighbors import NearestNeighbors
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

# System
import warnings
import os
warnings.filterwarnings("ignore")
%matplotlib inline

In [6]:
df = pd.read_csv("Book_ratingC>400.csv")
df.head()

Unnamed: 0,user_id,book_id,bookrating,booktittle,book_author,year_of_pub,total_ratings
0,276925,316666343,0,The Lovely Bones: A Novel,Alice Sebold,2002,1295
1,277195,316666343,0,The Lovely Bones: A Novel,Alice Sebold,2002,1295
2,277413,316666343,0,The Lovely Bones: A Novel,Alice Sebold,2002,1295
3,277427,316666343,0,The Lovely Bones: A Novel,Alice Sebold,2002,1295
4,277439,316666343,7,The Lovely Bones: A Novel,Alice Sebold,2002,1295


In [365]:
class Knn_recommendationEngine():
    
    def __init__(self, df):
        self.df = df
        self.user_mapping = {user_id: idx for idx, user_id in enumerate(df['user_id'].unique())}
        self.book_mapping = {book_id: title for book_id, title in zip(df['book_id'], df['booktittle'])}
        self.n_users = len(df['user_id'].unique())
        
        
    def knn_similarity(self, target_user, k=3, metric='cosine', data =None):
        
        try:
            
            if data is None:
                data = self.df

            neighbors = NearestNeighbors(n_neighbors=k, metric=metric)
            
            book_ratings = self.df['bookrating'].values.reshape(-1, 1)
            neighbors.fit(book_ratings)

            target_user_rating = self.df[self.df['user_id'] == target_user]['bookrating'].values.reshape(-1, 1)
            distances, indices = neighbors.kneighbors(target_user_rating)
            knn_users = self.df.iloc[indices[0]]['user_id'].tolist()
            
            # convert to the dataframe for clean output
            convert_to_df = pd.DataFrame(knn_users, columns=['KNN_similar_users'])
            
            #hide the index numbering for clean output
            #KNN_users = convert_to_df.style.hide_index()
            
            return convert_to_df
        
        except Exception as e:
            print(f"Error in knn_similarity: {e}")
            return None
        
        except KeyError:
            print('wrong user ID number, please enter the correct user ID')
        except UnboundLocalError:
            print('a local variable is referenced before assignment, please check your variables and try again')
        except NameError :
            print('there is a name in the function that is not defined, please check to fix this')
            
        except UnboundLocalError:
            print('a local variable is referenced before assignment, please check your variables and try again')
            
            
            
            
    def KNN_recommendation(self, target_user, n=5, similar_users=None, metric='cosine'):
        try:
            
            if similar_users is None:
                
                        # Get n most similar users to the target user
                similar_users = self.knn_similarity(target_user, k=n, metric=metric)
                

                       # Find books that similar users have read and target user hasn't read
                recommended_books = {}
                for user in similar_users['KNN_similar_users'].tolist():
                    for booktittle in self.df[(self.df['user_id'] == user) & (self.df['bookrating'] >=4)]['booktittle'].values:
                        if not self.df[(self.df['user_id'] == target_user) & (self.df['booktittle'] == booktittle)]['booktittle'].values:
                            if booktittle in recommended_books:
                                recommended_books[booktittle] += 1
                            else:
                                recommended_books[booktittle] = 1

                             # Sort recommended books by count
                sorted_recommended_books = sorted(recommended_books.items(), key=lambda x: x[1], reverse=True)
                    
                    #return sorted_recommended_books[:n]
                    #recommendations = dict(sorted_recommended_books[:n])
                
                recommendations = pd.DataFrame(list(dict(sorted_recommended_books[:n]).items()), columns=['Recommendations', 'recommendationCount'])
           
    
                return recommendations
    
    
        except Exception as e:
            print(f"Error in KNN_recommendation: {e}")
            return pd.DataFrame()
        
        except KeyError:
            print('wrong user ID number, please enter the correct user ID')
        except UnboundLocalError:
            
            print('a local variable is referenced before assignment, please check your variables and try again')
        except NameError :
            print('there is a name in the function that is not defined, please check to fix this')
            
        except UnboundLocalError:
            print('a local variable is referenced before assignment, please check your variables and try again')
            
            
            
            
            
            
    def evaluate_similarity_metrics(self, target_user, k=5, similarity_metrics=['cosine']):
        try:
            
            recommendation_accuracies = {}

            for metric in similarity_metrics:
                similar_users = self.knn_similarity(target_user, k=k, metric=metric)
                recommendations = self.KNN_recommendation(target_user, n=k)

                if recommendations.empty:
                    print(f"No recommendations found for metric {metric}. Skipping...")
                    recommendation_accuracies[metric] = 0.0
                    continue

                        # Calculate correct recommendations for all similar users
                correct_recommendations = 0
                for user in similar_users['KNN_similar_users'].tolist():
                    similar_user_books = set(self.df[(self.df['user_id'] == user) & (self.df['bookrating'] >=4)]['booktittle'].values)
                    if recommendations['Recommendations'].isin(similar_user_books).any():
                        correct_recommendations += 1

                     # Calculate the recommendation accuracy
                accuracy = correct_recommendations / k
                recommendation_accuracies[metric] = accuracy
                recommendation_accuracy = pd.DataFrame(recommendation_accuracies.items(), columns=['metric', 'recommendation_accuracy'])

            return recommendation_accuracy
        
        except Exception as e:
            print(f"Error in KNN_recommendation: {e}")
            return None
        
        except KeyError:
            print('wrong user ID number, please enter the correct user ID')
        except UnboundLocalError:
            
            print('a local variable is referenced before assignment, please check your variables and try again')
        except NameError :
            print('there is a name in the function that is not defined, please check to fix this')
            
        except UnboundLocalError:
            print('a local variable is referenced before assignment, please check your variables and try again')
            
            
            
            
            
            
            
            #second implementation using many options
   
            
            
            
            
            
            
    def KNN_similarity_2(self, target_user, k=3, metrics=['cosine'], data=None):
        
        try:
            if data is None:
                data = self.df

            target_user_data = data[data['user_id'] == target_user][['bookrating']]

            if 'cosine' in metrics:
                neighbors = NearestNeighbors(n_neighbors=k, metric='cosine')
            elif 'euclidean' in metrics:
                neighbors = NearestNeighbors(n_neighbors=k, metric='euclidean')
            elif 'manhattan' in metrics:
                neighbors = NearestNeighbors(n_neighbors=k, metric='manhattan')
            elif 'chebyshev' in metrics:
                neighbors = NearestNeighbors(n_neighbors=k, metric='chebyshev')
            else:
                raise ValueError("Unknown similarity metric(s). Supported metrics are: 'cosine', 'euclidean', 'manhattan', 'chebyshev'.")

            neighbors.fit(data[['bookrating']])

            distances, indices = neighbors.kneighbors(target_user_data)
            knn_users = data.iloc[indices[0]]['user_id'].tolist()

            return knn_users

        except KeyError:
            print('Wrong user ID number, please enter the correct user ID')
            
            
            
            
            
    def KNN_recommendation_2(self, target_user, n=5, similar_users=None, data =None):
        try:
            if data is None:
                data = self.df
            
            if similar_users is None:
                
                        # Get n most similar users to the target user
                #
                similar_users = self.KNN_similarity_2(target_user, k=n)
                

                       # Find books that similar users have read and target user hasn't read
                recommended_books = {}
                for user in similar_users:
                    for booktittle in self.df[(self.df['user_id'] == user) & (self.df['bookrating'] >=4)]['booktittle'].values:
                        if not self.df[(self.df['user_id'] == target_user) & (self.df['booktittle'] == booktittle)]['booktittle'].values:
                            if booktittle in recommended_books:
                                recommended_books[booktittle] += 1
                            else:
                                recommended_books[booktittle] = 1

                             # Sort recommended books by count
                sorted_recommended_books = sorted(recommended_books.items(), key=lambda x: x[1], reverse=True)
                    
                    #return sorted_recommended_books[:n]
                    #recommendations = dict(sorted_recommended_books[:n])
                
                recommendations = pd.DataFrame(list(dict(sorted_recommended_books[:n]).items()), columns=['Recommendations', 'recommendedCount'])
           
    
                return recommendations
        except Exception as e:
            print(f"Error in KNN_recommendation: {e}")
            return pd.DataFrame()
        
        except KeyError:
            print('wrong user ID number, please enter the correct user ID')
        except UnboundLocalError:
            
            print('a local variable is referenced before assignment, please check your variables and try again')
        except NameError :
            print('there is a name in the function that is not defined, please check to fix this')
            
        except UnboundLocalError:
            print('a local variable is referenced before assignment, please check your variables and try again')
            
            
            
            
            
    def evaluate_similarity_metrics_2(self, target_user, k=5, similarity_metrics=['cosine'], n_splits=5):
        try:
            from sklearn.model_selection import KFold
            recommendation_accuracies = {}

                       # Perform k-fold cross-validation
            kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
            for metric in similarity_metrics:
                total_accuracy = 0.0
                for train_idx, test_idx in kf.split(self.df):
                      
                        # Split data into training and testing sets for this fold
                    train_data = self.df.iloc[train_idx]
                    test_data = self.df.iloc[test_idx]

                               # Calculate similar users and recommendations for the target user in this fold
                    similar_users = self.KNN_similarity_2(target_user, k=k, metrics=similarity_metrics, data=train_data)
                    recommendations = self.KNN_recommendation_2(target_user, n=k, similar_users=None, data=train_data)

                    if recommendations.empty:
                        continue

                              # Calculate correct recommendations for this fold
                    correct_recommendations = 0
                    for user in similar_users:
                        similar_user_books = set(train_data[(train_data['user_id'] == user) & (train_data['booktittle'] != '')]['booktittle'].values)
                        if recommendations['Recommendations'].isin(similar_user_books).any():
                            correct_recommendations += 1

                              # Calculate the recommendation accuracy for this fold
                    accuracy = correct_recommendations / k
                    total_accuracy += accuracy

                            #   Calculate average accuracy across all folds for this metric
                average_accuracy = total_accuracy / n_splits
                recommendation_accuracies[metric] = round(average_accuracy, 3)

            return recommendation_accuracies
        
        except Exception as e:
            print(f"Error in KNN_recommendation: {e}")
            return None
        
        except KeyError:
            print('wrong user ID number, please enter the correct user ID')
        except UnboundLocalError:
            
            print('a local variable is referenced before assignment, please check your variables and try again')
        except NameError :
            print('there is a name in the function that is not defined, please check to fix this')
            
        except UnboundLocalError:
            print('a local variable is referenced before assignment, please check your variables and try again')
        
        
    
    
    
    
    
    
   
            
            
            
            
    

In [366]:
d = Knn_recommendationEngine(df)

In [371]:
def User_interface():
    print("\nWelcome to the Book Recommendation System!")
    
    try:
        r = Knn_recommendationEngine(df)
        
        choice = int(input('Enter 1 to find K nearest Neigbors to a user, 2 for recommendation to the target user, and 3 to validate the metrics used in the recommendation : '))
        
        if(choice == 1):
            print("\nChoose the similarity metric for finding k nearest neigbors to the target user :")
            print("1. cosine")
            print("2. correlation")
            print("3. euclidean")
            print("4. jaccard")
            print('5: manhattan')
            print('6: russellrao')
            print('7: minkowski')
            target_user = int(input('\nEnter the target userID number to find k narest similarity: '))
            metric = int(input('\nYou can choose metric to use or ignore to use default [cosine]:'))
            k = int(input('\n enter the number of K nearest neigbors : '))
            
            if metric not in [1, 2, 3, 4, 5, 6, 7]:
                print("Invalid input. Please enter a valid number.")
                return
            
            if(metric == 1):
                print(f'\nthe {k} nearest neigbors to {target_user} using cosine metric are : ')
                print(r.knn_similarity(target_user, k, metric='cosine'))
                
            elif(metric == 2):
                print(f'\nthe {k} nearest neigbors to {target_user} using correlation metric are : ')
                print(r.knn_similarity(target_user, k, metric='correlation'))
                
            elif(metric == 3):
                print(f'the {k} nearest neigbors to {target_user} using euclidean metric are : ')
                print(r.knn_similarity(target_user, k, metric='euclidean'))
                
            elif(metric == 4):
                print(f'\nthe {k} nearest neigbors to {target_user} using jaccard metric are : ')
                print(r.knn_similarity(target_user, k, metric='jaccard'))
                
            elif(metric == 5):
                print(f'\nthe {k} nearest neigbors to {target_user} using manhattan metric are : ')
                print(r.knn_similarity(target_user, k, metric='manhattan'))
                
            elif(metric == 6):
                print(f'the {k} nearest neigbors to {target_user} using russellrao metric are : ')
                print(r.knn_similarity(target_user, k, metric='russellrao'))
                
            elif(metric == 7):
                print(f'\nthe {k} nearest neigbors to {target_user} using minkowski metric are : ')
                print(r.knn_similarity(target_user, k, metric='minkowski')) 
                
            else:
                print(f'\nthe {k} nearest neigbors to {target_user}  are : ')
                print(r.knn_similarity(target_user, k))
                
                
        elif(choice == 2):
            print("\n you can Choose the similarity metric to use in making recommendation based on KNN to the target user or ignore to use default [cosine]:")
            print("1. cosine")
            print("2. correlation")
            print("3. euclidean")
            print("4. jaccard")
            print('5: manhattan')
            print('6: russellrao')
            print('7: minkowski')
            target_user = int(input('\nEnter the target userID number to recommend books : '))
            metric = int(input('\nYou can choose metric to use or ignore to use default [cosine]:'))
            n = int(input(f'\nEnter the number of books to recommend to {target_user} :'))
            if metric not in [1, 2, 3, 4, 5, 6, 7]:
                print("Invalid input. Please enter a valid number.")
                return
            
            if(metric == 1):
                print(f'the {n} recommendation to {target_user} based on KNN using cosine metric are : ')
                print(r.KNN_recommendation(target_user, n, metric='cosine'))
                
                
            elif(metric == 2):
                print(f'the {n} recommendation to {target_user} based on KNN using correlation metric are  : ')
                print(r.KNN_recommendation(target_user, n, metric='correlation'))
                
            elif(metric == 3):
                print(f'the {n} recommendation to {target_user} based on KNN using euclidean metric are : ')
                print(r.KNN_recommendation(target_user, n, metric='euclidean'))
                
            elif(metric == 4):
                print(f'the {n} recommendation to {target_user} based on KNN using jaccard metric are  : ')
                print(r.KNN_recommendation(target_user, n, metric='jaccard'))
                
                
            elif(metric == 5):
                print(f'the {n} recommendation to {target_user} based on KNN using manhattan metric are  : ')
                print(r.KNN_recommendation(target_user, n, metric='manhattan'))
                
            elif(metric == 6):
                print(f'the {n} recommendation to {target_user} based on KNN using russellrao metric are  : ')
                print(r.KNN_recommendation(target_user, n, metric='russellrao'))
                
            elif(metric == 7):
                print(f'the {n} recommendation to {target_user} based on KNN using minkowski metric are  : ')
                print(r.KNN_recommendation(target_user, n, metric='minkowski'))
                
                
                
        elif(choice == 3):
            target_user = int(input('\nEnter the target userID to validate recommendation metric : '))
            k = int(input(f'\nEnter the K value for nearest neighbors recommendation to {target_user} :'))
            print(f'\nthe validation metrics score used in the recommendation are : ')
            print(r.evaluate_similarity_metrics(target_user, k, similarity_metrics=['cosine', 'correlation', 'euclidean', 'jaccard', 'manhattan', 'russellrao', 'minkowski']))
            
            
        else:
            target_user = int(input('\nEnter the target userID number to recommend books : '))
            metric = int(input('\nYou can choose metric to use or ignore to use default [cosine]:'))
            n = int(input(f'\nEnter the number of books to recommend to {target_user} :'))
            print(f'the {n} recommendation to {target_user} based on KNN using cosine metric are : ')
            print(r.KNN_recommendation(target_user, n, metric='cosine'))
            
            
    except KeyError:
        print('wrong user ID number, please enter the correct user ID')
    
    except ValueError:
        print('input for what you want to do and number of recommendation can only be integer values, please check and try again')
        
    except Exception as e:
        print(f"Error in the function: {e}")

        
            
            
                
        
        

In [372]:
User_interface()


Welcome to the Book Recommendation System!
Enter 1 to find K nearest Neigbors to a user, 2 for recommendation to the target user, and 3 to validate the metrics used in the recommendation : 2

 you can Choose the similarity metric to use in making recommendation based on KNN to the target user or ignore to use default [cosine]:
1. cosine
2. correlation
3. euclidean
4. jaccard
5: manhattan
6: russellrao
7: minkowski

Enter the target userID number to recommend books : 277427

You can choose metric to use or ignore to use default [cosine]:1

Enter the number of books to recommend to 277427 :10
the 10 recommendation to 277427 based on KNN using cosine metric are : 
                                     Recommendations  recommendationCount
0                                     A Time to Kill                    4
1                                The Horse Whisperer                    2
2                         The Pilot's Wife : A Novel                    2
3                                