In [20]:
#Importing required python libraries

import pandas as pd
import numpy as np
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
from sklearn.preprocessing import MinMaxScaler

books_df = pd.read_csv('/home/Sharvani/Documents/Books.csv',engine='python',sep=',')
books_df = books_df.rename(columns = {'ISBN;"Book-Title";"Book-Author";"Year-Of-Publication";"Publisher";"Image-URL-S";"Image-URL-M";"Image-URL-L"':'ISBN'})
books_df['book_title'] = books_df['ISBN'].str.split(';').str[1]
books_df['ISBN'] = books_df['ISBN'].str.split(';').str[0]
new_books_df = books_df[['book_title','ISBN']].drop_duplicates()
ratings_df = pd.read_csv("/home/Sharvani/Documents/CS512/Book-Ratings.csv", error_bad_lines=False)

merged_books_df = pd.merge(ratings_df,new_books_df,on='ISBN')
merged_books_df['ISBN_id'] = merged_books_df['ISBN'].astype("category").cat.codes
merged_books_df['USER_id'] = merged_books_df['User-ID'].astype("category").cat.codes

books_lookup = merged_books_df[['ISBN_id','book_title']].drop_duplicates()
books_lookup['ISBN_id'] = books_lookup.ISBN_id.astype(str)

merged_books_df.columns = ['User_ID','ISBN','Book_Rating','Book_Title','ISBN_ID','USER_id']

#full_data_books is a dataframe having complete information about book name, ID, userID.  
full_data_books = merged_books_df.copy()


merged_books_df = merged_books_df.drop(['User_ID','ISBN','Book_Title'],axis=1)
input_data_for_matrix = merged_books_df.loc[merged_books_df.Book_Rating != 0]

users = list(np.sort(input_data_for_matrix['USER_id'].unique()))
ISBN = list(np.sort(input_data_for_matrix['ISBN_ID'].unique()))
rating = list(input_data_for_matrix['Book_Rating'])
rows = input_data_for_matrix['USER_id'].astype(int)
cols = input_data_for_matrix['ISBN_ID'].astype(int)

ratings_matrix=sparse.csr_matrix((rating,(rows,cols)),shape = (len(rows),len(cols)))


In [3]:

#Details of the generated ratings sparse matrix 

print(ratings_matrix.data)
print(ratings_matrix.nnz)

[ 5.  5.  6. ... 10.  7.  7.]
73771


In [3]:
# ALS function generates 2 vectors X and Y. one for User_features and the other for items_features

def ALS(ratings_matrix,alpha,iterations,lambda_val,features):
    confidence_matrix = ratings_matrix * alpha
    
    user_size,book_size = ratings_matrix.shape
    print(user_size)
    print(book_size)
    X = sparse.csr_matrix(np.random.normal(size = (user_size,features)))
    Y = sparse.csr_matrix(np.random.normal(size = (book_size,features)))
    
    X_I = sparse.eye(user_size)
    Y_I = sparse.eye(book_size)
    
    I = sparse.eye(features)
    lI = lambda_val * I
    u_iteration = 0
    i_iteration = 0
    for i in range(iterations):
        #Update the two vectors X and Y which are respectively for users and books for 'iteration' number of iterations.
        print ('itearation %d of total %d iterations' % (i+1,iterations))
        
        yTy = Y.T.dot(Y)
        xTx = X.T.dot(X)
        
        for u in range(user_size):
            u_iteration += 1
            print('In U for the %d time' % (u_iteration))
            u_row = confidence_matrix[u,:].toarray() 

            p_u = u_row.copy()
            p_u[p_u != 0] = 1.0

            CuI = sparse.diags(u_row, [0])
            Cu = CuI + Y_I

            yT_CuI_y = Y.T.dot(CuI).dot(Y)
            yT_Cu_pu = Y.T.dot(Cu).dot(p_u.T)
            X[u] = spsolve(yTy + yT_CuI_y + lI, yT_Cu_pu)
            #This is one row updation of of the user vectors

    
        for i in range(book_size):
            i_iteration += 1
            print('In I for the %d time' % (i_iteration))
            i_row = confidence_matrix[:,i].T.toarray()

            p_i = i_row.copy()
            p_i[p_i != 0] = 1.0

            CiI = sparse.diags(i_row, [0])
            Ci = CiI + X_I

            xT_CiI_x = X.T.dot(CiI).dot(X)
            xT_Ci_pi = X.T.dot(Ci).dot(p_i.T)
            Y[i] = spsolve(xTx + xT_CiI_x + lI, xT_Ci_pi)
            #This is one column updation of of the book vectors
    print('Vectores X and Y have been created')
    return X, Y
    #in the end, we return the two vectors X and Y based on which recommendations are to be done
        
    

In [4]:
X,Y = ALS(ratings_matrix,40,1,0.1,10)

73771
73771
itearation 1 of total 1 iterations
In U for the 1 time
In U for the 2 time
In U for the 3 time
In U for the 4 time
In U for the 5 time
In U for the 6 time
In U for the 7 time
In U for the 8 time
In U for the 9 time
In U for the 10 time
In U for the 11 time
In U for the 12 time
In U for the 13 time
In U for the 14 time
In U for the 15 time
In U for the 16 time
In U for the 17 time
In U for the 18 time
In U for the 19 time
In U for the 20 time
In U for the 21 time
In U for the 22 time
In U for the 23 time
In U for the 24 time
In U for the 25 time
In U for the 26 time
In U for the 27 time
In U for the 28 time
In U for the 29 time
In U for the 30 time
In U for the 31 time
In U for the 32 time
In U for the 33 time
In U for the 34 time
In U for the 35 time
In U for the 36 time
In U for the 37 time
In U for the 38 time
In U for the 39 time
In U for the 40 time
In U for the 41 time
In U for the 42 time
In U for the 43 time
In U for the 44 time
In U for the 45 time
In U for the 46 t

In [34]:
X

<73771x10 sparse matrix of type '<class 'numpy.float64'>'
	with 737710 stored elements in Compressed Sparse Row format>

In [35]:
Y

<73771x10 sparse matrix of type '<class 'numpy.float64'>'
	with 737710 stored elements in Compressed Sparse Row format>

In [52]:
#Function to find similar books


def give_similar(book_id,X,Y):
    
    
    book_vec = Y[book_id].T

   #selecting 10 most similar books
    scores = Y.dot(book_vec).toarray().reshape(1,-1)[0]
    top_10 = np.argsort(scores)[::-1][:10]

    books = []
    books_scores = []

    #print
    for idx in top_10:
        books.append(books_lookup.book_title.loc[books_lookup.ISBN_id == str(idx)].iloc[0])
        books_scores.append(scores[idx])
    
    similar = pd.DataFrame({'Book': books})
    pd.set_option('display.max_colwidth', -1)
    print (similar)

In [53]:
give_similar(45911,X,Y)

                                                                             Book
0  "Harry Potter and the Order of the Phoenix (Book 5)"                          
1  "Chicken Soup for the Woman's Soul (Chicken Soup for the Soul Series (Paper))"
2  "Face the Fire (Three Sisters Island Trilogy)"                                
3  "Girl in Hyacinth Blue"                                                       
4  "Daughter of Fortune : A Novel (Oprah's Book Club (Hardcover))"               
5  "The Tale of the Body Thief (Vampire Chronicles (Paperback))"                 
6  "The Celestine Prophecy (Celestine Prophecy)"                                 
7  "Purity in Death"                                                             
8  "Life's Little Instruction Book (Life's Little Instruction Books (Paperback))"
9  "Strange Fits of Passion: A Novel"                                            


In [27]:
def recommend(USER_id, ratings_matrix, X, Y, no_of_books):
    """Recommend items for a given user given a trained model
    
        user_id (int): The id of the user we want to create recommendations for.    
        ratings_matrix (csr_matrix): Our original training data.
        X (csr_matrix): The trained user x features vectors
        Y (csr_matrix): The trained item x features vectors
        no_of_books (int): How many recommendations we want to return.
        
    Returns:
        recommendations (pandas.DataFrame): DataFrame with recommended books
    """
  
    
    user_interactions = ratings_matrix[USER_id,:].toarray()

    # Already read and rated for are set to 0
    
    user_interactions = user_interactions.reshape(-1) + 1    #Reshape to turn into 1D array
    user_interactions[user_interactions > 1] = 0

    #Calculating the recommendation
    rec_vector = X[USER_id,:].dot(Y.T).toarray()

    #Scaling.
    min_max = MinMaxScaler()
    rec_vector_scaled = min_max.fit_transform(rec_vector.reshape(-1,1))[:,0]
    recommend_vector = user_interactions*rec_vector_scaled
   
    #Top10. 
    book_index = np.argsort(recommend_vector)[::-1][:no_of_books]

    books = []
    scores = []
    
    for book in book_index:
        books.append(books_lookup.book_title.loc[books_lookup.ISBN_id == str(book)].iloc[0])
        scores.append(recommend_vector[book])

    recommendations = pd.DataFrame({'Book': books, 'Score': scores})
    print(recommendations)



In [49]:
recommend(1156,ratings_matrix,X,Y,10)

                                                             Book     Score
0  "A Painted House"                                               1.000000
1  "The No. 1 Ladies' Detective Agency (Today Show Book Club #8)"  0.838920
2  "Snow Falling on Cedars"                                        0.829849
3  "The Bridges of Madison County"                                 0.795668
4  "The Hundred Secret Senses"                                     0.740169
5  "All I Really Need to Know"                                     0.703483
6  "Lonesome Dove"                                                 0.700446
7  "Harry Potter and the Sorcerer's Stone (Book 1)"                0.690732
8  "Tears of the Giraffe (No.1 Ladies Detective Agency)"           0.676075
9  "A Child Called \"It\": One Child's Courage to Survive"         0.672392


In [None]:
#Create a simple GUI
import tkinter as tk
from tkinter.simpledialog import askstring, askinteger
from tkinter.messagebox import showerror


def display_1():
   
    num1 = entry_1.get()
    text = merged_books_df.loc[merged_books_df['Book_Title'] == '"' + num1 + '"','ISBN_ID']
    print(text[0])
    book_id_string = text[0]
    print(book_id_string)
    give_similar(book_id_string,user_vecs,item_vecs)
    

def display_2():
    num = entry_2.get()
 
    try:
       num = int(num)
    except ValueError:
        showerror('Non-Int Error', 'Please enter an integer')
    else:
        recommend(num,ratings_matrix,user_vecs,item_vecs,10)

    

root = tk.Tk()


entry_1 = tk.Entry(root)
btn_1 = tk.Button(root, text = "Submit Book title", command = display_1)

entry_2 = tk.Entry(root)
btn_2 = tk.Button(root, text = "Submit User title", command = display_2)


entry_1.grid(row = 0, column = 0)
btn_1.grid(row = 1, column = 0)
entry_2.grid(row = 0, column = 1)
btn_2.grid(row = 1, column = 1)

#entry_1_string = str(entry_1)
#entry_1_string = '"' + entry_1_string + '"'
#text = merged_books_df.loc[merged_books_df['Book_Title'] == entry_1_string,'ISBN_ID']
#book_Id_entry = text.iloc[0]
#book_Id_entry

root.mainloop()

