In [None]:
import numpy as np # linear algebra
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans

In [None]:
# Load books_data.csv into a DataFrame.
# NOTE(review): absolute, machine-specific Windows path - this cell only runs
# where that directory exists; consider a configurable data directory.
df_books = pd.read_csv(
    "D:\\TJ\\MACHINELEARNING\\BookRecommendation\\datasets\\books_data.csv"
)

In [None]:
# Load Books_rating.csv into a DataFrame.
# NOTE(review): absolute, machine-specific Windows path - see the books cell.
df_rating = pd.read_csv(
    "D:\\TJ\\MACHINELEARNING\\BookRecommendation\\datasets\\Books_rating.csv"
)

In [None]:
# Preview the first rows of the books table.
df_books.head()

In [None]:
# Preview the first rows of the ratings table.
df_rating.head()

In [None]:
# Join books with their ratings. No `on=` is given, so pandas joins on every
# column name the two frames share, using an inner join.
# NOTE(review): presumably the only shared column is 'Title' - confirm, and
# consider passing `on=` explicitly.
books_data = df_books.merge(df_rating)

In [None]:
# Column names, dtypes and non-null counts of the merged table.
books_data.info()

In [None]:
# Summary statistics of the merged table.
books_data.describe()

In [None]:
# Keep only fully populated rows: a row is discarded as soon as any one of
# its columns is missing.
books_data = books_data.dropna(axis=0, how='any')

In [None]:
# The dataset is huge, so work on a 3% random sample of the rows
# (frac=.03; the fixed random_state makes the draw repeatable).
books_data_sample = books_data.sample(frac=.03,random_state=1)
books_data_sample.shape

In [None]:
# Preview the first rows of the sampled table.
books_data_sample.head()

In [None]:
# Number of distinct values in the 'categories' column of the sample
# (a missing value, if present, counts as one distinct value).
books_data_sample['categories'].nunique(dropna=False)


In [None]:
# The 3% sample was already drawn in an earlier cell with the same `frac` and
# `random_state`; drawing it again here would reproduce the identical frame,
# so the existing `books_data_sample` is reused instead of being recomputed.
# Confirm its size before it is split into train and test sets.
books_data_sample.shape

In [None]:
def _mean_score_by_title(reviews):
    """Collapse review rows into one row per 'Title' holding the mean 'review/score'.

    Any missing mean is replaced by 0.
    """
    return reviews.pivot_table(
        index='Title',
        values='review/score',
        aggfunc='mean',
    ).fillna(0)


# 60/40 train/test split of the sampled review rows (seeded for repeatability).
books_data_train, books_data_test = train_test_split(
    books_data_sample,
    test_size=0.4,
    random_state=42,
)

# One-column tables indexed by title: each title's average score within its
# own split. Note this is a per-title aggregate, not a title-by-user matrix.
rating_books_pivot_train = _mean_score_by_title(books_data_train)
rating_books_pivot_test = _mean_score_by_title(books_data_test)

rating_books_pivot_train

In [None]:
# Look up one title's row in the training table.
# NOTE(review): .loc raises KeyError if this title is not in the training split.
rating_books_pivot_train.loc['comeback - a mother and daughter\'s journey through hell and back']

In [None]:
# Histogram of the per-title mean scores in the training table.
# As the last expression of the cell, the (counts, bin edges, patches) return
# value of plt.hist is also echoed below the figure.
plt.hist(rating_books_pivot_train['review/score'].to_numpy())

In [None]:
# Build the nearest-neighbour model over the per-title mean scores.
from sklearn.neighbors import NearestNeighbors

# The model is fitted on a single feature (one mean score per title). Cosine
# distance only compares direction, and every positive scalar points the same
# way, so with metric='cosine' all titles sit at distance 0 from one another
# and the returned "neighbours" are arbitrary. Euclidean distance on one
# feature is the absolute difference between mean scores, which does rank
# titles by how close their ratings are.
model_nn = NearestNeighbors(metric='euclidean', algorithm='brute', n_neighbors=30, n_jobs=-1)

# Fit on an (n_titles, 1) array: one row per title, one column of mean scores.
model_nn.fit(rating_books_pivot_train['review/score'].to_numpy().reshape(-1, 1))

In [None]:
# Get the 10 training titles whose mean review/score is nearest to the query
# title's mean score. The query row is read from the test table, so .loc
# raises KeyError if the title is not in the test split.
query_title = 'comeback - a mother and daughter\'s journey through hell and back'

# Pass a plain (1, 1) NumPy array: the model was fitted on an ndarray, and
# handing it a DataFrame makes scikit-learn warn about feature names that were
# not seen during fit.
query_scores = rating_books_pivot_test.loc[[query_title], ['review/score']].to_numpy()
indices = model_nn.kneighbors(query_scores, n_neighbors=10, return_distance=False)
print(indices)

# Print the recommended books. `indices` holds row positions in the training
# table, so each position is mapped back to its title and mean score.
print("Recommended Books:")
print("==================")
for rank, row_pos in enumerate(indices[0], start=1):
    print(rank, ". ", rating_books_pivot_train.index[row_pos],
          "-", rating_books_pivot_train['review/score'].iloc[row_pos])

In [None]:
# Partition the training titles into 7 groups by mean score (seeded).
kmeans = KMeans(random_state=0, n_clusters=7)
# Fit on an (n_titles, 1) array and keep the label assigned to each title.
cluster_labels = kmeans.fit(
    rating_books_pivot_train[['review/score']].to_numpy()
).labels_

In [None]:
# Assign the query title (taken from the test table) to one of the fitted
# KMeans clusters, then list up to 10 training titles from that same cluster.
query_title = 'comeback - a mother and daughter\'s journey through hell and back'
predictedLabel = kmeans.predict(
    rating_books_pivot_test.loc[[query_title], ['review/score']].to_numpy()
)

# Row positions (in the training table) of every title in the predicted cluster.
predictedBookIndices = np.where(kmeans.labels_ == predictedLabel[0])

print("Recommended Books:")
print("==================")
# Slice rather than iterate a fixed range(10): a cluster holding fewer than
# 10 titles would otherwise raise IndexError.
for rank, row_pos in enumerate(predictedBookIndices[0][:10], start=1):
    print(rank, ". ", rating_books_pivot_train.index[row_pos],
          "-", rating_books_pivot_train['review/score'].iloc[row_pos])

In [None]:
from sklearn import metrics

# Internal quality scores for the KMeans labelling, computed on the same
# (n_titles, 1) array of mean scores that the clustering was fitted on.
train_scores_2d = rating_books_pivot_train['review/score'].to_numpy().reshape(-1, 1)
silhouette = metrics.silhouette_score(train_scores_2d, cluster_labels)
# This is the Davies-Bouldin index (lower is better). It was previously
# stored and printed as "dunn", which is a different index.
davies_bouldin = metrics.davies_bouldin_score(train_scores_2d, cluster_labels)
dunn = davies_bouldin  # legacy variable name kept for compatibility; NOT the Dunn index
chs = metrics.calinski_harabasz_score(train_scores_2d, cluster_labels)
print("kMeans: silhouette: ", silhouette, ", davies_bouldin: ", davies_bouldin, ", chs: ", chs)

from scipy.cluster.hierarchy import dendrogram, linkage

# Single-linkage hierarchy over the per-title mean scores.
linked = linkage(rating_books_pivot_train['review/score'].to_numpy().reshape(-1, 1), 'single')
plt.figure(figsize = (10, 7))
# `labels` must supply one label per original observation; a fixed 10-item
# list makes dendrogram raise ValueError unless the table has exactly 10
# titles. Instead, condense the tree to its last 10 merged clusters
# (truncate_mode='lastp', p=10); show_leaf_counts then annotates each
# condensed leaf with the number of titles it contains.
dendrogram(linked, orientation = 'top', truncate_mode = 'lastp', p = 10, distance_sort ='descending', show_leaf_counts = True)
plt.show()

In [None]:
from scipy.cluster.hierarchy import dendrogram, linkage
import scipy.cluster.hierarchy as shc
from matplotlib import pyplot as plt
# Draw the Ward-linkage hierarchy built from the per-title mean scores
# (an (n_titles, 1) array); `dend` keeps the dictionary dendrogram returns.
dend = dendrogram(
    linkage(
        rating_books_pivot_train[['review/score']].to_numpy(),
        method='ward',
    )
)


In [None]:
from sklearn.cluster import AgglomerativeClustering
# Ward linkage only works with Euclidean distance, which is also the default,
# so the distance is not passed explicitly. The former `affinity` keyword was
# deprecated and then removed from scikit-learn (renamed to `metric`);
# omitting it keeps this cell working on both old and new releases.
cluster = AgglomerativeClustering(n_clusters = 7, linkage = 'ward')
# One cluster label per training title, in the row order of the training table.
predictedLabelAgglomerative = cluster.fit_predict(rating_books_pivot_train['review/score'].to_numpy().reshape(-1, 1))

In [None]:
# Internal quality scores for the agglomerative labelling, computed on the
# (n_titles, 1) array of per-title mean scores.
train_scores_2d = rating_books_pivot_train['review/score'].to_numpy().reshape(-1, 1)
silhouette_agglomerative = metrics.silhouette_score(train_scores_2d, predictedLabelAgglomerative)
# This is the Davies-Bouldin index (lower is better). It was previously
# stored and printed as "dunn", which is a different index.
davies_bouldin_agglomerative = metrics.davies_bouldin_score(train_scores_2d, predictedLabelAgglomerative)
dunn_aglomerative = davies_bouldin_agglomerative  # legacy variable name kept for compatibility; NOT the Dunn index
chs_aglomerative = metrics.calinski_harabasz_score(train_scores_2d, predictedLabelAgglomerative)
print("Agglomerative: silhouette: ", silhouette_agglomerative, ", davies_bouldin: ", davies_bouldin_agglomerative, ", chs: ", chs_aglomerative)

In [None]:
# Build a separate frame carrying the agglomerative cluster label of each
# title. Plain assignment would only create a second name for the same
# object, so adding the column would silently alter `rating_books_pivot_train`
# as well; `.assign` returns a new frame and leaves the training table intact.
rating_books_pivot_train_cluster = rating_books_pivot_train.assign(
    Cluster=predictedLabelAgglomerative
)
rating_books_pivot_train_cluster

In [None]:
# Find the agglomerative cluster of the query title and list up to 10
# training titles that share it. .loc raises KeyError if the title is not
# in the clustered training table.
query_title = 'comeback - a mother and daughter\'s journey through hell and back'

# One label-based lookup of (row, column). The previous chained
# `.loc[[...]]['Cluster'][0]` relied on positional `[0]` access on a
# title-indexed Series, which pandas has deprecated.
# NOTE: this rebinds `cluster` (the fitted model in an earlier cell) to the
# integer label, as the original cell did.
cluster = rating_books_pivot_train_cluster.loc[query_title, 'Cluster']

# Row positions (in the training table) of every title in that cluster.
predictedBookIndicesAgglomerative = np.where(predictedLabelAgglomerative == cluster)

print("Recommended Books:")
print("==================")
# Slice rather than iterate a fixed range(10): a cluster holding fewer than
# 10 titles would otherwise raise IndexError.
for rank, row_pos in enumerate(predictedBookIndicesAgglomerative[0][:10], start=1):
    print(rank, ". ", rating_books_pivot_train_cluster.index[row_pos],
          "-", rating_books_pivot_train_cluster['review/score'].iloc[row_pos])