In [None]:
import numpy as np # linear algebra
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans

In [None]:
# Read books_data.csv from the local datasets folder into df_books.
df_books = pd.read_csv(
    "D:\\TJ\\MACHINELEARNING\\BookRecommendation\\datasets\\books_data.csv"
)

In [None]:
# Read Books_rating.csv from the local datasets folder into df_rating.
df_rating = pd.read_csv(
    "D:\\TJ\\MACHINELEARNING\\BookRecommendation\\datasets\\Books_rating.csv"
)

In [None]:
# Preview the first rows of the books table.
df_books.head()

In [None]:
# Preview the first rows of the ratings table.
df_rating.head()

In [None]:
# Join books and ratings. No key is given, so pandas joins on the columns the
# two frames have in common, using an inner join (the default).
# NOTE(review): presumably the shared column is 'Title' - confirm and consider
# passing it explicitly via `on=`.
books_data = df_books.merge(df_rating)

In [None]:
# Column dtypes and non-null counts of the merged table.
books_data.info()

In [None]:
# Summary statistics of the merged table.
books_data.describe()

In [None]:
# Keep only the rows in which every column is populated.
books_data = books_data.dropna(axis=0, how="any")

In [None]:
# Since the dataset is huge, work on a 10% random sample (frac=.1).
# random_state is fixed so the same rows are drawn on every run.
books_data_sample = books_data.sample(frac=.1,random_state=1)
books_data_sample.shape

In [None]:
# Preview the first rows of the sampled data.
books_data_sample.head()

In [None]:
# One row per title holding the mean of that title's review scores;
# any missing means are replaced with 0.
books_data_pivoted = (
    books_data_sample
    .pivot_table(index='Title', values='review/score', aggfunc='mean')
    .fillna(0)
)
books_data_pivoted


In [None]:
# Split into train and test sets: test_size=0.2 gives 80% train / 20% test.
# random_state is fixed so the split is reproducible.
books_data_train, books_data_test = train_test_split(books_data_pivoted, test_size=0.2, random_state=42) 
books_data_train

In [None]:
# Display the held-out test split.
books_data_test

In [None]:
# Histogram of the per-title mean scores in the training split.
plt.hist(
    x=books_data_train['review/score'].to_numpy()
)

In [None]:
#Build NN Model
from sklearn.neighbors import NearestNeighbors

# Build NearestNeighbors Object.
# The model is fitted on a single feature (the mean review score), so the
# metric has to compare magnitudes. Cosine distance only compares direction:
# any two non-zero 1-D values with the same sign have cosine distance 0, so
# every training book would tie and the "nearest" ones would be arbitrary.
# Euclidean distance on one feature is simply |score_a - score_b|.
model_nn = NearestNeighbors(metric='euclidean', algorithm='brute', n_neighbors=30, n_jobs=-1)

# Fit the NearestNeighbor on an (n_titles, 1) array of training scores.
model_nn.fit(books_data_train['review/score'].to_numpy().reshape(-1, 1))

In [None]:
# Get top 10 nearest neighbors based on rating or review/score
print(books_data_test.iloc[2])

# Query with a plain NumPy array. The model was fitted on an ndarray, and
# passing a DataFrame to it makes scikit-learn emit a warning that X has
# feature names the estimator was fitted without.
query_features = books_data_test.iloc[[2]].to_numpy()
indices = model_nn.kneighbors(query_features, 10, return_distance=False)
print(indices)

# Print the recommended books. `indices` holds row positions within the
# training array, so they are resolved against books_data_train with iloc.
print("Recommended Books:")
print("==================")
for rank, train_pos in enumerate(indices[0], start=1):
    print(rank, ". ", books_data_train.iloc[train_pos])

In [None]:
# Partition the training titles into 7 k-means clusters, using the mean
# review score (reshaped to a single-column 2-D array) as the only feature.
kmeans = KMeans(n_clusters=7, random_state=0)
cluster_labels = kmeans.fit_predict(
    books_data_train['review/score'].to_numpy().reshape(-1, 1)
)

In [None]:
# Recommend up to 10 training books that fall in the same k-means cluster as
# one held-out book.
print(books_data_test.iloc[2])
# predict() needs a 2-D (n_samples, n_features) array, hence the reshape.
predictedLabel = kmeans.predict(books_data_test.iloc[2]['review/score'].reshape(-1, 1))
# Row positions (within the training data) of every book in that cluster.
predictedBookIndices = np.where(kmeans.labels_ == predictedLabel[0])
print("Recommended Books:")
print("==================")
# Slice rather than index positions 0..9, so a cluster with fewer than 10
# members prints what it has instead of raising IndexError.
for rank, train_pos in enumerate(predictedBookIndices[0][:10], start=1):
    print(rank, ". ", books_data_train.iloc[train_pos])

In [None]:
from sklearn import metrics

# Internal validation scores for the k-means clustering of the training split.
# The feature array is built once and shared by the three scores.
kmeans_features = books_data_train['review/score'].to_numpy().reshape(-1, 1)
silhouette = metrics.silhouette_score(kmeans_features, cluster_labels)
# davies_bouldin_score is the Davies-Bouldin index (lower is better), which
# is not the Dunn index. The variable keeps its original name `dunn` so any
# code that reads it still works; only the printed label is corrected.
dunn = metrics.davies_bouldin_score(kmeans_features, cluster_labels)
chs = metrics.calinski_harabasz_score(kmeans_features, cluster_labels)
print("kMeans: silhouette: ", silhouette, ", davies-bouldin: ", dunn, ", chs: ", chs)

In [None]:
from scipy.cluster.hierarchy import dendrogram, linkage
import scipy.cluster.hierarchy as shc
from matplotlib import pyplot as plt

# Build a Ward-linkage hierarchy over the mean score of every pivoted title
# and draw it as a dendrogram.
dend = dendrogram(
    linkage(
        books_data_pivoted['review/score'].to_numpy().reshape(-1, 1),
        method='ward',
    )
)


In [None]:
from sklearn.cluster import AgglomerativeClustering

# Agglomerative clustering of all pivoted titles into 5 groups.
# Ward linkage only works with Euclidean distance, which is also the default,
# so no distance argument is passed. The former `affinity` keyword was
# deprecated in scikit-learn 1.2 and removed in 1.4, where passing it raises
# TypeError; omitting it behaves the same on old and new versions.
cluster = AgglomerativeClustering(n_clusters = 5, linkage = 'ward')
predictedLabelAgglomerative = cluster.fit_predict(books_data_pivoted['review/score'].to_numpy().reshape(-1, 1))

In [None]:
# Internal validation scores for the agglomerative clustering, computed over
# all pivoted titles. The feature array is built once and shared.
agglomerative_features = books_data_pivoted['review/score'].to_numpy().reshape(-1, 1)
silhouette_agglomerative = metrics.silhouette_score(agglomerative_features, predictedLabelAgglomerative)
# davies_bouldin_score is the Davies-Bouldin index (lower is better), not the
# Dunn index. The variable name is kept for compatibility; the printed label
# is corrected.
dunn_aglomerative = metrics.davies_bouldin_score(agglomerative_features, predictedLabelAgglomerative)
chs_aglomerative = metrics.calinski_harabasz_score(agglomerative_features, predictedLabelAgglomerative)
print("Agglomerative: silhouette: ", silhouette_agglomerative, ", davies-bouldin: ", dunn_aglomerative, ", chs: ", chs_aglomerative)

In [None]:
# Copy of the pivoted table with each title's agglomerative cluster label
# added as a 'Cluster' column; books_data_pivoted itself is left unchanged.
books_data_cluster = books_data_pivoted.assign(Cluster=predictedLabelAgglomerative)

In [None]:
# Recommend up to 10 books that share an agglomerative cluster with one book.
book = books_data_cluster.iloc[10]
print(book)

# NOTE(review): this rebinds `cluster`, which an earlier cell uses for the
# fitted AgglomerativeClustering model. The name is kept because later code
# may read it, but that earlier cell's fit_predict call cannot be re-run on
# its own after this one.
cluster = book['Cluster']
# Row positions of every book carrying the same cluster label.
predictedBookIndicesAgglomerative = np.where(predictedLabelAgglomerative == cluster)
print("Recommended Books:")
print("==================")
# Slice rather than index positions 0..9, so a cluster with fewer than 10
# members prints what it has instead of raising IndexError.
for rank, row_pos in enumerate(predictedBookIndicesAgglomerative[0][:10], start=1):
    print(rank, ". ", books_data_cluster.iloc[row_pos])