In [26]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping

dataframe = pd.read_csv('data/processed.csv')

dataframe = dataframe.drop_duplicates(subset=['Book-Title'], keep='first')
dataframe = dataframe.sample(frac=0.1, random_state=1)

print("Shape of dataframe: ", dataframe.shape)
dataframe.head()

Shape of dataframe:  (24107, 8)


Unnamed: 0,User-ID,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Location,Age
432024,170229,0,Amerikanisher briefen-shteler,Alexander Harkavy,1999,National Yiddish Book Center,"chicago, illinois, usa",56
511002,222296,0,The Last Day of the War : A Novel,JUDITH CLAIRE MITCHELL,2004,Pantheon,"chapel hill, north carolina, usa",34
188701,127233,0,The Politics of Disillusionment: The Chinese C...,Hsi-Sheng Chi,1990,M E Sharpe Inc,"marietta, georgia, usa",29
1634,23768,0,Danny and the Dinosaur,Syd Hoff,1992,HarperTrophy,"st. louis, missouri, usa",45
504325,181176,0,Feminine Mystique,Betty Friedan,1963,W W Norton &amp; Co,"st. louis, missouri, usa",43


In [27]:
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(dataframe['Book-Title'])

X_train, X_test = train_test_split(X, test_size=0.2, random_state=1)

model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(X_train.shape[1], activation='linear')
])

earlyStopping = EarlyStopping(monitor='loss', patience=10, verbose=0, mode='min')

model.compile(
    loss='mse', 
    optimizer='adam'
)

model.fit(
    X_train.toarray(),
    X_train.toarray(),
    epochs=50,
    batch_size=32,
    callbacks=[earlyStopping]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50


<keras.src.callbacks.History at 0x2dd1e9810>

In [28]:
def recommend(title):
    titleVector = vectorizer.transform([title])
    titleVector = titleVector.toarray()

    # Get the predicted vector
    predictedVector = model.predict(titleVector)
    predictedVector = predictedVector[0]

    # Get the indices of the top 10 books
    indices = np.argsort(predictedVector)[::-1][:10]

    # Get the titles of the top 10 books
    titles = dataframe['Book-Title'].iloc[indices]

    # Print the titles
    print("\nIf you like " + title + ", you will also like:")

    for title in titles:
        print(title)

In [29]:
recommend("The Hobbit")


If you like The Hobbit, you will also like:
Blue Angel
Modelos De Mujer (ColecciÃ³n Andanzas)
Irresistible Forc
Bunny Runs Away (School Friends Series)
Flynn's In (Vintage Crime/Black Lizard)
Ottos Boy: Too Many Targets
The First Time I Got Paid For It : Writers' Tales From The Hollywood Trenches
Hard Aground: A Novel
The Bug Book/Book and Bottle (Hand in Hand with Nature)
Wind and Weather: Climates, Clouds, Snow, Tornadoes, and How Weather Is Predicted (Scholastic Voyages of Discovery. Natural History)


In [30]:
recommend("The Lord of the Rings")


If you like The Lord of the Rings, you will also like:
Blue Angel
Modelos De Mujer (ColecciÃ³n Andanzas)
Irresistible Forc
Bunny Runs Away (School Friends Series)
Flynn's In (Vintage Crime/Black Lizard)
Ottos Boy: Too Many Targets
The First Time I Got Paid For It : Writers' Tales From The Hollywood Trenches
Hard Aground: A Novel
The Bug Book/Book and Bottle (Hand in Hand with Nature)
Wind and Weather: Climates, Clouds, Snow, Tornadoes, and How Weather Is Predicted (Scholastic Voyages of Discovery. Natural History)
