In [None]:
from google.colab import files

# files.upload() returns a {filename: file-bytes} dict. Binding it to a name
# (instead of leaving the call as the cell's last expression) keeps the
# notebook from echoing the file contents -- i.e. the Kaggle API key -- into
# the saved cell output.
uploaded = files.upload()  # Upload kaggle.json here


Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"alaashorbaji","key":"<REDACTED>"}'}

In [None]:
# Move kaggle.json to ~/.kaggle and set permissions
# (presumably ~/.kaggle/kaggle.json is where the `kaggle` command looks for
# credentials -- TODO confirm for the installed CLI version)
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Mode 600 = read/write for the owner only, so other users cannot read the key
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
# Download the dataset from Kaggle
!kaggle datasets download -d zygmunt/goodbooks-10k

# Unzip the downloaded file
!unzip -q goodbooks-10k.zip


Dataset URL: https://www.kaggle.com/datasets/zygmunt/goodbooks-10k
License(s): CC-BY-SA-4.0
Downloading goodbooks-10k.zip to /content
  0% 0.00/11.6M [00:00<?, ?B/s]
100% 11.6M/11.6M [00:00<00:00, 854MB/s]


In [None]:
import pandas as pd

# Read the book metadata and the user ratings, in that order.
books, ratings = (pd.read_csv(path) for path in ('books.csv', 'ratings.csv'))

# Report (rows, columns) of each table, then preview the ratings.
print('Books shape:', books.shape)
print('Ratings shape:', ratings.shape)
ratings.head()


Books shape: (10000, 23)
Ratings shape: (981756, 3)


Unnamed: 0,book_id,user_id,rating
0,1,314,5
1,1,439,3
2,1,588,5
3,1,1169,4
4,1,1185,4


In [None]:
# Show the first ten rows of the ratings table
ratings.head(10)


Unnamed: 0,book_id,user_id,rating
0,1,314,5
1,1,439,3
2,1,588,5
3,1,1169,4
4,1,1185,4
5,1,2077,4
6,1,2487,4
7,1,2900,5
8,1,3662,4
9,1,3922,5


In [None]:
# Show the first five rows of the book metadata
books.head(5)

Unnamed: 0,id,book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,4780653,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...
2,3,41865,41865,3212258,226,316015849,9780316000000.0,Stephenie Meyer,2005.0,Twilight,...,3866839,3916824,95009,456191,436802,793319,875073,1355439,https://images.gr-assets.com/books/1361039443m...,https://images.gr-assets.com/books/1361039443s...
3,4,2657,2657,3275794,487,61120081,9780061000000.0,Harper Lee,1960.0,To Kill a Mockingbird,...,3198671,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m...,https://images.gr-assets.com/books/1361975680s...
4,5,4671,4671,245494,1356,743273567,9780743000000.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2683664,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...


In [None]:
from sklearn.preprocessing import LabelEncoder

# Map raw user ids to integer indices and store them in a new 'user' column.
user_enc = LabelEncoder()
ratings['user'] = user_enc.fit_transform(ratings['user_id'])

# Same for raw book ids -> 'book' column.
book_enc = LabelEncoder()
ratings['book'] = book_enc.fit_transform(ratings['book_id'])

# Vocabulary sizes for the embedding layers: one entry per distinct id
# learned by each encoder.
num_users = len(user_enc.classes_)
num_books = len(book_enc.classes_)

print(f"Unique users: {num_users}")
print(f"Unique books: {num_books}")


Unique users: 53424
Unique books: 10000


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Encoded user/book indices are the two model inputs; the rating, cast to
# float32, is the regression target.
X_user = ratings['user'].to_numpy()
X_book = ratings['book'].to_numpy()
y = ratings['rating'].to_numpy(dtype=np.float32)

# Hold out 20% of the rows; the fixed random_state makes the split repeatable.
(X_user_tr, X_user_te,
 X_book_tr, X_book_te,
 y_tr, y_te) = train_test_split(
    X_user, X_book, y,
    test_size=0.2,
    random_state=42,
)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout
from tensorflow.keras.models import Model

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout masks and batch shuffling are repeatable when the
# notebook is re-run (same seed as the train/test split). Some GPU kernels
# may still introduce small run-to-run differences.
tf.keras.utils.set_random_seed(42)

embedding_dim = 50                       # size of latent factors

# ----- Inputs -----
# One integer index per example for each of the two inputs.
user_in  = Input(shape=(1,), name='user_in')
book_in  = Input(shape=(1,), name='book_in')

# ----- Embedding layers -----
# One learned embedding_dim-sized vector per encoded user / book index.
user_emb = Embedding(num_users, embedding_dim, name='user_emb')(user_in)
book_emb = Embedding(num_books, embedding_dim, name='book_emb')(book_in)

# ----- Flatten vectors -----
user_vec = Flatten()(user_emb)
book_vec = Flatten()(book_emb)

# ----- Concatenate and pass through an MLP -----
x = Concatenate()([user_vec, book_vec])
x = Dense(128, activation='relu')(x)
x = Dropout(0.30)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.30)(x)

# ----- Output: predicted rating -----
# Linear activation: the prediction is an unbounded real number, it is NOT
# clipped to the 1-5 rating scale.
out = Dense(1, activation='linear')(x)

# Regression set-up: minimise mean squared error, also report mean absolute error.
model = Model(inputs=[user_in, book_in], outputs=out)
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.summary()


In [None]:
# Stop training once the validation loss has not improved for two consecutive
# epochs and roll the model back to the best epoch's weights. Without this the
# model keeps fitting the training set for all 20 epochs while the validation
# loss stops improving.
# NOTE: the held-out split is used for this stopping decision, so metrics
# reported on it afterwards are slightly optimistic.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)

history = model.fit(
    [X_user_tr, X_book_tr], y_tr,
    validation_data=([X_user_te, X_book_te], y_te),
    epochs=20,                # upper bound; early stopping usually ends sooner
    batch_size=512,
    callbacks=[early_stop],
    verbose=1
)


Epoch 1/20
[1m1534/1534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 32ms/step - loss: 2.6755 - mae: 1.1964 - val_loss: 0.7403 - val_mae: 0.6835
Epoch 2/20
[1m1534/1534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 32ms/step - loss: 0.8787 - mae: 0.7436 - val_loss: 0.7202 - val_mae: 0.6673
Epoch 3/20
[1m1534/1534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 32ms/step - loss: 0.8010 - mae: 0.7070 - val_loss: 0.7171 - val_mae: 0.6580
Epoch 4/20
[1m1534/1534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 33ms/step - loss: 0.7347 - mae: 0.6754 - val_loss: 0.7148 - val_mae: 0.6587
Epoch 5/20
[1m1534/1534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 32ms/step - loss: 0.6773 - mae: 0.6464 - val_loss: 0.7101 - val_mae: 0.6595
Epoch 6/20
[1m1534/1534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 32ms/step - loss: 0.6147 - mae: 0.6124 - val_loss: 0.7187 - val_mae: 0.6582
Epoch 7/20
[1m1534/1534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [None]:
import numpy as np

def recommend_books(user_raw_id, top_n=10):
    """
    Return a DataFrame with the top-N book recommendations for a raw user_id.

    Every book index known to ``book_enc`` is scored for the user with
    ``model`` and the ``top_n`` highest-scoring books are returned, best first.

    Parameters
    ----------
    user_raw_id
        Raw user id, in the form that ``user_enc`` was fitted on (it is
        converted with ``user_enc.transform``; whatever that raises for an
        unknown id propagates to the caller).
    top_n : int, default 10
        Number of recommendations to return. Must be at least 1.

    Returns
    -------
    pandas.DataFrame
        Columns ``book_id``, ``title`` and ``authors`` taken from ``books``,
        ordered from highest to lowest predicted rating. ``book_id`` here is
        the ``books`` column of that name, which is a different identifier
        from the ``book_id`` used in the ratings table.

    Raises
    ------
    ValueError
        If ``top_n`` is smaller than 1.
    """
    # Guard: with top_n == 0 the slice [-0:] below would select *every* book.
    if top_n < 1:
        raise ValueError("top_n must be at least 1")

    # Convert raw user_id to encoded index
    user_idx = user_enc.transform([user_raw_id])[0]

    # Predict ratings for every book (same user index repeated for each book)
    all_book_indices = np.arange(num_books)
    preds = model.predict(
        [np.full(num_books, user_idx), all_book_indices],
        batch_size=1024,
        verbose=0
    ).reshape(-1)

    # Encoded indices of the top-N highest-scoring books, best first
    top_idx = np.argsort(preds)[-top_n:][::-1]
    # Back to the raw ids the encoder was fitted on (the ratings' book_id)
    top_book_ids = book_enc.inverse_transform(top_idx)

    # Map to titles & authors.
    # The ratings' book ids correspond to books['id'], not books['book_id'];
    # joining on the latter matched only a few unrelated rows. The explicit
    # rank column keeps the result in score order (a plain isin() filter
    # returned rows in `books` order instead).
    ranking = pd.DataFrame({
        'id': top_book_ids,
        '_rank': np.arange(len(top_book_ids)),
    })
    recs = (
        ranking
        .merge(books[['id', 'book_id', 'title', 'authors']], on='id', how='inner')
        .sort_values('_rank')
    )
    return recs[['book_id', 'title', 'authors']].reset_index(drop=True)

# Example: recommendations for user with raw ID 42
# (42 is the raw user id, which recommend_books converts via user_enc;
# it is not the encoded 'user' index)
recommendations = recommend_books(user_raw_id=42, top_n=10)
recommendations


Unnamed: 0,book_id,title,authors
0,976,Deception Point,Dan Brown
1,7455,Sex and the City,Candace Bushnell
2,4708,The Beautiful and Damned,F. Scott Fitzgerald
