<a href="https://colab.research.google.com/github/123nol/AI-projects/blob/main/Copy_of_fcc_book_recommendation_knn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import libraries (you may add additional imports but you may not have to)
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix,hstack
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [None]:
# get data files
!wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip

!unzip book-crossings.zip

books_filename = 'BX-Books.csv'
ratings_filename = 'BX-Book-Ratings.csv'

--2025-02-16 20:08:41--  https://cdn.freecodecamp.org/project-data/books/book-crossings.zip
Resolving cdn.freecodecamp.org (cdn.freecodecamp.org)... 104.26.3.33, 172.67.70.149, 104.26.2.33, ...
Connecting to cdn.freecodecamp.org (cdn.freecodecamp.org)|104.26.3.33|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26085508 (25M) [application/zip]
Saving to: ‘book-crossings.zip’


2025-02-16 20:08:41 (187 MB/s) - ‘book-crossings.zip’ saved [26085508/26085508]

Archive:  book-crossings.zip
  inflating: BX-Book-Ratings.csv     
  inflating: BX-Books.csv            
  inflating: BX-Users.csv            


In [None]:
# import csv data into dataframes
# Books: only the first four columns are kept; everything is read as text and
# the publication year is converted to a number afterwards.
df_books = pd.read_csv(
    books_filename,
    encoding="ISO-8859-1",
    sep=";",
    header=0,
    names=['isbn', 'title', 'author', 'publication_year'],
    usecols=['isbn', 'title', 'author', 'publication_year'],
    dtype={'isbn': 'str', 'title': 'str', 'author': 'str', 'publication_year': 'str'},
)

# Ratings: one row per (user, isbn, rating).
df_ratings = pd.read_csv(
    ratings_filename,
    encoding="ISO-8859-1",
    sep=";",
    header=0,
    names=['user', 'isbn', 'rating'],
    usecols=['user', 'isbn', 'rating'],
    dtype={'user': 'int32', 'isbn': 'str', 'rating': 'float32'})

# Unparseable years become <NA> (errors="coerce"), hence the nullable Int64 dtype.
df_books['publication_year'] = pd.to_numeric(df_books['publication_year'], errors="coerce").astype('Int64')

# Replace missing years with the most common year.  fillna returns a new
# Series, so the result has to be assigned back to take effect.
date_mode = int(df_books['publication_year'].mode()[0])
df_books['publication_year'] = df_books['publication_year'].fillna(date_mode)

# Rescale the year to [0, 1]; the scaler expects a 2-D float array and the
# column wants the flattened 1-D result.
scaler = MinMaxScaler()
df_books['normalized_date'] = scaler.fit_transform(
    df_books['publication_year'].to_numpy(dtype='float64').reshape(-1, 1)
).ravel()


df_books.head()


Unnamed: 0,isbn,title,author,publication_year,normalized_date
0,195153448,Classical Mythology,Mark P. O. Morford,2002,0.976585
1,2005018,Clara Callan,Richard Bruce Wright,2001,0.976098
2,60973129,Decision in Normandy,Carlo D'Este,1991,0.97122
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,0.975122
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,0.975122


In [None]:
def vectorizeBookProperties(rating_df, books_df):
    """Build the sparse book-by-user rating matrix and fit both neighbour models.

    Parameters
    ----------
    rating_df : pandas.DataFrame
        Ratings with ``isbn``, ``user`` and ``rating`` columns.
    books_df : pandas.DataFrame
        Book metadata containing a numeric ``normalized_date`` column.

    Returns
    -------
    tuple
        ``(modelPivot, book_indices_map, books_df_copy, final_pivot, modelDate)``:

        * ``modelPivot`` - cosine ``NearestNeighbors`` fitted on the rating matrix.
        * ``book_indices_map`` - Series mapping each rated ISBN to its matrix row.
        * ``books_df_copy`` - copy of ``books_df`` with numeric columns cast to
          float and their missing values replaced by the column mean.
        * ``final_pivot`` - un-normalised CSR matrix, one row per rated ISBN and
          one column per user.
        * ``modelDate`` - euclidean ``NearestNeighbors`` fitted on
          ``normalized_date`` in ``books_df_copy`` row order.
    """
    unique_isbns = rating_df['isbn'].unique()
    unique_users = rating_df['user'].unique()

    # Position of every ISBN / user in order of first appearance; these
    # positions are the row / column coordinates of the sparse matrix.
    book_indices_map = pd.Series(range(len(unique_isbns)), index=unique_isbns)
    user_indices_map = pd.Series(range(len(unique_users)), index=unique_users)
    row_indices = rating_df['isbn'].map(book_indices_map).to_numpy()
    col_indices = rating_df['user'].map(user_indices_map).to_numpy()

    final_pivot = csr_matrix(
        (rating_df['rating'].to_numpy(), (row_indices, col_indices)),
        shape=(len(unique_isbns), len(unique_users)),
    )

    books_df_copy = books_df.copy()
    numeric_cols = books_df_copy.select_dtypes(include=np.number).columns
    # Nullable integer columns become float so missing values turn into NaN ...
    books_df_copy[numeric_cols] = books_df_copy[numeric_cols].astype(float)
    # ... which is then replaced by the column mean (the tree below rejects NaN).
    books_df_copy[numeric_cols] = books_df_copy[numeric_cols].fillna(books_df_copy[numeric_cols].mean())

    date_values = books_df_copy['normalized_date'].values.reshape(-1, 1)
    modelDate = NearestNeighbors(metric='euclidean', algorithm='ball_tree')
    modelDate.fit(date_values)

    # Scale ratings by the largest rating.  Skip the division when that maximum
    # is zero, which would otherwise fill the matrix with NaN.
    peak_rating = final_pivot.max()
    normal_rating_matrix = final_pivot / peak_rating if peak_rating != 0 else final_pivot
    modelPivot = NearestNeighbors(metric='cosine', algorithm='brute')
    modelPivot.fit(normal_rating_matrix)

    return modelPivot, book_indices_map, books_df_copy, final_pivot, modelDate

In [None]:
def get_recommends(book=""):
    """Recommend books whose rating patterns are closest to ``book``.

    The rating-based neighbour model is queried with the rating vector of
    ``book`` itself.  Only when ``book`` has no ratings is the vector of a book
    with a nearby publication date used as a stand-in.

    Parameters
    ----------
    book : str
        Exact title to look up in the book catalogue.

    Returns
    -------
    list
        ``[book, [[title, score], ...]]`` with at most five entries ordered from
        most to least similar, where ``score`` is ``1 - cosine distance``.
        The inner list is empty when no usable rating vector exists.

    Raises
    ------
    IndexError
        If no catalogue entry has the title ``book``.
    """
    n_recommendations = 5

    # NOTE: both models are refit on every call, which is slow; fit once and
    # reuse the result if this function is called repeatedly.
    model, book_indices_map, books_df_copy, final_pivot, modelDate = vectorizeBookProperties(df_ratings, df_books)

    # Work with row positions: the date model returns positions, not labels.
    title_positions = np.flatnonzero((books_df_copy['title'] == book).to_numpy())
    if title_positions.size == 0:
        raise IndexError(f"book title not found: {book!r}")
    book_row = books_df_copy.iloc[title_positions[0]]
    book_isbn = book_row['isbn']

    if book_isbn in book_indices_map.index:
        query_index = book_indices_map.loc[book_isbn]
    else:
        # No ratings for this book: fall back to the first rated book among
        # those closest in publication date.
        n_date_neighbors = min(n_recommendations + 1, len(books_df_copy))
        _, idx_two = modelDate.kneighbors([[book_row['normalized_date']]], n_neighbors=n_date_neighbors)
        query_index = None
        for disbn in books_df_copy.iloc[idx_two.ravel()]['isbn']:
            if disbn in book_indices_map.index:
                query_index = book_indices_map.loc[disbn]
                break
        if query_index is None:
            return [book, []]

    # Ask for one extra neighbour because the query row is itself in the model.
    n_query = min(n_recommendations + 1, final_pivot.shape[0])
    distance_one, idx_one = model.kneighbors(final_pivot.getrow(query_index), n_neighbors=n_query)
    distance_one, idx_one = distance_one.ravel(), idx_one.ravel()

    # Drop the query row by index rather than by position: with tied distances
    # it is not guaranteed to be returned first.
    is_other_book = idx_one != query_index
    distance_one = distance_one[is_other_book][:n_recommendations]
    idx_one = idx_one[is_other_book][:n_recommendations]

    recommended_isbn = book_indices_map.index[idx_one]
    titled = books_df_copy.loc[books_df_copy['isbn'].isin(recommended_isbn), ['isbn', 'title']]
    title_by_isbn = titled.drop_duplicates('isbn').set_index('isbn')['title']

    # Pair every title with the score of the same neighbour; rated ISBNs that
    # are missing from the catalogue have no title and are skipped.
    rec_arr = [
        [title_by_isbn.loc[isbn], 1 - distance]
        for isbn, distance in zip(recommended_isbn, distance_one)
        if isbn in title_by_isbn.index
    ]

    return [book, rec_arr]



In [None]:
# Show the recommendations for the challenge's reference title.
books = get_recommends("Where the Heart Is (Oprah's Book Club (Paperback))")
print(books)


def test_book_recommendation():
  """Compare get_recommends with the challenge's reference answer.

  Prints the success message when the returned title equals the query title
  and each of the first two recommendations has a title from the expected
  list and a score closer than 0.05 to the expected score at the same
  position; prints the retry message otherwise.
  """
  query_title = "Where the Heart Is (Oprah's Book Club (Paperback))"
  expected_titles = ["I'll Be Seeing You", 'The Weight of Water', 'The Surgeon', 'I Know This Much Is True']
  expected_scores = [0.8, 0.77, 0.77, 0.77]

  recommends = get_recommends(query_title)

  mismatches = 0
  if recommends[0] != query_title:
    mismatches += 1
  # Only the first two recommendations are inspected.
  for position in (0, 1):
    if recommends[1][position][0] not in expected_titles:
      mismatches += 1
    if abs(recommends[1][position][1] - expected_scores[position]) >= 0.05:
      mismatches += 1

  if mismatches == 0:
    print("You passed the challenge! 🎉🎉🎉🎉🎉")
  else:
    print("You haven't passed yet. Keep trying!")

test_book_recommendation()

["Where the Heart Is (Oprah's Book Club (Paperback))", [['Any four women could rob the Bank of Italy: A novel', 0.4459764877482998], ['Curses!', 0.4459764877482998], ['Men in Trouble', 0.4459764877482998], ['Birds of Passage', 0.4459764877482998], ["Boy's night out", 0.4459764877482998]]]
You haven't passed yet. Keep trying!
