In [48]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline

def recommend_book(book_ratings, book_df):
    """Recommend the unrated book with the highest predicted rating.

    A k-nearest-neighbours regressor is trained on TF-IDF vectors of the
    titles the user has already rated and is then used to predict a rating
    for every book in ``book_df`` that has no rating yet.

    Args:
        book_ratings: Mapping of book title to the user's numeric rating.
            Titles that do not occur in ``book_df['title']`` are ignored
            (the merge below is a left join on ``book_df``).
        book_df: DataFrame with at least a ``'title'`` column; all other
            columns are carried through to the result unchanged.

    Returns:
        The row of the merged frame (a ``pandas.Series``) for the unrated
        book with the highest predicted rating. It contains the original
        ``book_df`` columns plus ``'rating'`` (NaN) and
        ``'predicted_rating'``. On ties the first such row wins.

    Raises:
        ValueError: If no rated title matches ``book_df`` (nothing to train
            on) or if every book in ``book_df`` is already rated (nothing
            to recommend).

    Note:
        Only the title text is used as a feature. ``TfidfVectorizer``'s
        default token pattern keeps tokens of two or more characters, so
        titles that differ only by a one-character word (for example
        ``'Book A'`` and ``'Book B'``) produce identical vectors.
    """
    # Fail early with a clear message instead of an opaque error raised
    # deep inside pandas / scikit-learn.
    if not book_ratings:
        raise ValueError("book_ratings must contain at least one rated book")

    # Convert the book_ratings dictionary to a two-column DataFrame
    # (title, rating) so it can be joined onto the catalogue.
    ratings_df = (
        pd.DataFrame.from_dict(book_ratings, orient='index', columns=['rating'])
        .reset_index()
        .rename(columns={'index': 'title'})
    )

    # Left join: keep every catalogue row; unrated books get rating == NaN.
    combined_df = pd.merge(book_df, ratings_df, on='title', how='left')

    # Rated rows train the model; unrated rows are the candidates.
    is_rated = combined_df['rating'].notna()
    train_df = combined_df[is_rated]
    test_df = combined_df[~is_rated].copy()  # copy() so the column added below does not touch combined_df

    if train_df.empty:
        raise ValueError("none of the rated titles were found in book_df")
    if test_df.empty:
        raise ValueError("book_df contains no unrated books to recommend")

    # KNeighborsRegressor cannot be queried for more neighbours than it has
    # training samples, so cap k at the number of rated books.
    n_neighbors = min(3, len(train_df))

    pipeline = Pipeline([
        ('tfidf', TfidfVectorizer()),
        ('knn', KNeighborsRegressor(n_neighbors=n_neighbors))
    ])

    # Train on the rated titles, then score every unrated title.
    pipeline.fit(train_df['title'], train_df['rating'])
    test_df['predicted_rating'] = pipeline.predict(test_df['title'])

    # idxmax returns the index label of the first maximum.
    recommended_book = test_df.loc[test_df['predicted_rating'].idxmax()]

    return recommended_book


# Example usage: rate three of the five catalogue books, then ask for a
# recommendation among the remaining two.
book_ratings = dict(zip(['Book A', 'Book B', 'Book C'], [4.5, 3.0, 4.8]))

# Catalogue built row by row: (title, year, author).
book_df = pd.DataFrame(
    [
        ('Book A', 2000, 'Author 1'),
        ('Book B', 2005, 'Author 2'),
        ('Book C', 2010, 'Author 3'),
        ('Book D', 2015, 'Author 4'),
        ('Book E', 2020, 'Author 5'),
    ],
    columns=['title', 'year', 'author'],
)

recommended_book = recommend_book(book_ratings=book_ratings, book_df=book_df)
print(recommended_book)


title                 Book D
year                    2015
author              Author 4
rating                   NaN
predicted_rating         4.1
Name: 3, dtype: object
