In [21]:
import sys

# Abort early on interpreters older than Python 3.7.
assert sys.version_info >= (3, 7)

In [22]:
from packaging import version
import sklearn

# Require scikit-learn 1.0.1 or newer; an older install fails this assertion.
assert version.parse(sklearn.__version__) >= version.parse("1.0.1")

In [23]:
import matplotlib.pyplot as plt

# Global matplotlib text sizes: 14 pt for body text, axis labels, titles and
# legends; 10 pt for the tick labels on both axes.
plt.rcParams.update({
    'font.size': 14,
    'axes.labelsize': 14,
    'axes.titlesize': 14,
    'legend.fontsize': 14,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
})

In [24]:
# !pip install isbnlib
# !pip install newspaper3k
# !pip install goodreads_api_client

In [25]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [26]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be read by path.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [27]:
# Load the book/tag count table. Malformed rows are skipped instead of
# aborting the load. `on_bad_lines='skip'` is the replacement for
# `error_bad_lines=False`, which is deprecated since pandas 1.3 and no longer
# accepted by pandas 2.0.
df = pd.read_csv('drive/MyDrive/AI/book_tags.csv', on_bad_lines='skip')



  df = pd.read_csv('drive/MyDrive/AI/book_tags.csv', error_bad_lines = False)


In [28]:

# Re-applies the same matplotlib text sizes that the earlier plotting cell
# already set: 14 pt for body text, axis labels, titles and legends; 10 pt
# for tick labels.
plt.rcParams.update({
    'font.size': 14,
    'axes.labelsize': 14,
    'axes.titlesize': 14,
    'legend.fontsize': 14,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
})

In [29]:
# Decision-tree classifier with a fixed random seed.
# NOTE(review): `clf` is not referenced by any later cell visible here —
# confirm whether it is still needed.
clf = DecisionTreeClassifier(random_state=42)


In [30]:
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import NearestNeighbors

In [31]:
# Show the column names of the loaded table.
print(df.columns)

Index(['goodreads_book_id', 'tag_id', 'count'], dtype='object')


In [None]:
# pivot() rejects repeated (index, column) pairs, so keep only the first row
# seen for every (tag_id, goodreads_book_id) combination.
df_no_duplicates = df.drop_duplicates(subset=['tag_id', 'goodreads_book_id'])

# Reshape to one row per tag and one column per book, holding the 'count'
# values; combinations absent from the data are filled with 0.
user_item_matrix = (
    df_no_duplicates
    .pivot(index='tag_id', columns='goodreads_book_id', values='count')
    .fillna(0)
)

In [None]:


# Transpose so that each row is a book and each column is a tag (bookshelf).
item_user_matrix = user_item_matrix.T

# Hold out 20% of the book rows for evaluation; random_state makes the split
# reproducible.
train_data, test_data = train_test_split(item_user_matrix.values, test_size=0.2, random_state=42)

# Fit the cosine nearest-neighbour index on every book, in item_user_matrix
# row order. get_recommendations() translates the neighbour positions returned
# by kneighbors() through item_user_matrix.index, so the fitted rows must line
# up with that index; fitting on the shuffled train_data subset would map
# positions to the wrong book IDs.
model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(item_user_matrix.values)



In [None]:
# Assuming this is a simple recommendation function
def recommend_items(user_history, num_recommendations=5):
    """Return the positions of the largest values in ``user_history``.

    Args:
        user_history: 1-D array-like of numeric scores.
        num_recommendations: Maximum number of positions to return.

    Returns:
        A numpy array of integer positions into ``user_history``, ordered from
        the largest value downwards and holding at most
        ``num_recommendations`` entries. The array is empty when
        ``num_recommendations`` is zero or negative.
    """
    # A non-positive count must yield nothing. Without this guard the slice
    # ``[-0:]`` would select every position instead of none.
    if num_recommendations <= 0:
        return np.array([], dtype=np.intp)

    # argsort is ascending, so the best entries sit at the tail; reverse the
    # tail to list the best one first.
    ascending_positions = np.argsort(user_history)
    return ascending_positions[-num_recommendations:][::-1]

# Accumulator for the per-row metric records (dicts with 'hit_rate', 'mrr'
# and 'ndcg' keys) appended by the evaluation loop further down in this cell.
evaluation_results = []

def hit_rate(actual_items, recommended_items):
    """Return 1 if any recommended item occurs in ``actual_items``, else 0."""
    for candidate in recommended_items:
        if candidate in actual_items:
            return 1
    return 0

def mean_reciprocal_rank(rank_list):
    """Return the reciprocal of the 1-based position of the first hit.

    ``rank_list`` is a sequence of flags in which an entry equal to 1 marks a
    hit. The result is ``1 / position`` of the first such entry, or 0 when the
    sequence contains none. Only a single list is scored; no averaging across
    lists happens here.
    """
    hit_positions = (
        position
        for position, flag in enumerate(rank_list, start=1)
        if flag == 1
    )
    first_hit = next(hit_positions, None)
    return 0 if first_hit is None else 1 / first_hit

def discounted_cumulative_gain(actual_items, recommended_items, k=None):
    """Compute DCG with binary relevance over the first ``k`` recommendations.

    A recommended item counts as relevant (gain 1) when it is contained in
    ``actual_items`` and as irrelevant (gain 0) otherwise. The item at 0-based
    position ``i`` contributes ``(2 ** gain - 1) / log2(i + 2)``. When ``k`` is
    None the whole recommendation list is scored.
    """
    cutoff = len(recommended_items) if k is None else k

    # Binary relevance flag for every recommended item, in ranking order.
    relevance = [int(item in actual_items) for item in recommended_items]

    # Position-discounted contribution of each of the first `cutoff` items.
    terms = []
    for position, rel in enumerate(relevance[:cutoff]):
        terms.append((2 ** rel - 1) / np.log2(position + 2))

    return np.sum(terms)

def normalized_discounted_cumulative_gain(actual_items, recommended_items, k=None):
    """Compute NDCG: the DCG of the ranking divided by an ideal DCG.

    The ideal ranking is built from the recommended items only: their binary
    relevance flags (1 when the item is in ``actual_items``) are sorted so that
    all hits come first, then truncated to ``k``. Relevant items that were
    never recommended therefore do not lower the score. Returns 0 when the
    ideal DCG is not positive.
    """
    achieved = discounted_cumulative_gain(actual_items, recommended_items, k)

    # Best possible ordering of the same recommendations: every hit up front.
    best_order = [1 if item in actual_items else 0 for item in recommended_items]
    best_order.sort(reverse=True)

    ideal_terms = [
        (2 ** rel - 1) / np.log2(position + 2)
        for position, rel in enumerate(best_order[:k])
    ]
    ideal = np.sum(ideal_terms)

    if ideal > 0:
        return achieved / ideal
    return 0

# Score the default top-5 recommendations row by row and collect one metrics
# record per test row.
for user_index in range(test_data.shape[0]):
    # NOTE(review): train_data and test_data are disjoint sets of rows from
    # the same matrix, so row i of each is presumably a different book —
    # confirm that pairing them by position is the intended protocol.
    user_history = train_data[user_index, :]

    # Ground truth as the column positions holding a non-zero count. The raw
    # row contains counts, whereas recommend_items() returns column positions,
    # so testing membership against the raw row would compare a position with
    # count values.
    actual_items = set(np.flatnonzero(test_data[user_index, :]).tolist())

    # Get recommended items for the user
    recommended_items = recommend_items(user_history)

    # Calculate and store metrics (Hit Rate, MRR, NDCG)
    hit = hit_rate(actual_items, recommended_items)
    mrr = mean_reciprocal_rank([1 if item in actual_items else 0 for item in recommended_items])
    ndcg = normalized_discounted_cumulative_gain(actual_items, recommended_items, k=5)

    evaluation_results.append({'hit_rate': hit, 'mrr': mrr, 'ndcg': ndcg})


# Average each metric over all collected records.
average_hit_rate = np.mean([result['hit_rate'] for result in evaluation_results])
average_mrr = np.mean([result['mrr'] for result in evaluation_results])
average_ndcg = np.mean([result['ndcg'] for result in evaluation_results])

print("Average Hit Rate:", average_hit_rate)
print("Average Mean Reciprocal Rank:", average_mrr)
print("Average Normalized Discounted Cumulative Gain:", average_ndcg)

In [None]:
# Function to get recommendations for a book
def get_recommendations(book_id, model, item_user_matrix, num_neighbors=5):
    """Return the nearest neighbours of one book as a list of dicts.

    Args:
        book_id: Row label of the query book in ``item_user_matrix``.
        model: Fitted object exposing ``kneighbors(X, n_neighbors=...)`` that
            returns ``(distances, indices)``. The returned indices are used as
            row positions of ``item_user_matrix``, so the model must have been
            fitted on that matrix's rows in the same order.
        item_user_matrix: DataFrame with one row per book.
        num_neighbors: Maximum number of recommendations to return.

    Returns:
        A list of at most ``num_neighbors`` dicts with keys ``'book_id'`` and
        ``'distance'``, in the order returned by the model, never containing
        the query book itself.

    Raises:
        KeyError: If ``book_id`` is not a row label of ``item_user_matrix``.
    """
    # Books are rows, so the query vector is selected by row label; plain
    # ``item_user_matrix[book_id]`` would look for a column with that name.
    query = item_user_matrix.loc[book_id].values.reshape(1, -1)

    # Ask for one extra neighbour because the query book is normally returned
    # as its own closest match.
    distances, indices = model.kneighbors(query, n_neighbors=num_neighbors + 1)

    recommendations = []
    for distance, position in zip(distances.flatten(), indices.flatten()):
        neighbor_id = item_user_matrix.index[position]
        # Drop the query by identity rather than by position: it is not
        # guaranteed to come back first (for example on distance ties).
        if neighbor_id == book_id:
            continue
        recommendations.append({
            'book_id': neighbor_id,
            'distance': distance
        })
    return recommendations[:num_neighbors]

# Example lookup: fetch and print the nearest-neighbour recommendations for
# one book ID.
# NOTE(review): the ID is written as a string; if goodreads_book_id was parsed
# as an integer column, the lookup key presumably has to be an int — confirm.
book_id_to_recommend = '6348045'
recommendations = get_recommendations(
    book_id=book_id_to_recommend,
    model=model,
    item_user_matrix=item_user_matrix,
)
print(f"Recommendations for Book {book_id_to_recommend}:", recommendations)