<a href="https://colab.research.google.com/github/Natasyaamb/-Cryptography-Implementasi-AES256-SHA256-dan-BPCS-/blob/main/Sistem_Pencarian_Rekomendasi_Buku_TF_IDF_dan_Cosine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import precision_score, recall_score, f1_score
from ipywidgets import widgets, Layout, VBox, Output
from IPython.display import display

# Make sure the NLTK resources needed below are downloaded
import nltk

for _resource in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(_resource)

# Read the book data from Excel (make sure the file name and path are correct)
DATASET_PATH = 'dataset_summary_book.xlsx'
df = pd.read_excel(DATASET_PATH)

# Preprocessing function
def preprocess(text):
    """Normalize raw text into a space-separated string of lemmatized tokens.

    The text is lower-cased and tokenized with ``word_tokenize``; tokens that
    are not purely alphanumeric or that are English stop words are dropped,
    and the remaining tokens are lemmatized with ``WordNetLemmatizer``.

    Args:
        text: The raw text. Anything that is not a ``str`` (for example
            ``None`` or a NaN produced by an empty spreadsheet cell) is
            treated as empty text.

    Returns:
        The processed tokens joined by single spaces; ``''`` when the input
        is not a string or no token survives filtering.
    """
    # Non-string values have no .lower(); treat them as "no text" instead of
    # crashing while the function is applied over a whole column.
    if not isinstance(text, str):
        return ''

    # Build the stop-word lookup once per call rather than once per token,
    # and use a set so each membership test is O(1).
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    tokens = word_tokenize(text.lower())
    return ' '.join(
        lemmatizer.lemmatize(word)
        for word in tokens
        if word.isalnum() and word not in stop_words
    )

# Run every summary through preprocess() and keep the result in a new column
raw_summaries = df['summary']
df['processed_summary'] = raw_summaries.apply(preprocess)

# Debug: show the first few processed summaries
processed_preview = df['processed_summary'].head()
print(processed_preview)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


0    old major old boar manor farm call animal farm...
1    alex teenager living england lead gang nightly...
2    text plague divided five part town oran thousa...
3    argument enquiry proceeds series incremental s...
4    novel posit space around milky way divided con...
Name: processed_summary, dtype: object


In [None]:
# Fit TF-IDF on the processed summaries; X is the resulting document-term matrix
processed_corpus = df['processed_summary']
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(processed_corpus)

In [None]:
num_clusters = 5  # Choose a suitable number of clusters
# fit() returns the fitted estimator, so construction and fitting are chained
kmeans = KMeans(n_clusters=num_clusters, random_state=42).fit(X)

# Record the cluster label assigned to each row of X
df['cluster'] = kmeans.labels_



In [None]:
# Find the cluster label relevant to the user's input
def find_relevant_cluster(user_input):
    """Return the label that the fitted ``kmeans`` model predicts for a query.

    The query is preprocessed with ``preprocess`` and vectorized with the
    module-level ``vectorizer`` before prediction.

    Args:
        user_input: Free-text query.

    Returns:
        The first (and only) element of ``kmeans.predict`` for the query.
    """
    query_vector = vectorizer.transform([preprocess(user_input)])
    predicted_labels = kmeans.predict(query_vector)
    return predicted_labels[0]

# Recommend books for a free-text query
def recommend_books(user_input):
    """Find the books whose summaries are similar to ``user_input``.

    The query is preprocessed and vectorized with the module-level
    ``vectorizer``, then compared against every row of the TF-IDF matrix
    ``X`` using cosine similarity. The shared ``df`` is only read; scores
    are attached to a private copy of the matching rows.

    Args:
        user_input: Free-text query (e.g. a book synopsis).

    Returns:
        A 5-tuple:
        - DataFrame of every book with similarity > 0, restricted to the
          columns ``title``, ``book author``, ``summary``,
          ``publication date`` and ``genres``, in ``df`` row order. The
          ``summary`` column holds the *processed* summary.
        - DataFrame with columns ``title`` and ``similarity`` for the (at
          most) five highest-scoring books, best first.
        - The preprocessed query string.
        - The shape of the query's TF-IDF vector.
        - The number of books with similarity > 0.
    """
    processed_input = preprocess(user_input)
    input_vector = vectorizer.transform([processed_input])

    # One cosine-similarity score per row of X (and therefore per row of df)
    scores = cosine_similarity(X, input_vector).flatten()

    # Keep only books with similarity > 0. The scores are written to a copy
    # so that repeated calls do not leave a stale column on the global df.
    is_relevant = scores > 0
    relevant_books = df[is_relevant].copy()
    relevant_books['similarity'] = scores[is_relevant]

    # Show the processed summary (not the raw one) in the displayed table
    relevant_books['summary'] = relevant_books['processed_summary']

    # All relevant books, limited to the columns meant for display
    relevant_books_display = relevant_books[['title', 'book author', 'summary', 'publication date', 'genres']]

    # Sort by similarity and keep the top five recommendations
    top_recommendations = relevant_books.sort_values(by='similarity', ascending=False).head(5)

    return (
        relevant_books_display,
        top_recommendations[['title', 'similarity']],
        processed_input,
        input_vector.shape,
        len(relevant_books),
    )

# Produce recommendations for a synopsis entered by the user
def get_book_recommendations(summary):
    """Call ``recommend_books`` and convert its top-N frame to a plain list.

    Args:
        summary: Synopsis text to search with.

    Returns:
        The same 5-tuple as ``recommend_books``, except that the second
        element is the list produced by ``.values.tolist()`` on the
        recommendations DataFrame.
    """
    books, top_frame, query_text, vector_shape, book_count = recommend_books(summary)
    top_rows = top_frame.values.tolist()
    return books, top_rows, query_text, vector_shape, book_count

# Text area in which the user enters a synopsis
summary_layout = Layout(width='50%', height='100px')
summary_input = widgets.Textarea(
    description='Summary:',
    placeholder='Masukkan sinopsis buku di sini...',
    value='',
    disabled=False,
    layout=summary_layout,
)

# Button that requests the recommendations
button = widgets.Button(description="Get Recommendations")

# Click handler that displays the recommendations
def on_button_click(b):
    """Look up recommendations for the current text and print them.

    Reads the query from ``summary_input`` and writes into the module-level
    ``output`` widget, clearing whatever it showed before.

    Args:
        b: The clicked button, supplied by ipywidgets; not used.
    """
    results = get_book_recommendations(summary_input.value)
    relevant_books, recommendations, processed_input, _vector_shape, relevant_books_count = results

    with output:
        output.clear_output()
        print(f"Processed user input: {processed_input}")
        print(f"Relevant books found: {relevant_books_count}")
        print("\nRelevant books found:")
        display(relevant_books)
        print("\nTop Book recommendations:")
        for entry in recommendations:
            title, similarity = entry
            print(f"Title: {title}, Similarity: {similarity}")

# Output area that the click handler prints into
output = Output()

# Attach the handler to the button
button.on_click(on_button_click)

# Show the input box, the button and the output area stacked vertically
recommendation_ui = VBox([summary_input, button, output])
display(recommendation_ui)

VBox(children=(Textarea(value='', description='Summary:', layout=Layout(height='100px', width='50%'), placehol…

In [None]:
from sklearn.model_selection import train_test_split

# Evaluate the recommendation system
def evaluate_recommendation_system(test_data):
    """Score how often a book is found in the top five results for its own summary.

    For every row of ``test_data`` the ``summary`` is preprocessed,
    vectorized and compared against all rows of ``X``. The prediction is 1
    when the row's ``title`` is among the (at most five) best-scoring titles
    with similarity > 0, otherwise 0. The shared ``df`` is only read.

    NOTE(review): ``y_true`` is 1 for every row, so there are never any
    false positives: precision can only be 1.0 (or 0 when nothing is hit)
    and recall equals the hit rate. If the rows of ``test_data`` are also
    rows of the corpus behind ``X``, each query can match its own entry,
    which makes a perfect score expected rather than informative.

    Args:
        test_data: DataFrame with ``summary`` and ``title`` columns.

    Returns:
        Tuple ``(precision, recall, f1)`` from scikit-learn's binary metrics.
    """
    titles = df['title']
    y_pred = []
    for _, row in test_data.iterrows():
        input_vector = vectorizer.transform([preprocess(row['summary'])])

        # Keep the scores local (indexed by row position in X) instead of
        # rewriting a column on the global df for every test row.
        scores = pd.Series(cosine_similarity(X, input_vector).flatten())
        top_positions = scores[scores > 0].sort_values(ascending=False).head(5).index
        recommended_titles = titles.iloc[top_positions].tolist()

        # 1 = the book was recommended for its own summary, 0 = it was not
        y_pred.append(1 if row['title'] in recommended_titles else 0)

    # Every test row is an actual book, so the true label is always 1
    y_true = [1] * len(y_pred)

    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    return precision, recall, f1

# Hold out 20% of the rows to use as evaluation queries
X_train, X_test = train_test_split(df, random_state=42, test_size=0.2)

precision, recall, f1 = evaluate_recommendation_system(X_test)
for label, score in (("Precision:", precision), ("Recall:", recall), ("F1-score:", f1)):
    print(label, score)

Precision: 1.0
Recall: 1.0
F1-score: 1.0
