# 1. Gerekli Kütüphaneleri Yükle


In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


# 2. Veri Setini Yükle

In [3]:
# Read the books dataset from a path relative to the notebook's working directory.
df = pd.read_csv("../books/books_dataset.csv")

# 3. İlk Gözlem

In [4]:
# Report the number of rows, then preview the first records as the cell output.
print("Toplam kitap sayısı:", len(df))
df.head()

Toplam kitap sayısı: 6810


Unnamed: 0,isbn13,isbn10,title,subtitle,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count
0,9780002005883,2005883,Gilead,,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0
1,9780002261982,2261987,Spider's Web,A Novel,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0
2,9780006163831,6163831,The One Tree,,Stephen R. Donaldson,American fiction,http://books.google.com/books/content?id=OmQaw...,Volume Two of Stephen Donaldson's acclaimed se...,1982.0,3.97,479.0,172.0
3,9780006178736,6178731,Rage of angels,,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0
4,9780006280897,6280897,The Four Loves,,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0


In [5]:
# 4. Fill missing descriptions so the vectorizer only receives strings
df['description'] = df['description'].fillna("")

# 5. TF-IDF vectorization of the descriptions (English stop words removed)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['description'])

# 6. Pairwise cosine similarity between every pair of descriptions
# (dense n_books x n_books matrix)
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# 7. Build a title -> row index lookup.
# Series.drop_duplicates() compares the *values* (the row indices, which are
# already unique), so it never removed repeated titles and a lookup for such a
# title returned a whole Series. De-duplicate on the index labels instead and
# keep the first occurrence of each title.
indices = pd.Series(df.index, index=df['title'])
indices = indices[~indices.index.duplicated(keep='first')]



def get_recommendations_with_similarity(title, cosine_sim=cosine_sim, top_n=5):
    """Recommend the books whose descriptions are most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in the module-level ``indices`` Series.
    cosine_sim : array-like, optional
        Square similarity matrix; row ``i`` is indexed with the value stored
        in ``indices`` and its columns are matched positionally to ``df``.
        Defaults to the matrix that exists when the function is defined.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    pandas.DataFrame
        Columns ``title``, ``authors``, ``average_rating`` and
        ``similarity (%)``, ordered from most to least similar. An empty
        DataFrame is returned (and a message is printed) when the title is
        not found.
    """
    idx = indices.get(title)

    if idx is None:
        print(f"Kitap bulunamadı: {title}")
        return pd.DataFrame()

    # A title that occurs more than once makes the lookup return a Series of
    # row indices. Use the first occurrence instead of indexing the matrix
    # with all of them (which yields a 2-D block, not one row of scores).
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # One similarity score per book, as a flat float vector.
    scores = np.asarray(cosine_sim[idx], dtype=float).ravel()

    # Stable descending order: ties keep ascending row order.
    order = np.argsort(-scores, kind="stable")

    # Drop the query book by index rather than assuming it sorts first; it
    # does not when its description is empty (self-similarity 0) or when an
    # earlier row has an identical description.
    order = order[order != idx][:max(top_n, 0)]

    recommendations = df[['title', 'authors', 'average_rating']].iloc[order].copy()
    recommendations['similarity (%)'] = np.round(scores[order] * 100, 2)  # as a percentage

    return recommendations



# Example: description-based recommendations for "Gilead" with the default top_n.
get_recommendations_with_similarity("Gilead")


Unnamed: 0,title,authors,average_rating,similarity (%)
2311,The Handmaid's Tale,Margaret Atwood,4.09,12.55
918,Go Tell it on the Mountain,James Baldwin,4.01,11.25
2248,Children of the Alley,Najīb Maḥfūẓ,4.1,10.19
3360,The Last Eyewitness,Chris Seay;David B. Capes;Brian McLaren,3.77,10.13
4091,John Adams,David McCullough,4.06,9.64


In [8]:
def get_author_books(title, df=df):
    """Return up to five other books credited to the same ``authors`` value.

    The ``authors`` field of the first row whose title equals ``title`` is
    compared for exact equality against every row, so a multi-author string
    only matches rows carrying the identical string. Rows with the queried
    title are left out, and the rest are ordered by ``average_rating`` from
    highest to lowest.

    Returns a DataFrame with ``title``, ``authors`` and ``average_rating``;
    when the title is unknown a message is printed and an empty list is
    returned.
    """
    # Guard: unknown title
    if title not in df['title'].values:
        print(f"Kitap bulunamadı: {title}")
        return []

    is_queried_title = df['title'] == title

    # Author string of the first matching row
    author = df.loc[is_queried_title, 'authors'].iloc[0]

    # Same author string, different title
    by_same_author = df[(df['authors'] == author) & ~is_queried_title]

    # Best-rated first, then keep the display columns and the top five
    ranked = by_same_author.sort_values(by='average_rating', ascending=False)
    display_columns = ['title', 'authors', 'average_rating']
    return ranked[display_columns].head(5)

# Example: other books sharing the authors value of "The Four Loves".
get_author_books("The Four Loves")


Unnamed: 0,title,authors,average_rating
99,"The Chronicles of Narnia: Lion, the witch and ...",Clive Staples Lewis,4.26
101,"The Screwtape Letters ; With, Screwtape Propos...",Clive Staples Lewis,4.22
3659,"The lion, the witch and the wardrobe",Clive Staples Lewis,4.21
5061,Letters to Children,Clive Staples Lewis,4.18
98,The voyage of the Dawn Treader,Clive Staples Lewis,4.09


In [12]:
def get_multi_book_recommendations(titles, df=df, tfidf=tfidf, tfidf_matrix=tfidf_matrix, top_n=5):
    """Recommend books similar to the combined descriptions of several titles.

    Parameters
    ----------
    titles : iterable of str
        Exact titles to use as the query. Unknown titles are reported with a
        printed message and skipped.
    df : pandas.DataFrame, optional
        Book table with ``title``, ``description``, ``authors`` and
        ``average_rating`` columns; its rows are matched positionally to the
        rows of ``tfidf_matrix``.
    tfidf : fitted vectorizer, optional
        Object whose ``transform`` maps text into the space of ``tfidf_matrix``.
    tfidf_matrix : matrix, optional
        Vectorized descriptions, one row per row of ``df``.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    pandas.DataFrame or list
        DataFrame with ``title``, ``authors``, ``average_rating`` and
        ``similarity (%)``, most similar first. An empty list is returned
        when none of the titles is found.
    """
    descriptions = []
    positions_to_exclude = []

    for title in titles:
        # Resolve the title against the ``df`` that was passed in (not a
        # global lookup built from a possibly different frame), as row
        # positions so they line up with ``tfidf_matrix``.
        positions = np.flatnonzero(df['title'].eq(title).to_numpy())
        if positions.size:
            # With repeated titles, describe the book by its first occurrence
            # but exclude every row carrying that title from the results.
            descriptions.append(str(df['description'].iloc[positions[0]]))
            positions_to_exclude.extend(positions.tolist())
        else:
            print(f"Kitap bulunamadı: {title}")

    if not descriptions:
        return []

    combined_description = " ".join(descriptions)
    query_vec = tfidf.transform([combined_description])
    cosine_similarities = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # Mark the input books with a sentinel score so they sort last.
    cosine_similarities[positions_to_exclude] = -1

    # Highest similarity first; drop the sentinel rows so an input book can
    # never be returned, then keep at most ``top_n`` results.
    ranked = cosine_similarities.argsort()[::-1]
    ranked = ranked[cosine_similarities[ranked] >= 0][:max(top_n, 0)]

    results = df[['title', 'authors', 'average_rating']].iloc[ranked].copy()
    results['similarity (%)'] = np.round(cosine_similarities[ranked] * 100, 2)

    return results

# Example: recommendations from the combined descriptions of three titles.
get_multi_book_recommendations(["Koko", "The Secret of Chimneys", "Murder in LaMut"])


Unnamed: 0,title,authors,average_rating,similarity (%)
3650,Magician,Raymond E. Feist,4.33,23.82
5195,Legends,George R. R. Martin;Anne McCaffrey,3.88,17.74
6721,The Value of X,Poppy Z. Brite,4.14,14.93
2564,Mind Prey,John Sandford,4.21,12.88
458,Midnighters #2: Touching Darkness,Scott Westerfeld,,12.32
