# Install OpenAI library

In [None]:
!pip install -q openai

# Import the needed Libraries

In [1]:
import ast
import getpass
import os

import numpy as np
import openai
import pandas as pd

# Authentication

In [None]:
# Never hardcode the key in the notebook: use the key already exported in the
# environment and only prompt (without echoing) when none is set. This also
# avoids overwriting a real key with a placeholder string.
if not os.getenv('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')
openai.api_key = os.environ['OPENAI_API_KEY']

# Load the books dataset into Pandas DataFrame

In [3]:
# Read the raw books table from the CSV file in the working directory.
books_csv_path = 'books_dataset.csv'
df = pd.read_csv(books_csv_path)

In [4]:
df

Unnamed: 0,isbn13,title,authors,categories,description,published_year,average_rating
0,9780002005883,Gilead,Marilynne Robinson,Fiction,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85
1,9780002261982,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83
2,9780006163831,The One Tree,Stephen R. Donaldson,American fiction,Volume Two of Stephen Donaldson's acclaimed se...,1982.0,3.97
3,9780006178736,Rage of angels,Sidney Sheldon,Fiction,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93
4,9780006280897,The Four Loves,Clive Staples Lewis,Christian life,Lewis' work on the nature of love divides love...,2002.0,4.15
...,...,...,...,...,...,...,...
6805,9788185300535,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,This collection of the timeless teachings of o...,1999.0,4.51
6806,9788185944609,Secrets Of The Heart,Khalil Gibran,Mysticism,,1993.0,4.08
6807,9788445074879,Fahrenheit 451,Ray Bradbury,Book burning,,2004.0,3.98
6808,9789027712059,The Berlin Phenomenology,Georg Wilhelm Friedrich Hegel,History,Since the three volume edition ofHegel's Philo...,1981.0,0.00


# Clean the dataset by dropping rows with missing values

In [5]:
# Drop every row that has a missing value in any column. Rebinding instead of
# using inplace=True leaves the frame displayed by the previous cell untouched.
df = df.dropna()

In [6]:
df

Unnamed: 0,isbn13,title,authors,categories,description,published_year,average_rating
0,9780002005883,Gilead,Marilynne Robinson,Fiction,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85
1,9780002261982,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83
2,9780006163831,The One Tree,Stephen R. Donaldson,American fiction,Volume Two of Stephen Donaldson's acclaimed se...,1982.0,3.97
3,9780006178736,Rage of angels,Sidney Sheldon,Fiction,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93
4,9780006280897,The Four Loves,Clive Staples Lewis,Christian life,Lewis' work on the nature of love divides love...,2002.0,4.15
...,...,...,...,...,...,...,...
6803,9788173031014,Journey to the East,Hermann Hesse,Adventure stories,This book tells the tale of a man who goes on ...,2002.0,3.70
6804,9788179921623,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82
6805,9788185300535,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,This collection of the timeless teachings of o...,1999.0,4.51
6808,9789027712059,The Berlin Phenomenology,Georg Wilhelm Friedrich Hegel,History,Since the three volume edition ofHegel's Philo...,1981.0,0.00


# Sort the dataset by rating and keep the 3000 highest-rated books

In [7]:
# Order the books from highest to lowest average rating and keep the first
# 3000 rows; the bare `df` at the end displays the result.
df = (
    df
    .sort_values(by='average_rating', ascending=False)
    .head(3000)
)
df

Unnamed: 0,isbn13,title,authors,categories,description,published_year,average_rating
6738,9781932206081,Insights,Frederick Lenz,Spiritual life,"In 1983, when Rama - Dr. Frederick P. Lenz rec...",2003.0,5.00
5398,9780851621814,The Complete Theory Fun Factory,Katie Elliott;Ian Martin,Juvenile Nonfiction,(Boosey & Hawkes Scores/Books). Contains the m...,1996.0,5.00
5972,9781551052700,Ecuador Nature Guide,Christopher D. Jiggins,Botanique,The guide provides information on 76 species o...,2000.0,5.00
6671,9781890995522,The Diamond Color Meditation,John Diamond,Health & Fitness,The Diamond Color Meditation presents an inspi...,2006.0,5.00
4306,9780739844328,Bill Gates,Sara Barton-Wood,Juvenile Nonfiction,"Presents the life of Bill Gates, from his chil...",2001.0,5.00
...,...,...,...,...,...,...,...
829,9780140447804,The Consolation of Philosophy,Boethius;Victor Watts,Literary Criticism,The Consolation of Philosophy is perhaps uniqu...,1999.0,3.97
1831,9780345435750,Morgawr,Terry Brooks,Fiction,Shocking revelations challenge the beliefs of ...,2003.0,3.97
5189,9780812555240,A Quantum Murder,Peter F. Hamilton,Fiction,Professor Edward Kitchener's savage murder thr...,1998.0,3.97
6584,9781857231106,The Elfstones of Shannara,Terry Brooks,Elves,An ancient evil threatens the Elves and the Ra...,1982.0,3.97


# Embedding Cost Calculation

In [8]:
!pip install tiktoken -q
import tiktoken

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/1.2 MB[0m [31m4.1 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.2/1.2 MB[0m [31m18.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [9]:
import tiktoken
# Tokenizer matching the embedding model, so the count reflects what is billed.
enc = tiktoken.encoding_for_model('text-embedding-ada-002')

# Count the tokens of every description that will be embedded.
description = df['description'].tolist()
total_tokens = 0
for text in description:
    total_tokens += len(enc.encode(text))
print(f'Total tokens: {total_tokens}')

# Price used for the estimate: 0.0001 USD per 1000 tokens.
cost = total_tokens * (0.0001/1000)
print(f'Estimated cost in USD: {cost:.10F}')

Total tokens: 250615
Estimated cost in USD: 0.0250615000


# Calculate the embeddings and cache them locally

In [10]:
def get_embedding(text, model='text-embedding-ada-002'):
    """Return the embedding vector for ``text`` from the OpenAI embeddings API.

    Each call issues one ``openai.embeddings.create`` request.

    Args:
        text: The string to embed.
        model: Name of the embedding model to request. Defaults to
            'text-embedding-ada-002', the model that used to be hard-coded.

    Returns:
        The ``embedding`` attribute of the first item in the response's
        ``data`` list.
    """
    response = openai.embeddings.create(input=text, model=model)
    return response.data[0].embedding

In [11]:
def get_embeddings_and_save_to_csv(embedding_cache_file, force=False):
    """Embed every book description and cache the result in a CSV file.

    Works on the notebook-level ``df``: adds an ``embedding`` column computed
    with ``get_embedding`` (one call per row) and writes the whole frame to
    ``embedding_cache_file``.

    Args:
        embedding_cache_file: Path of the CSV cache file.
        force: When False (default) and the cache file already exists, nothing
            is recomputed or rewritten, so re-running the notebook does not
            repeat the per-row API calls. Pass True to rebuild the cache.

    Returns:
        None.
    """
    if not force and os.path.exists(embedding_cache_file):
        # Cache hit: the embeddings are already on disk.
        return

    df['embedding'] = df['description'].apply(get_embedding)
    # The index is written too (pandas default), so the file gains an extra
    # leading column when it is read back.
    df.to_csv(embedding_cache_file)

In [12]:
# Path of the local CSV file that stores the description embeddings.
embedding_cache_file = 'book_embeddings.csv'
get_embeddings_and_save_to_csv(embedding_cache_file=embedding_cache_file)

# Load the Embedding file

In [13]:
# Read the cached embeddings back from disk. The path variable is spelled
# `embedding_cache_file` (the previous `embedding_cashe_file` was a typo).
embedding_cache_file = 'book_embeddings.csv'
df_embeddings = pd.read_csv(embedding_cache_file)

In [14]:
df_embeddings

Unnamed: 0.1,Unnamed: 0,isbn13,title,authors,categories,description,published_year,average_rating,embedding
0,6738,9781932206081,Insights,Frederick Lenz,Spiritual life,"In 1983, when Rama - Dr. Frederick P. Lenz rec...",2003.0,5.00,"[0.00188809959217906, 0.008238980546593666, 0...."
1,5398,9780851621814,The Complete Theory Fun Factory,Katie Elliott;Ian Martin,Juvenile Nonfiction,(Boosey & Hawkes Scores/Books). Contains the m...,1996.0,5.00,"[-0.012495927512645721, -0.015322787687182426,..."
2,5972,9781551052700,Ecuador Nature Guide,Christopher D. Jiggins,Botanique,The guide provides information on 76 species o...,2000.0,5.00,"[-0.012820487841963768, 0.01280108280479908, -..."
3,6671,9781890995522,The Diamond Color Meditation,John Diamond,Health & Fitness,The Diamond Color Meditation presents an inspi...,2006.0,5.00,"[0.005100540351122618, 0.0192433912307024, 0.0..."
4,4306,9780739844328,Bill Gates,Sara Barton-Wood,Juvenile Nonfiction,"Presents the life of Bill Gates, from his chil...",2001.0,5.00,"[0.011100049130618572, -0.03010716289281845, -..."
...,...,...,...,...,...,...,...,...,...
2995,829,9780140447804,The Consolation of Philosophy,Boethius;Victor Watts,Literary Criticism,The Consolation of Philosophy is perhaps uniqu...,1999.0,3.97,"[0.026534831151366234, -0.015267528593540192, ..."
2996,1831,9780345435750,Morgawr,Terry Brooks,Fiction,Shocking revelations challenge the beliefs of ...,2003.0,3.97,"[0.014133764430880547, -0.04154350236058235, -..."
2997,5189,9780812555240,A Quantum Murder,Peter F. Hamilton,Fiction,Professor Edward Kitchener's savage murder thr...,1998.0,3.97,"[-0.0018574735149741173, -0.011641745455563068..."
2998,6584,9781857231106,The Elfstones of Shannara,Terry Brooks,Elves,An ancient evil threatens the Elves and the Ra...,1982.0,3.97,"[-0.0012093591503798962, -0.047867294400930405..."


# Convert the string embedding column to NumPy arrays

In [15]:
df_embeddings['embedding']

Unnamed: 0,embedding
0,"[0.00188809959217906, 0.008238980546593666, 0...."
1,"[-0.012495927512645721, -0.015322787687182426,..."
2,"[-0.012820487841963768, 0.01280108280479908, -..."
3,"[0.005100540351122618, 0.0192433912307024, 0.0..."
4,"[0.011100049130618572, -0.03010716289281845, -..."
...,...
2995,"[0.026534831151366234, -0.015267528593540192, ..."
2996,"[0.014133764430880547, -0.04154350236058235, -..."
2997,"[-0.0018574735149741173, -0.011641745455563068..."
2998,"[-0.0012093591503798962, -0.047867294400930405..."


In [16]:
type(df_embeddings['embedding'].iloc[0])

str

In [17]:
# The CSV stores each embedding as the text of a Python list.
# ast.literal_eval only parses literals, so nothing in the cache file can be
# executed the way eval() would allow. Values that are no longer strings are
# passed through unchanged, which makes this cell safe to re-run.
df_embeddings['embedding'] = df_embeddings['embedding'].apply(
    lambda raw: np.array(ast.literal_eval(raw)) if isinstance(raw, str) else np.asarray(raw)
)

In [18]:
type(df_embeddings['embedding'].iloc[0])

numpy.ndarray

In [19]:
df_embeddings['embedding']

Unnamed: 0,embedding
0,"[0.00188809959217906, 0.008238980546593666, 0...."
1,"[-0.012495927512645721, -0.015322787687182426,..."
2,"[-0.012820487841963768, 0.01280108280479908, -..."
3,"[0.005100540351122618, 0.0192433912307024, 0.0..."
4,"[0.011100049130618572, -0.03010716289281845, -..."
...,...
2995,"[0.026534831151366234, -0.015267528593540192, ..."
2996,"[0.014133764430880547, -0.04154350236058235, -..."
2997,"[-0.0018574735149741173, -0.011641745455563068..."
2998,"[-0.0012093591503798962, -0.047867294400930405..."


# Get Recommendation from Title

In [22]:
from sklearn.metrics.pairwise import cosine_similarity

def get_recommendation_from_title(df_embeddings, title, k):
    """Rank books by cosine similarity of their embedding to a given title.

    The input frame is not modified.

    Args:
        df_embeddings: DataFrame with 'title', 'description' and 'embedding'
            columns; each embedding is a 1-D numeric sequence.
        title: Exact title to look up. If several rows share the title, the
            first one is used as the query.
        k: Number of neighbours requested.

    Returns:
        False when ``title`` is not present in the 'title' column. Otherwise a
        list of at most ``k + 1`` dicts with keys 'title', 'description' and
        'similarity', sorted by descending similarity. The queried book is
        itself one of the ranked rows, so it normally appears first.
    """
    title_mask = (df_embeddings['title'] == title).to_numpy()
    if not title_mask.any():
        return False

    # Stack all embeddings once so the similarity is a single matrix operation
    # instead of one cosine_similarity call per row.
    embedding_matrix = np.vstack(
        [np.asarray(vector) for vector in df_embeddings['embedding']]
    )
    # argmax on a boolean mask gives the position of the first matching row.
    target_embedding = embedding_matrix[title_mask.argmax()].reshape(1, -1)

    # Row 0 of the (1, n_books) result holds the similarity to every book.
    similarities = cosine_similarity(target_embedding, embedding_matrix)[0]

    # Rank on a separate frame so the caller's DataFrame gains no column.
    ranked = (
        df_embeddings[['title', 'description']]
        .assign(similarity=similarities)
        .sort_values(by='similarity', ascending=False)
    )

    return [
        {
            'title': row['title'],
            'description': row['description'],
            'similarity': row['similarity'],
        }
        for _, row in ranked.iloc[0:k + 1].iterrows()
    ]

In [23]:
get_recommendation_from_title(df_embeddings, 'Animal', 10)

[{'title': 'Animal',
  'description': 'Offers photographs and information about mammals, birds, reptiles, amphibians, fishes, and invertebrates from throughout the world.',
  'similarity': 1.0000000000000009},
 {'title': 'Reptiles and Amphibians',
  'description': 'Collects annotated photographs and descriptions of over 400 species of reptiles and amphibians, such as turtles, lizards, snakes, newts, and frogs.',
  'similarity': 0.906180081907386},
 {'title': 'In Focus',
  'description': 'A collection of nearly three hundred photographs from "National Geographic," representing the work of more than one hundred fifty acclaimed photographers, captures portrait images of people from around the world.',
  'similarity': 0.8474255503206254},
 {'title': 'Ecuador Nature Guide',
  'description': 'The guide provides information on 76 species of birds, plants, mammals and insects of Ecuador. Each species description is accompanied by an illustration as well as information on ecology, local names a

In [26]:
get_recommendation_from_title(df_embeddings, 'Colossians and Philemon', 10)

[{'title': 'Colossians and Philemon',
  'description': 'For over one hundred years International Critical Commentaries have had a special place among works on the Bible. They bring together all the relevant aids to exegesis - linguistic, textual, archaeological, historical, literary, and theological - to help the reader understand the meaning of the books of the Old and New Testaments. The new commentaries continue this tradition. All new evidence now available is incorporated and new methods of study are applied. The authors are of the highest international standing. No attempt has been made to secure a uniform theological or critical approach to the biblical text: contributors have been invited for their scholarly distinction, not for their adherence to any one school of thought.',
  'similarity': 0.9999999999999996},
 {'title': "Journey into God's Word",
  'description': "Life is a journey, and like any journey, it requires an accurate, reliable roadmap to get us where we need to go