In [None]:
!pip install weaviate-client

In [9]:
import pandas as pd
import weaviate
import os
from dotenv import load_dotenv
import csv
import ast
# Read key/value pairs from a .env file into the process environment so the
# os.getenv() lookups in the next cell can find the Weaviate/OpenAI settings.
load_dotenv()

True

## Set up the environment

In [10]:
# Connection settings are taken from the environment; a variable that is not
# set comes back as None.
WEAVIATE_CLUSTER_URL, WEAVIATE_API_KEY, OPENAI_API_KEY = (
    os.environ.get(variable_name)
    for variable_name in (
        'WEAVIATE_CLUSTER_URL',
        'WEAVIATE_API_KEY',
        'OPENAI_API_KEY',
    )
)

In [11]:
# Build the Weaviate client. The Weaviate API key is used as the client's auth
# secret, and the OpenAI key is forwarded with each request as an extra header.
weaviate_auth = weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY)
openai_headers = {"X-OpenAI-Api-Key": OPENAI_API_KEY}

client = weaviate.Client(
    url=WEAVIATE_CLUSTER_URL,
    auth_client_secret=weaviate_auth,
    additional_headers=openai_headers,
)

In [38]:
# Name of the Weaviate class that stores the books. The schema returned by the
# server and the query cell both spell it "Book", so use the same spelling here
# instead of relying on the lowercase name being capitalised along the way.
class_name = "Book"

# One entry per stored field. "cover" holds an image URL, so it is marked to be
# skipped by the text2vec-openai module.
properties = [
    {"name": "title", "dataType": ["text"]},
    {"name": "description", "dataType": ["text"]},
    {"name": "author", "dataType": ["text"]},
    {
        "name": "cover",
        "dataType": ["text"],
        "moduleConfig": {"text2vec-openai": {"skip": True}},
    },
    {"name": "genres", "dataType": ["text[]"]},
    {"name": "rating", "dataType": ["number"]},
]

# Full class definition passed to client.schema.create_class(): the properties
# above plus the vectorizer module and its model settings.
class_object = {
    "class": class_name,
    "properties": properties,
    "vectorizer": "text2vec-openai",
    "moduleConfig": {
        "text2vec-openai": {
            "model": "ada",
            "modelVersion": "002",
            "type": "text",
        }
    },
}

In [49]:
# Drop the existing class so it can be recreated from class_object.
# NOTE: this is destructive - deleting a class also removes the objects stored
# in it. The name comes from class_name so this cell cannot drift away from the
# schema definition.
client.schema.delete_class(class_name)

In [50]:
# Register the class definition (class_object) with the Weaviate instance.
client.schema.create_class(class_object)

In [8]:
# Read the schema back from the server and display it to check what was stored.
schema = client.schema.get()
schema

{'classes': [{'class': 'Book',
   'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
    'cleanupIntervalSeconds': 60,
    'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
   'moduleConfig': {'text2vec-openai': {'model': 'ada',
     'modelVersion': '002',
     'type': 'text',
     'vectorizeClassName': True}},
   'multiTenancyConfig': {'enabled': False},
   'properties': [{'dataType': ['text'],
     'indexFilterable': True,
     'indexSearchable': True,
     'moduleConfig': {'text2vec-openai': {'skip': False,
       'vectorizePropertyName': False}},
     'name': 'title',
     'tokenization': 'word'},
    {'dataType': ['text'],
     'indexFilterable': True,
     'indexSearchable': True,
     'moduleConfig': {'text2vec-openai': {'skip': False,
       'vectorizePropertyName': False}},
     'name': 'description',
     'tokenization': 'word'},
    {'dataType': ['text'],
     'indexFilterable': True,
     'indexSearchable': True,
     'moduleConfig': {'text2vec-opena

## Populate the database with books

In [41]:
# Load the cleaned book dataset, report its (rows, columns) shape and preview
# the first four rows.
df = pd.read_csv('./books_cleaned_data.csv')
print(df.shape)
df.head(4)

(11664, 6)


Unnamed: 0,author,cover,description,genres,rating,title
0,أحمد مطر,https://images-na.ssl-images-amazon.com/images...,أحمد مطر شاعر حقيقي في زمن امتلأ بالشعارات الج...,"['شعر', 'أدب', 'سياسة', 'غير روائي']",4.33,لافتات - المجموعة الكاملة
1,محمد بن إدريس الشافعي,https://images-na.ssl-images-amazon.com/images...,يضم هذا الكتاب بين دفتيه كل ما وصل إلينا من شع...,"['شعر', 'ديانة', 'الإسلام', 'أدب', 'غير روائي'...",4.45,ديوان الإمام الشافعي
2,مصطفى السباعي,https://images-na.ssl-images-amazon.com/images...,يعد هذا الكتاب مجموعة من الخواطر الخاصة بالمؤل...,"['سيرة ذاتية', 'غير روائي', 'تطوير الذات', 'فل...",3.97,هكذا علمتني الحياة
3,مصطفى محمود,https://images-na.ssl-images-amazon.com/images...,"رد على أسئله لملحدين على الدين الإسلامي, رد را...","['ديانة', 'فلسفة', 'غير روائي', 'الإسلام', 'إل...",3.88,حوار مع صديقي الملحد


In [17]:
# Preview the last four rows of the dataset.
df.tail(4)

Unnamed: 0,author,cover,description,genres,rating,title
11660,Ronald L. Graham,https://images-na.ssl-images-amazon.com/images...,Concrete Mathematics is a blending of CONtinuo...,"['Mathematics', 'Computer Science', 'Programmi...",4.33,Concrete Mathematics: A Foundation for Compute...
11661,Allan Farington,https://images-na.ssl-images-amazon.com/images...,Windows 11 2021 Complete New OS User Guide. 33...,[],4.42,Windows 11: 2021 Complete New OS User Guide. 3...
11662,Charles Petzold,https://images-na.ssl-images-amazon.com/images...,"What do flashlights, the British invasion, bla...","['Computer Science', 'Programming', 'Nonfictio...",4.39,Code: The Hidden Language of Computer Hardware...
11663,Andy Hunt,https://images-na.ssl-images-amazon.com/images...,"Straight from the programming trenches, cuts ...","['Programming', 'Computer Science', 'Technolog...",4.33,The Pragmatic Programmer: From Journeyman to M...


In [46]:
# Convert the genres column from its string form (e.g. "['a', 'b']") to real lists.
# Values that are already lists are passed through unchanged, so re-running this
# cell does not fail by calling ast.literal_eval on an already-parsed list.
df['genres'] = df['genres'].apply(
    lambda raw: raw if isinstance(raw, list) else ast.literal_eval(raw)
)

In [52]:

# Title of the row currently being added; reported if the import fails so the
# offending record can be found.
current_book = None
try:
    with client.batch as batch:  # Initialize a batch process
        batch.batch_size = 100
        for _, row in df.iterrows():
            current_book = row['title']
            # Use a dedicated name here: rebinding `properties` would overwrite
            # the schema property list and break a later re-run of the schema cell.
            book_object = {
                "title": row['title'],
                "description": row['description'],
                "author": row['author'],
                "cover": row['cover'],
                "genres": row['genres'],
                "rating": row['rating'],
            }

            batch.add_data_object(data_object=book_object, class_name=class_name)

except Exception as e:
    # Best-effort import: report the failure instead of aborting the notebook.
    print(f"something happened {e} (last book: {current_book!r})")


## Semantic Search

In [20]:
# Near-text query against the Book class: request the listed fields for up to
# 10 objects matching the concept "Tech Startups Books".
response = (
    client.query
    .get(
        "Book",
        ["title", "description", "author", "cover", "genres", "rating"],
    )
    .with_near_text({"concepts": ["Tech Startups Books"]})
    .with_limit(10)
    .do()
)

# The matching objects sit under data -> Get -> <class name> in the response.
response['data']['Get']['Book']

[{'author': 'Yevgeniy Brikman',
  'cover': 'https://images-na.ssl-images-amazon.com/images/S/compressed.photo.goodreads.com/books/1445788782i/26457143.jpg',
  'description': 'At this very moment, somewhere in the world, two programmers are sitting in a garage and creating our future, one line of code at a time. We are in the era of the high tech startup.This book is the "Hello, World" tutorial for building products, technologies, and teams in a startup environment. It\'s based on the experiences of the author, Yevgeniy Brikman, as well as interviews with programmers from some of the most successful startups of the last decade, including Google, Facebook, LinkedIn, Twitter, GitHub, Stripe, Instagram, AdMob, Pinterest, and many others.If you\'re at all interested in startups—whether you\'re a programmer at the beginning of your career, a seasoned developer bored with the politics of large companies, a manager trying to figure out how to motivate your engineers, or just someone trying to 