In [3]:
# You need to install the python Weaviate client
#!pip install weaviate-client
#pip install --upgrade cohere

## ☁️☁️ Configure the Weaviate Cloud Instance ☁️☁️✔ 
### You can have free 14 day  here: https://console.weaviate.cloud/

In [8]:
import os
import weaviate
import json

auth_config = weaviate.auth.AuthApiKey(api_key='<your weaviate api key')
client = weaviate.Client(
    url="https://cohere-hack-jrvynxdk.weaviate.network",
    auth_client_secret=auth_config,
    additional_headers={
        "X-Cohere-Api-Key":"your cohere api key"
    }
)
client.is_ready()

True

## Create Database Schema ✔

In [9]:
# delete existing schema, (note, this will delete all your weaviate data)
client.schema.delete_all()

article_schema = {
    "class": "quran",
    "description": "Ayat and tafsir",
    "vectorizer": "text2vec-cohere",#multi-lingual
    "moduleConfig": {
        "text2vec-cohere": {
            "model": "multilingual-22-12",
            "truncate": "RIGHT"
        }, 

    },
    "vectorIndexConfig": {
        "distance": "dot"
    },
    "properties": [
    {
        "name": "Sura",
        "dataType": [ "int" ],
        "moduleConfig": { "text2vec-cohere": { "skip": True } ,
                "vectorizePropertyName": False}
    },
    {
        "name": "Aya",
        "dataType": [ "string" ],
        "moduleConfig": { "text2vec-cohere": { "skip": True } ,
                "vectorizePropertyName": False}
    },
 {
        "name": "Tafsir",
        "dataType": [ "text" ],
        "description": "Article body",
        "moduleConfig": {
            "text2vec-cohere": {
                "skip": False,
                "vectorizePropertyName": False
            }
        }
    },
    ]
}

# add the schema
#client.schema.delete_all()
client.schema.create_class(article_schema)

print("The schema has been created")

The schema has been created


## Preprocessing ✔

In [10]:
import pandas as pd
df = pd.read_csv('chunked_tafsir.csv')

In [11]:
# Specify the column(s) to be dropped
columns_to_drop = ['Unnamed: 0']

# Use the drop method
df = df.drop(columns=columns_to_drop)


In [12]:
df.head()

Unnamed: 0,Sura,Aya,Tafsir
0,1,1,"In the Name of Allah, the Most Gracious, the M..."
1,1,1,Abu Hurayrah once performed the prayer and rec...
2,1,1,Virtue of Al-Fatihah..Imam Ahmad recorded in h...
3,1,1,"as partners with Him He is Allah, the Creator,..."
4,1,1,"Allah also said, ""And He is ever Rahim (mercif..."


## Batch and Add the file to Weaviate ✔

In [14]:
### Step 1 - configure Weaviate Batch, which optimizes CRUD operations in bulk
# - starting batch size of 100
# - dynamically increase/decrease based on performance
# - add timeout retries if something goes wrong

client.batch.configure(
    batch_size=200,
    dynamic=True,
    timeout_retries=3,
)

<weaviate.batch.crud_batch.Batch at 0x22aa2139640>

In [15]:
data = df[:10000] # make sure it is not more than 10k objects

counter=0

with client.batch as batch:
    for idx, item in data.iterrows():        
        # print update message every 100 objects        
        if (counter %100 == 0):
            print(f"Import {counter} / {len(data)} ", end="\r")

        properties = {
        "Sura": item["Sura"],
        "Aya": item["Aya"],
        "Tafsir": item["Tafsir"]       
        }

        #vector = item["emb"]

        batch.add_data_object(properties, "quran", None, None)
        counter = counter+1
    print(f"Import {counter} / {len(data)}")
    print("Import complete")

Import 3743 / 3743 
Import complete


## 🔎🔎All the ways you can search your data:🔍🔍

### 1. Classic Word Search 
- Basic word matching. Look for the occurence of a word in the document.

### 2. Vector Search
- Find closest object vectors closest to query vector. Fetches objects the have similar meaning to the query.

### 3. Hybrid Search - combine word and semantic match.
- Perform both word and vector search and then combine the results.

### 4. Generative Search - search and interpret with an LLM.
- Search for semantically relevant documents to a prompt and then provide them as context to a LLM to guide its generation.

In [31]:
# Assuming a hypothetical library with a different syntax
result = (
    client.query.aggregate("quran")
    .count()  # Adjust this line based on library documentation
    .do()
)

print("Object count: ", result["count"])


AttributeError: 'AggregateBuilder' object has no attribute 'count'

### 1. Classic Word Search 

In [18]:
where_filter = {
  "path": ["tafsir"],
  "operator": "Like", 
  "valueString": "prophet"
}

query_result = (
  client.query
  .get("quran", ["sura", "aya","tafsir"])
  .with_where(where_filter)
  .with_limit(3)
  .do()
)

print(json.dumps(query_result, indent=2))

{
  "data": {
    "Get": {
      "Quran": [
        {
          "aya": "6,7",
          "sura": 1,
        },
        {
          "aya": "4",
          "sura": 2,
          "tafsir": "no distinction between any of them (Messengers.\") (4:152),.This is a sample of the Ayat that indicate that the true believers all believe in Allah, His Messengers and His Books..The faithful among the People of the Book, have a special significance here, since they believe in their Books and in all of the details related to that, so when such people embrace Islam and sincerely believe in the details of the religion, then they will get two rewards As for the others, they can only believe in the previous religious teachings in a general way For instance, the Prophet stated, : \"When the People of the Book narrate to you, neither reject nor affirm what they say Rather, say, We believe in what was revealed to us and what was revealed to you..\".However, the faith that many Arabs have in the religion of Islam

### 2. Vector Search

In [38]:
query = "The oneness of God. Feeling Grateful"

def semantic_search(query):
    nearText = {
        "concepts": [query], # example from earlier -> 'kitten'
#         "distance": -139.0,
    }

    properties = [
        "sura", "aya", "tafsir",
        "_additional {distance}"
    ]

    response = (
        client.query
        .get("quran", properties)
        .with_near_text(nearText)
        .with_limit(3)
        .do()
    )

    result = response['data']['Get']['quran']

    return result
    
def print_result(result):
    for item in result:
        print(f"\033[95m{item['aya']} ({item['tafsir']}) {item['_additional']['distance']}\033[0m")
        print(f"\033[4m{item['url']}\033[0m")
        print(item['text'])
        print()

In [39]:
query = "The oneness of God. Feeling Grateful"

def semantic_search(query):
    nearText = {
        "concepts": [query],
#         "distance": -139.0,
    }

    properties = [
        "sura", "aya", "tafsir",
        "_additional {distance}"
    ]

    response = (
        client.query
        .get("quran", properties)
        .with_near_text(nearText)
        .with_limit(3)
        .do()
    )

    result = response['data']['Get']['Quran']

    return result
    

res = semantic_search(query)
res

[{'_additional': {'distance': -146.52103},
  'aya': '61,62,63,64,65',
  'sura': 40,
  'tafsir': 'Allah, it is He Who has made the night for you that you may rest therein and the day for you to see Truly, Allah is full of bounty to mankind; yet, most of mankind give no thanks That is Allah, your Lord, the Creator of all things, La ilaha ila Huwa How then are you turning away Thus were turned away those who used to deny the Ayat of Allah Allah, it is He Who has made for you the earth as a dwelling place and the sky as a canopy, and has given you shape and made your shapes good and pure (looking) and has provided you with good things That is Allah, your Lord, so Blessed be Allah, the Lord of all that exists He is the Ever Living, La ilaha illa Huwa; so invoke Him making the religion for Him Alone All the praises and thanks be to Allah, the Lord of all that exits..Signs of the Power and Oneness of Allah..Allah reminds us of His grace towards His creation in that He has given them the night

### ... but wait ... this is a Multi-Lingual Model! 🗣❗️

 - You can use it to perform multilingual search! Search in one language that model understands and recieve relevant documents in any language!

In [42]:
# This is a multi-lingual model so it can take in queries in different languages!

#good movies in hindi

query_result = semantic_search("ईश्वर")

print_result(query_result)

[95m1,2,3,4,5,6,7,8 (Lord.".) -145.16997[0m


KeyError: 'url'

In [43]:
#vacation spots in Farsi

query_result = semantic_search("مکان های تعطیلات")

print_result(query_result)

[95m17,18,19,20 (Verily, those who have Taqwa will be in Gardens and Delight Enjoying in that which their Lord has bestowed on them, and (the fact that) their Lord saved them from the torment of the blazing Fire "Eat and drink with happiness because of what you used to do They will recline on thrones Masfufah And We shall marry them to Hur (fair females) with wide lovely eyes..Description of the Destination of the Happy.Allah the Exalted described the destination of the happy ones, "Verily, those who have Taqwa will be in Gardens and Delight.." in contrast to the torment and punishment of the miserable; "Enjoying in that which their Lord has bestowed on them,." meaning, enjoying the various types of delight that Allah has granted them therein, such as various types of foods, drinks, clothes, dwelling places, mounts, and so forth, "and (the fact that." their Lord saved them from the torment of the blazing Fire.) He saved them from the torment of the Fire, which is a bounty itself Added

KeyError: 'url'

### 3. Hybrid Search:

In [48]:
response = (
    client.query
    .get("quran", ["tafsir", "sura"])
    .with_hybrid(
        query="who's is the prophet?",
        alpha=0
    )
    .with_additional(["score", "explainScore"])
    .with_limit(3)
    .do()
)

print(json.dumps(response, indent=2))

{
  "data": {
    "Get": {
      "Quran": [
        {
          "_additional": {
            "explainScore": "(bm25)\n(hybrid) Document 35012538-1d3e-4295-81dd-0a716737fe3f contributed 0.01639344262295082 to the score",
            "score": "0.016393442"
          },
          "sura": 34,
          "tafsir": "from me and from the Shaytan, and Allah and His Messenger have nothing to do with it. .Truly, He is All-Hearer, Ever Near means, He hears all the words of His servants, and He is always near to respond to them when they call on Him An-Nasa0 0 recorded the H ad0th of Abu Mu0sa0 which also appears in the Two S ah 0h s: \"You are not calling upon one who is deaf or absent; you are calling upon One Who is All-Hearer, Ever Near and Responsive..\"."
        },
        {
          "_additional": {
            "explainScore": "(bm25)\n(hybrid) Document 4559d4e2-4ed9-434a-bd89-6cb7bf4e4b9c contributed 0.016129032258064516 to the score",
            "score": "0.016129032"
          },
     

### 4. Generative Search:
- Attaching your search engine outputs to a LLM to generate with!/

In [50]:
b_res = semantic_search("ibrahim")
print_result(b_res)

[95m65,66,67,68 (right to be followers of Ibrahim are those who followed his religion and this Prophet, Muhammad , and his Companions from the Muhajirin, Ansar and those who followed their lead Sa`id bin Mansur recorded that Ibn Mas`ud said that the Messenger of Allah said, "Every Prophet had a Wali (supporter, best friend." from among the Prophets My Wali among them is my father Ibrahim, the Khalil (intimate friend) of my Lord, the Exalted and Most Honored).The Prophet then recited, "Verily, among mankind who have the best claim to Ibrahim are those who followed him....".Allahs statement, "And Allah is the Wali (Protector and Helper." of the believers.) means, Allah is the Protector of all those who believe in His Messengers..) -147.4716[0m


KeyError: 'url'

In [53]:
generatePrompt = "Write me some questions I can ask {tafsir} here is some information about them {sura}"

result = (
  client.query
  .get("quran", ["tafsir","sura"])
  .with_generate(single_prompt=generatePrompt) # Pass in each obj 1 at a time
  .with_near_text({
    "concepts": ["ismail"]
  })
  .with_limit(3)
).do()

In [54]:
print(json.dumps(result, indent=2))

{
  "errors": [
    {
      "locations": [
        {
          "column": 81,
          "line": 1
        }
      ],
      "message": "Cannot query field \"generate\" on type \"QuranAdditional\".",
      "path": null
    }
  ]
}
