# Quran Iterative Search Using OpenAPI GPT API

In [1]:
import openai
import os, time, re
import pandas as pd

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

openai.api_key  = os.getenv('OPENAI_API_KEY')

In [3]:
def get_completion(prompt, model="gpt-3.5-turbo"):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0, # this is the degree of randomness of the model's output
    )
    return response.choices[0].message["content"]

### Initial Context Setup & Run

In [36]:
%%time
chapter = f"""
1|عَمَّ يَتَسَاءَلُونَ
2|عَنِ النَّبَإِ الْعَظِيمِ
3|الَّذِي هُمْ فِيهِ مُخْتَلِفُونَ
4|كَلَّا سَيَعْلَمُونَ
5|ثُمَّ كَلَّا سَيَعْلَمُونَ
6|أَلَمْ نَجْعَلِ الْأَرْضَ مِهَادًا
7|وَالْجِبَالَ أَوْتَادًا
8|وَخَلَقْنَاكُمْ أَزْوَاجًا
9|وَجَعَلْنَا نَوْمَكُمْ سُبَاتًا
10|وَجَعَلْنَا اللَّيْلَ لِبَاسًا
11|وَجَعَلْنَا النَّهَارَ مَعَاشًا
12|وَبَنَيْنَا فَوْقَكُمْ سَبْعًا شِدَادًا
13|وَجَعَلْنَا سِرَاجًا وَهَّاجًا
14|وَأَنزَلْنَا مِنَ الْمُعْصِرَاتِ مَاءً ثَجَّاجًا
15|لِّنُخْرِجَ بِهِ حَبًّا وَنَبَاتًا
16|وَجَنَّاتٍ أَلْفَافًا
17|إِنَّ يَوْمَ الْفَصْلِ كَانَ مِيقَاتًا
18|يَوْمَ يُنفَخُ فِي الصُّورِ فَتَأْتُونَ أَفْوَاجًا
19|وَفُتِحَتِ السَّمَاءُ فَكَانَتْ أَبْوَابًا
20|وَسُيِّرَتِ الْجِبَالُ فَكَانَتْ سَرَابًا
21|إِنَّ جَهَنَّمَ كَانَتْ مِرْصَادًا
22|لِّلطَّاغِينَ مَآبًا
23|لَّابِثِينَ فِيهَا أَحْقَابًا
24|لَّا يَذُوقُونَ فِيهَا بَرْدًا وَلَا شَرَابًا
25|إِلَّا حَمِيمًا وَغَسَّاقًا
26|جَزَاءً وِفَاقًا
27|إِنَّهُمْ كَانُوا لَا يَرْجُونَ حِسَابًا
28|وَكَذَّبُوا بِآيَاتِنَا كِذَّابًا
29|وَكُلَّ شَيْءٍ أَحْصَيْنَاهُ كِتَابًا
30|فَذُوقُوا فَلَن نَّزِيدَكُمْ إِلَّا عَذَابًا
31|إِنَّ لِلْمُتَّقِينَ مَفَازًا
32|حَدَائِقَ وَأَعْنَابًا
33|وَكَوَاعِبَ أَتْرَابًا
34|وَكَأْسًا دِهَاقًا
35|لَّا يَسْمَعُونَ فِيهَا لَغْوًا وَلَا كِذَّابًا
36|جَزَاءً مِّن رَّبِّكَ عَطَاءً حِسَابًا
37|رَّبِّ السَّمَاوَاتِ وَالْأَرْضِ وَمَا بَيْنَهُمَا الرَّحْمَٰنِ ۖ لَا يَمْلِكُونَ مِنْهُ خِطَابًا
38|يَوْمَ يَقُومُ الرُّوحُ وَالْمَلَائِكَةُ صَفًّا ۖ لَّا يَتَكَلَّمُونَ إِلَّا مَنْ أَذِنَ لَهُ الرَّحْمَٰنُ وَقَالَ صَوَابًا
39|ذَٰلِكَ الْيَوْمُ الْحَقُّ ۖ فَمَن شَاءَ اتَّخَذَ إِلَىٰ رَبِّهِ مَآبًا
40|إِنَّا أَنذَرْنَاكُمْ عَذَابًا قَرِيبًا يَوْمَ يَنظُرُ الْمَرْءُ مَا قَدَّمَتْ يَدَاهُ وَيَقُولُ الْكَافِرُ يَا لَيْتَنِي كُنتُ تُرَابًا
"""

keyword = "جعل"

prompt = f"""
- Your task is to search for a keyword in a text of the Holy Quran.
- The Quran text is delimited by triple dashes and the keyword is placed between angle brackets.
- The Quran text is organized as follows:
    - Each verse is in a separate line
    - Each verse is preceded with the verse number
    
- For each occurrence you find, display the verse with its number using this format:
    Ayah (verse number): verse (with keyword surrounded by parenthesis)
    
- Finally, display the number of occurrences.

The chapter text:
---{chapter}---
The keyword:
<{keyword}>
"""

CPU times: user 3 µs, sys: 11 µs, total: 14 µs
Wall time: 26.7 µs


In [40]:
%%time
response = get_completion(prompt)
print(response)

Ayah (6): أَلَمْ نَجْعَلِ الْأَرْضَ مِهَادًا
Ayah (7): وَالْجِبَالَ أَوْتَادًا
Ayah (8): وَخَلَقْنَاكُمْ أَزْوَاجًا
Ayah (9): وَجَعَلْنَا نَوْمَكُمْ سُبَاتًا
Ayah (10): وَجَعَلْنَا اللَّيْلَ لِبَاسًا
Ayah (11): وَجَعَلْنَا النَّهَارَ مَعَاشًا
Ayah (12): وَبَنَيْنَا فَوْقَكُمْ سَبْعًا شِدَادًا
Ayah (13): وَجَعَلْنَا سِرَاجًا وَهَّاجًا
Ayah (15): لِّنُخْرِجَ بِهِ حَبًّا وَنَبَاتًا
Ayah (16): وَجَنَّاتٍ أَلْفَافًا
Ayah (21): إِنَّ جَهَنَّمَ كَانَتْ مِرْصَادًا
Ayah (22): لِّلطَّاغِينَ مَآبًا
Ayah (23): لَّابِثِينَ فِيهَا أَحْقَابًا
Ayah (36): جَزَاءً مِّن رَّبِّكَ عَطَاءً حِسَابًا

Number of occurrences: 13
CPU times: user 28.1 ms, sys: 12.7 ms, total: 40.8 ms
Wall time: 16.9 s


In [54]:
df_gaala = pd.DataFrame(columns=['Response', 'Occurrences'])
len(df_gaala.index)

0

In [136]:
def get_results(df, prompt, iterations=1, pause=0):
    
    for i in range(iterations):
        # API call
        response = get_completion(prompt)

        # Extract the number of occurrences from response
        try:
            occurs = int(re.findall(r'\d+', response)[-1])
        #         occurs = int(response.split(sep='occurrences: ')[1])
        except ValueError:
            occurs = None
        except IndexError:
            print(f'Didn\'t find occurrences.\nThe response:\n{response}')

        # Append row
        df.loc[len(df.index)] = [response, occurs]

        # Wait before next API call
        time.sleep(pause)
        
    return df

## Prompts

### Semantic Search

In [103]:
keyword_semantic = "جاعل"
prompt_semantic = f"""
- Your task is to search for a keyword in a text of the Holy Quran.
- The Quran text is delimited by triple dashes and the keyword is placed between angle brackets.
- The keyword is provided in Arabic characters.
- Match the semantic meaning of the keyword.
- The Quran text is organized as follows:
    - Each verse is in a separate line
    - Each verse is preceded with the verse number
    
- For each occurrence you find, display the verse with its number using this format:
    Ayah (verse number): verse (with keyword surrounded by parenthesis)
    
- Finally, display the number of occurrences.

The chapter text:
---{chapter}---
The keyword:
<{keyword_semantic}>
"""

In [108]:
%%time
# Dataframe holding the results
df_semantic = pd.DataFrame(columns=['Response', 'Occurrences'])
get_results(df_semantic, prompt_semantic, iterations=100, pause=4)

RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID deaa025f4bad8ec7c58827b782d423d2 in your message.)

In [109]:
# Only completed 74 iterations before APIConnectionError from server side (due to high demand)
df_semantic

Unnamed: 0,Response,Occurrences
0,Occurrences found:\nAyah (8): وَخَلَقْنَاكُمْ ...,9
1,Occurrences found:\nAyah (8): وَخَلَقْنَاكُمْ ...,8
2,Occurrences found:\nAyah (8): وَخَلَقْنَاكُمْ ...,9
3,Occurrences found:\nAyah (8): وَخَلَقْنَاكُمْ ...,9
4,Occurrences found:\nAyah (8): وَخَلَقْنَاكُمْ ...,9
...,...,...
69,Occurrences found:\nAyah (8): وَخَلَقْنَاكُمْ ...,10
70,Occurrences found:\nAyah (8): وَخَلَقْنَاكُمْ ...,8
71,Occurrences found:\nAyah (8): وَخَلَقْنَاكُمْ ...,9
72,Occurrences found:\nAyah (8): وَخَلَقْنَاكُمْ ...,7


### Diacritical Search

In [137]:
keyword_diacritical = "يَوْمَ"

prompt_diacritical = f"""
- Your task is to search for a keyword in a text of the Holy Quran.
- The Quran text is delimited by triple dashes and the keyword is placed between angle brackets.
- The keyword is provided in Arabic characters with diacritics.
- Match the exact keyword with the exact Arabic diacritics.
- The Quran text is organized as follows:
    - Each verse is in a separate line
    - Each verse is preceded with the verse number
    
- For each occurrence you find, display the verse with its number using this format:
    Ayah (verse number): verse (with keyword surrounded by parenthesis)
    
- Finally, display the number of occurrences.

The chapter text:
---{chapter}---
The keyword:
<{keyword_diacritical}>
"""

In [86]:
# First trial with one iteration
response_diac = get_completion(prompt_diacritical)
print(response_diac)

Occurrences found:
Ayah (3): الَّذِي هُمْ فِيهِ مُخْتَلِفُونَ (جَعَلَ)
Ayah (6): أَلَمْ نَجْعَلِ الْأَرْضَ مِهَادًا (جَعَلَ)
Ayah (7): وَالْجِبَالَ أَوْتَادًا (جَعَلَ)
Ayah (8): وَخَلَقْنَاكُمْ أَزْوَاجًا (جَعَلَ)
Ayah (9): وَجَعَلْنَا نَوْمَكُمْ سُبَاتًا (جَعَلَ)
Ayah (10): وَجَعَلْنَا اللَّيْلَ لِبَاسًا (جَعَلَ)
Ayah (11): وَجَعَلْنَا النَّهَارَ مَعَاشًا (جَعَلَ)
Ayah (12): وَبَنَيْنَا فَوْقَكُمْ سَبْعًا شِدَادًا (جَعَلَ)
Ayah (13): وَجَعَلْنَا سِرَاجًا وَهَّاجًا (جَعَلَ)
Ayah (15): لِّنُخْرِجَ بِهِ حَبًّا وَنَبَاتًا (جَعَلَ)
Ayah (16): وَجَنَّاتٍ أَلْفَافًا (جَعَلَ)
Ayah (25): إِلَّا حَمِيمًا وَغَسَّاقًا (جَعَلَ)
Ayah (31): إِنَّ لِلْمُتَّقِينَ مَفَازًا (جَعَلَ)
Ayah (32): حَدَائِقَ وَأَعْنَابًا (جَعَلَ)
Ayah (33): وَكَوَاعِبَ أَتْرَابًا (جَعَلَ)
Ayah (34): وَكَأْسًا دِهَاقًا (جَعَلَ)
Ayah (36): جَزَاءً مِّن رَّبِّكَ عَطَاءً حِسَابًا (جَعَلَ)

Total occurrences found: 16


In [141]:
%%time
# Dataframe holding the results
df_diacritical = pd.DataFrame(columns=['Response', 'Occurrences'])
get_results(df_diacritical, prompt_diacritical, iterations=100, pause=4)

df_diacritical

Unnamed: 0,Response,Occurrences
0,Ayah (verse number): verse (with keyword surro...,8
1,Ayah (verse number): verse (with keyword surro...,8
2,Ayah (verse number): verse (with keyword surro...,8
3,Ayah (verse number): verse (with keyword surro...,8
4,Ayah (verse number): verse (with keyword surro...,8
5,Ayah (verse number): verse (with keyword surro...,8
6,Ayah (verse number): verse (with keyword surro...,8
7,Ayah (verse number): verse (with keyword surro...,8
8,Ayah (verse number): verse (with keyword surro...,8
9,Ayah (verse number): verse (with keyword surro...,8


In [101]:
# Another run completed 9 iterations only before APIConnectionError
df_diacritical

Unnamed: 0,Response,Occurrences
0,Occurrences found:\nAyah (3): الَّذِي هُمْ فِي...,14
1,Occurrences found:\nAyah (3): الَّذِي هُمْ فِي...,14
2,Occurrences found:\nAyah (3): الَّذِي هُمْ فِي...,14
3,Occurrences found:\nAyah (3): الَّذِي هُمْ فِي...,9
4,Occurrences found:\nAyah (3): الَّذِي هُمْ فِي...,9
5,Occurrences found:\nAyah (3): الَّذِي هُمْ فِي...,9
6,Occurrences found:\nAyah (3): الَّذِي هُمْ فِي...,9
7,Occurrences found:\nAyah (3): الَّذِي هُمْ فِي...,14
8,Occurrences found:\nAyah (3): الَّذِي هُمْ فِي...,9


### Exact Match

In [123]:
keyword_exact = "جعل"
prompt_exact = f"""
- Your task is to search for a keyword in a text of the Holy Quran.
- The Quran text is delimited by triple dashes and the keyword is placed between angle brackets.
- The keyword is provided in Arabic characters.
- Match the exact keyword (exact match).
- The Quran text is organized as follows:
    - Each verse is in a separate line
    - Each verse is preceded with the verse number
    
- For each occurrence you find, display the verse with its number using this format:
    Ayah (verse number): verse (with keyword surrounded by parenthesis)
    
- Finally, display the number of occurrences.

The chapter text:
---{chapter}---
The keyword:
<{keyword_exact}>
"""

In [126]:
%%time
# Dataframe holding the results
# Hallucinations: All results are false, and from different context "Chapter" than specified
# This is the only trial that completed the 100 iterations successfully without connection error
df_exact = pd.DataFrame(columns=['Response', 'Occurrences'])
get_results(df_exact, prompt_exact, iterations=100, pause=4)

CPU times: user 1.39 s, sys: 1.01 s, total: 2.4 s
Wall time: 1h 36min 22s


Unnamed: 0,Response,Occurrences
0,Ayah (6): وَلَقَدْ جَعَلْنَا فِي السَّمَاءِ بُ...,9
1,Ayah (6): وَلَقَدْ جَعَلْنَا فِي السَّمَاءِ بُ...,10
2,Ayah (6): وَلَقَدْ جَعَلْنَا فِي السَّمَاءِ بُ...,9
3,Ayah (6): وَلَقَدْ جَعَلْنَا فِي السَّمَاءِ بُ...,14
4,Ayah (6): وَلَقَدْ جَعَلْنَا فِي السَّمَاءِ بُ...,9
...,...,...
95,Ayah (6): وَلَقَدْ جَعَلْنَا فِي السَّمَاءِ بُ...,31
96,Ayah (6): وَلَقَدْ جَعَلْنَا فِي السَّمَاءِ بُ...,9
97,Ayah (6): وَلَقَدْ جَعَلْنَا فِي السَّمَاءِ بُ...,31
98,Ayah (6): وَلَقَدْ جَعَلْنَا فِي السَّمَاءِ بُ...,28


In [132]:
df_exact.loc[:, 'Occurrences'].mean()

22.53

In [115]:
# Exact match of another keyword "وجعلنا"
df_exact

Unnamed: 0,Response,Occurrences
0,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4
1,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4
2,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4
3,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4
4,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4
5,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4
6,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4
7,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4
8,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4
9,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4


### Partial Match

In [117]:
keyword_partial = "جعل"
prompt_partial = f"""
- Your task is to search for a keyword in a text of the Holy Quran.
- The Quran text is delimited by triple dashes and the keyword is placed between angle brackets.
- The keyword is provided in Arabic characters.
- The match doesn't need to be exact; it could be a partial match.
- The Quran text is organized as follows:
    - Each verse is in a separate line
    - Each verse is preceded with the verse number
    
- For each occurrence you find, display the verse with its number using this format:
    Ayah (verse number): verse (with keyword surrounded by parenthesis)
    
- Finally, display the number of occurrences.

The chapter text:
---{chapter}---
The keyword:
<{keyword_exact}>
"""

In [120]:
%%time
# Dataframe holding the results
df_partial = pd.DataFrame(columns=['Response', 'Occurrences'])
get_results(df_partial, prompt_partial, iterations=100, pause=4)

RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID ccf88d519fa928c23a4abb7f5f376795 in your message.)

In [121]:
df_partial

Unnamed: 0,Response,Occurrences
0,Occurrences of <وجعلنا> in the Quran:\nAyah (9...,4
1,Occurrences of <وجعلنا> in the Quran:\nAyah (9...,4
2,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4
3,Occurrences of <وجعلنا> in the Quran:\nAyah (9...,4
4,Occurrences of <وجعلنا> in the Quran:\nAyah (9...,4
5,Occurrences of <وجعلنا> in the Quran:\nAyah (9...,4
6,Occurrences of <وجعلنا> in the Quran:\nAyah (9...,4
7,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4
8,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4
9,Occurrences found:\nAyah (9): وَجَعَلْنَا نَوْ...,4


### Saving results

In [81]:
df_gaala.to_csv('BoradMatch_gaala_20.csv')

In [105]:
df_diacritical.to_csv('DiacriticalMatch_gaala_9.csv')

In [110]:
df_semantic.to_csv('SemanticMatch_gael_74.csv')

In [116]:
df_exact.to_csv('ExactMatch_wagaalna_16.csv')

In [122]:
df_partial.to_csv('PartialMatch_gaala.csv')

In [127]:
df_exact.to_csv('ExactMatch_gaala_100.csv')

In [142]:
df_diacritical.to_csv('DiacriticalMatch_yawma_24.csv')