## Quran Finder


# data preprocessing

# importing the data


In [1]:
import pandas as pd
import speech_recognition as sr
from difflib import get_close_matches
# Load the verse table from disk; the code below reads its 'Surah_Number',
# 'Surah_Name', 'Ayat_Number' and 'Ayat' columns.
quran_e_pak = pd.read_csv('quran_e_pak.csv')


# Correct the Bismillah mismatch


In [2]:
def separate_bismillah_in_place(df, bismillah='بِسْمِ اللَّهِ الرَّحْمَٰنِ الرَّحِيمِ'):
    """Split a leading Bismillah off every Ayat that starts with it.

    A row whose 'Ayat' text begins with ``bismillah`` and continues past it
    is replaced by two rows: one holding only ``bismillah`` (with
    'Ayat_Number' set to 1) and one holding the stripped remainder under the
    row's original 'Ayat_Number'. All other rows are copied unchanged,
    including rows whose 'Ayat' is exactly ``bismillah`` and rows whose
    'Ayat' is not a string (for example NaN from an empty CSV cell).

    Despite the name, ``df`` itself is not modified; a new DataFrame is
    returned.

    Args:
        df: DataFrame with the columns 'Surah_Number', 'Surah_Name',
            'Ayat_Number' and 'Ayat'.
        bismillah: The prefix to split off. Defaults to the vocalised
            Bismillah phrase.

    Returns:
        A new DataFrame containing only those four columns, sorted by
        'Surah_Number' then 'Ayat_Number', with a fresh 0..n-1 index.
    """
    columns = ['Surah_Number', 'Surah_Name', 'Ayat_Number', 'Ayat']
    prefix_length = len(bismillah)
    new_rows = []

    # Walk the four columns in lockstep; this avoids building a Series per row.
    for surah_number, surah_name, ayat_number, ayat in zip(*(df[name] for name in columns)):
        # Non-text cells have no prefix to split and must not crash the pass.
        starts_with_bismillah = (
            isinstance(ayat, str)
            and ayat.startswith(bismillah)
            and len(ayat) > prefix_length
        )

        if starts_with_bismillah:
            # The Bismillah becomes its own row, numbered 1 ...
            new_rows.append([surah_number, surah_name, 1, bismillah])
            # ... and the rest of the text keeps the original Ayat number.
            new_rows.append([surah_number, surah_name, ayat_number, ayat[prefix_length:].strip()])
        else:
            new_rows.append([surah_number, surah_name, ayat_number, ayat])

    new_df = pd.DataFrame(new_rows, columns=columns)

    # NOTE(review): the Bismillah row and the remainder can share the same
    # (Surah_Number, Ayat_Number) key; their relative order then relies on the
    # sort keeping tied rows in insertion order - confirm this is acceptable.
    new_df = new_df.sort_values(by=['Surah_Number', 'Ayat_Number']).reset_index(drop=True)

    return new_df

# Apply the function to the DataFrame, rebinding the name to the returned copy,
# then write the result to 'quran_e_pak.csv' (the same path that was read
# above, so the file is overwritten); index=False leaves the row index out.
quran_e_pak = separate_bismillah_in_place(quran_e_pak)
quran_e_pak.to_csv('quran_e_pak.csv', index=False)



# Speech transcription, verse matching, and context verses



In [7]:
import pandas as pd
import speech_recognition as sr
from difflib import SequenceMatcher
import re

# Characters removed by normalize_text: Quranic annotation signs
# (U+0610-U+061A, U+06D6-U+06ED), tatweel (U+0640), the harakat and other
# combining marks (U+064B-U+065F) and the superscript alef (U+0670).
_ARABIC_MARKS_RE = re.compile(r'[\u0610-\u061A\u0640\u064B-\u065F\u0670\u06D6-\u06ED]')

# Alef with madda / hamza above / hamza below / wasla, all folded to bare alef.
_ALEF_VARIANTS = str.maketrans({
    '\u0622': '\u0627',
    '\u0623': '\u0627',
    '\u0625': '\u0627',
    '\u0671': '\u0627',
})


# Function to normalize Arabic text by removing diacritics
def normalize_text(text):
    """Reduce Arabic text to a bare-letter form suitable for comparison.

    Vocalised verse text and unvocalised text (such as the recogniser output
    shown in this notebook, e.g. "واذ يرفع ابراهيم") only compare equal once
    every mark is gone and the alef spellings agree, so this:

    * deletes diacritics, the superscript alef, tatweel and Quranic
      pause/annotation signs;
    * folds the hamza/madda/wasla forms of alef to a plain alef;
    * strips leading and trailing whitespace.

    Removing a free-standing pause sign can leave two adjacent spaces inside
    the result; callers in this file tokenise with ``split()``, which is
    unaffected by that.

    Args:
        text: The string to normalise.

    Returns:
        The normalised string.
    """
    without_marks = _ARABIC_MARKS_RE.sub('', text)
    return without_marks.translate(_ALEF_VARIANTS).strip()

# Capture one recitation from the microphone and turn it into text
def transcribe_audio_to_text():
    """Listen on the microphone and transcribe the recording.

    Prompts the user, records through ``sr.Microphone()``, and sends the
    audio to ``recognize_google`` with the language set to "ar-SA".

    Returns:
        The recognised text, or None when the audio could not be understood
        or the recognition request failed (a message is printed in both
        failure cases).
    """
    speech_recognizer = sr.Recognizer()
    with sr.Microphone() as microphone:
        print("Please recite the Ayat:")
        recording = speech_recognizer.listen(microphone)
        try:
            transcript = speech_recognizer.recognize_google(recording, language="ar-SA")
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand audio")
            return None
        except sr.RequestError as error:
            print(f"Could not request results; {error}")
            return None
        else:
            print(f"Transcription: {transcript}")
            return transcript

# Break a transcription into its whitespace-separated tokens
def split_into_words(transcribed_text):
    """Return the words of ``transcribed_text`` as a list.

    Splitting is on runs of whitespace, so repeated spaces never produce
    empty entries and an empty string gives an empty list.
    """
    words = transcribed_text.split()
    return words

# Function to find matching verses using word-by-word matching and scoring
def find_matching_verses(transcribed_text, quran_e_pak, max_results=5):
    """Rank verses by how many of the spoken words they contain.

    Every word of the transcription and every verse is passed through
    ``normalize_text``. A verse earns one point for each spoken word that
    appears among its own words (a word spoken twice counts twice), and the
    position of that word's first occurrence in the verse is recorded.

    Verses are ordered by score, highest first; equal scores are ordered by
    the smaller sum of recorded positions, i.e. matches nearer the start of
    the verse win.

    Args:
        transcribed_text: The recognised recitation.
        quran_e_pak: DataFrame with an 'Ayat' column holding the verse text.
        max_results: Maximum number of verses to return. Defaults to 5.

    Returns:
        A list of ``(row, score)`` tuples, best match first, where ``row`` is
        the matching DataFrame row as a Series. Empty when nothing matches.
    """
    normalized_words = [normalize_text(word) for word in split_into_words(transcribed_text)]

    # Normalise and tokenise each verse once up front rather than once per
    # spoken word. Non-text cells (e.g. NaN) get no words and never match.
    verse_word_lists = [
        normalize_text(ayat).split() if isinstance(ayat, str) else []
        for ayat in quran_e_pak['Ayat']
    ]

    # Keyed by row *position* so the final .iloc lookup is correct whatever
    # index labels the DataFrame carries.
    ayat_matches = {}
    for word in normalized_words:
        for row_position, ayat_words in enumerate(verse_word_lists):
            if word not in ayat_words:
                continue
            match = ayat_matches.setdefault(row_position, {'score': 0, 'positions': []})
            match['score'] += 1
            match['positions'].append(ayat_words.index(word))

    # Sort matches by score and then by position consistency
    sorted_matches = sorted(
        ayat_matches.items(),
        key=lambda item: (item[1]['score'], -sum(item[1]['positions'])),
        reverse=True,
    )

    return [
        (quran_e_pak.iloc[row_position], match['score'])
        for row_position, match in sorted_matches[:max_results]
    ]

# Slice out a verse together with its neighbouring rows
def get_context_verses(verse_index, quran_e_pak, context_range=2):
    """Return the rows surrounding position ``verse_index``.

    The window spans ``context_range`` rows before and after the given
    position (inclusive of the position itself) and is clipped to the
    bounds of the DataFrame.

    Args:
        verse_index: Integer row position of the verse of interest.
        quran_e_pak: DataFrame to slice.
        context_range: Number of rows to include on each side. Defaults to 2.

    Returns:
        The positional slice of ``quran_e_pak`` covering the window.
    """
    row_count = len(quran_e_pak)

    first = verse_index - context_range
    if first < 0:
        first = 0

    stop = verse_index + context_range + 1
    if stop > row_count:
        stop = row_count

    return quran_e_pak.iloc[first:stop]

def main():
    """Run one interactive lookup: record, match, and print the results.

    Transcribes a recitation, searches the module-level ``quran_e_pak``
    table for the best-matching verses, and prints each match followed by
    its surrounding verses. Prints a short notice instead when there is no
    transcription or no verse matches.
    """
    transcribed_text = transcribe_audio_to_text()
    if not transcribed_text:
        print("No transcription available.")
        return

    matching_verses = find_matching_verses(transcribed_text, quran_e_pak)
    if not matching_verses:
        print("No matching verses found.")
        return

    for matching_verse, _score in matching_verses:
        # The Series name is the row's index label in the verse table.
        neighbours = get_context_verses(matching_verse.name, quran_e_pak)

        print("Matching verse:")
        print("Ayat                     | Ayat_Number | Surah_Name | Surah_Number")
        print(f"{matching_verse['Ayat']} | {matching_verse['Ayat_Number']} | {matching_verse['Surah_Name']} | {matching_verse['Surah_Number']}")

        print("\nContextual verses:")
        print("Ayat                                        | Ayat_Number | Surah_Name | Surah_Number")
        for _label, row in neighbours.iterrows():
            print(f"{row['Ayat']} | {row['Ayat_Number']} | {row['Surah_Name']} | {row['Surah_Number']}")

# Start the interactive lookup only when this module is the entry point.
if __name__ == "__main__":
    main()
    

Please recite the Ayat:
Transcription: واذ يرفع ابراهيم القواعد من البيت
Matching verse:
Ayat                     | Ayat_Number | Surah_Name | Surah_Number
وَإِذْ يَرْفَعُ إِبْرَاهِيمُ الْقَوَاعِدَ مِنَ الْبَيْتِ وَإِسْمَاعِيلُ رَبَّنَا تَقَبَّلْ مِنَّا ۖ إِنَّكَ أَنْتَ السَّمِيعُ الْعَلِيمُ | 127 | البقرة | 2

Contextual verses:
Ayat                                        | Ayat_Number | Surah_Name | Surah_Number
وَإِذْ جَعَلْنَا الْبَيْتَ مَثَابَةً لِلنَّاسِ وَأَمْنًا وَاتَّخِذُوا مِنْ مَقَامِ إِبْرَاهِيمَ مُصَلًّى ۖ وَعَهِدْنَا إِلَىٰ إِبْرَاهِيمَ وَإِسْمَاعِيلَ أَنْ طَهِّرَا بَيْتِيَ لِلطَّائِفِينَ وَالْعَاكِفِينَ وَالرُّكَّعِ السُّجُودِ | 125 | البقرة | 2
وَإِذْ قَالَ إِبْرَاهِيمُ رَبِّ اجْعَلْ هَٰذَا بَلَدًا آمِنًا وَارْزُقْ أَهْلَهُ مِنَ الثَّمَرَاتِ مَنْ آمَنَ مِنْهُمْ بِاللَّهِ وَالْيَوْمِ الْآخِرِ ۖ قَالَ وَمَنْ كَفَرَ فَأُمَتِّعُهُ قَلِيلًا ثُمَّ أَضْطَرُّهُ إِلَىٰ عَذَابِ النَّارِ ۖ وَبِئْسَ الْمَصِيرُ | 126 | البقرة | 2
وَإِذْ يَرْفَعُ إِبْرَاهِيمُ الْقَوَاعِدَ مِنَ الْبَي