# Automatically 

### Solved the two-line problem


In [None]:
import pandas as pd
import re
import unicodedata
import speech_recognition as sr

def get_user_inputs():
    """Prompt on stdin for the surah name and the line range to check.

    The three prompts are issued in a fixed order: name, starting line,
    ending line.

    Returns:
        tuple: ``(surah_name, start_line, end_line)`` where the name is the
        text exactly as typed and both line numbers have been converted
        with ``int``.

    Raises:
        ValueError: If either line number is not a valid integer literal.
    """
    name = input("Enter the Surah name: ")
    first_line = int(input("Enter the starting line number: "))
    last_line = int(input("Enter the ending line number: "))
    return name, first_line, last_line

def recognize_speech():
    """Record spoken phrases until the closing phrase is heard, then return them.

    Each pass through the loop opens ``sr.Microphone()``, captures one clip
    with ``Recognizer.listen`` and transcribes it with ``recognize_google``
    using ``language='ar-AR'``. Every transcription is echoed to stdout.
    A transcription that is exactly the closing phrase ends the session and
    is not kept; any other transcription is kept.

    Recognition errors (``sr.UnknownValueError`` / ``sr.RequestError``) are
    printed and the loop simply listens again.

    Side effects:
        Appends the collected text (each phrase followed by one space) to
        ``C:/Folder/text.txt``.

    Returns:
        str: The collected phrases separated by single spaces, with
        surrounding whitespace stripped.
    """
    recognizer = sr.Recognizer()
    phrases = []

    while True:
        # The microphone context is re-entered for every clip.
        with sr.Microphone() as mic:
            print('Say something....')
            clip = recognizer.listen(mic)

        try:
            phrase = recognizer.recognize_google(clip, language='ar-AR')
        except (sr.UnknownValueError, sr.RequestError) as problem:
            # Report the failure and go straight back to listening.
            print(problem)
            continue

        print(phrase)
        if phrase == "صدق الله العظيم":
            print("Stopping...")
            break
        phrases.append(phrase)

    # Every kept phrase carries a trailing space, so the text written to the
    # file ends with a space while the returned value is stripped.
    transcript = ''.join(phrase + ' ' for phrase in phrases)
    with open('C:/Folder/text.txt', 'a', encoding='utf-8') as log_file:
        log_file.write(transcript)
    return transcript.strip()

def normalize_arabic(text):
    """Reduce text to bare word characters separated by single spaces.

    The result is meant for exact string comparison, so three things are
    normalised:

    1. Every character that is neither a regex word character nor
       whitespace is deleted (punctuation, symbols, and combining marks
       such as Arabic diacritics).
    2. The text is NFKD-decomposed and any remaining combining characters
       are dropped, so precomposed letters fall back to their base letter.
    3. Runs of whitespace are collapsed to one space and the ends are
       trimmed. Without this, deleting a free-standing mark leaves two
       adjacent spaces and otherwise identical texts compare unequal.

    Args:
        text: The string to normalise.

    Returns:
        str: The normalised text; an empty string if nothing survives.
    """
    without_symbols = re.sub(r'[^\w\s]', '', text)
    decomposed = unicodedata.normalize('NFKD', without_symbols)
    base_chars = ''.join(c for c in decomposed if not unicodedata.combining(c))
    # split() with no argument splits on any whitespace run and discards
    # leading/trailing whitespace, which is exactly the collapse we need.
    return ' '.join(base_chars.split())

def load_dataset(dataset_path):
    """Read the reference CSV into a DataFrame.

    Args:
        dataset_path: Location of the CSV, passed straight to
            ``pandas.read_csv`` with default options.

    Returns:
        pandas.DataFrame: The parsed file contents.
    """
    frame = pd.read_csv(dataset_path)
    return frame

def process_text(recognized_text, dataset, surah_name, start_line, end_line):
    """Compare a recited passage with the reference text for a line range.

    Rows of ``dataset`` whose ``sorah`` equals ``surah_name`` and whose
    ``ayah`` lies between ``start_line`` and ``end_line`` are selected, their
    ``text`` values are joined with spaces, and both that reference and
    ``recognized_text`` are passed through ``normalize_arabic``. The
    comparison is then done word by word, so differences in spacing alone
    never cause a mismatch.

    Outcomes (``Status`` / ``Details``):
        * ``"Correct"`` / ``None`` - the word sequences are identical.
        * ``"Incomplete"`` / the remaining reference text - the recited text
          occurs as a substring of the reference; only the first occurrence
          is removed to obtain the remainder.
        * ``"Incorrect"`` / ``"rec -> exp; ..."`` - position-by-position list
          of differing words. Words the reciter left out appear as
          ``(missing) -> exp`` and surplus recited words as
          ``rec -> (extra)``.

    Args:
        recognized_text: The transcribed recitation.
        dataset: DataFrame with ``sorah``, ``ayah`` and ``text`` columns.
        surah_name: Value to match against the ``sorah`` column.
        start_line: First ``ayah`` value to include.
        end_line: Last ``ayah`` value to include.

    Returns:
        pandas.DataFrame: One row with columns ``Line Number`` (always
        ``start_line``), ``Recognized Text`` (the normalised recitation),
        ``Status`` and ``Details``.
    """
    # Imported here so the function does not depend on a file-level import.
    from itertools import zip_longest

    in_range = dataset['ayah'].between(start_line, end_line)
    reference_rows = dataset[(dataset['sorah'] == surah_name) & in_range]

    normalized_expected = normalize_arabic(' '.join(reference_rows['text'].tolist()))
    normalized_recognized = normalize_arabic(recognized_text)

    # Work on word lists / single-space joins so that stray double spaces
    # left behind by normalisation cannot turn a correct recitation into a
    # mismatch.
    expected_words = normalized_expected.split()
    recognized_words = normalized_recognized.split()
    expected_joined = ' '.join(expected_words)
    recognized_joined = ' '.join(recognized_words)

    if recognized_words == expected_words:
        status, details = "Correct", None
    elif recognized_joined in expected_joined:
        status = "Incomplete"
        # count=1: a phrase that repeats in the reference must only be
        # removed once, otherwise the reported remainder loses text that was
        # never recited.
        remainder = expected_joined.replace(recognized_joined, '', 1)
        details = ' '.join(remainder.split())
    else:
        status = "Incorrect"
        mismatches = []
        # zip_longest (not zip) so that a length difference is reported
        # instead of being silently truncated away.
        for rec_word, exp_word in zip_longest(recognized_words, expected_words):
            if rec_word == exp_word:
                continue
            shown_rec = "(missing)" if rec_word is None else rec_word
            shown_exp = "(extra)" if exp_word is None else exp_word
            mismatches.append(f"{shown_rec} -> {shown_exp}")
        details = '; '.join(mismatches)

    return pd.DataFrame(
        [(start_line, normalized_recognized, status, details)],
        columns=['Line Number', 'Recognized Text', 'Status', 'Details'],
    )


In [None]:

def main():
    """Run one interactive check and print the resulting comparison table.

    Order of operations: ask for the surah name and line range, record the
    recitation, load the reference CSV from its fixed path, compare, print.
    """
    csv_location = "C:/Folder/ayah_text_diacritics_with_sorah_names.csv"

    # (surah_name, start_line, end_line) is forwarded unchanged below.
    selection = get_user_inputs()
    spoken = recognize_speech()
    reference = load_dataset(csv_location)

    report = process_text(spoken, reference, *selection)

    # Show the comparison result.
    print(report)


if __name__ == "__main__":
    main()
