In [108]:
# Importing required libraries
import os
import pickle
import requests
from bs4 import BeautifulSoup
from fuzzywuzzy import fuzz
from fuzzywuzzy import process


In [109]:

# # Scraping the webpage to get the number (position in the Quran) of each Surah
# url = "https://www.wikiwand.com/en/List_of_chapters_in_the_Quran"
# response = requests.get(url)
# soup = BeautifulSoup(response.text, 'html.parser')

# # Finding the table in the webpage
# table = soup.find_all('table')[0]

# # Creating a dictionary to store Surah names and their numbers
# surah_order = {}

# # Iterating over the rows in the table (skipping the header row)
# for row in table.find_all('tr')[1:]:
#     # Extracting the Surah number and Surah name from the row
#     order, anglicized_name, all_names = [cell.text for cell in row.find_all('td')[:3]]
#     # Adding the Surah and its order to the dictionary
#     surah_order[anglicized_name] = int(order)


In [110]:
def flatten(lst):
    """Return a new flat list with every nested ``list`` inside ``lst`` expanded.

    Elements are visited left to right and depth first, so the relative order
    of the leaves is preserved. Only ``list`` instances are expanded; tuples,
    dicts, strings and every other object are kept as single elements.
    """
    def _leaves(items):
        # Lazily walk the nesting, descending into lists and yielding everything else
        for item in items:
            if isinstance(item, list):
                yield from _leaves(item)
            else:
                yield item

    return list(_leaves(lst))

In [111]:
import time
# List of URLs to scrape. scrape_url() picks its parsing branch from the URL text
# ('wikiwand', 'wordofallah' or 'arabicbible'), so every entry must contain one of those.
urls = [
    "https://www.wikiwand.com/en/List_of_chapters_in_the_Quran",
    "https://m.wordofallah.com/quran-index",
    "https://www.arabicbible.com/for-christians/quran/1375-list-of-suras-in-the-quran.html",
]
# Module-level placeholder; scrape_url() assigns its own local `soup` and never writes this one.
soup = None
def _parse_numbered_table(soup):
    """Read the first <table> of ``soup`` into two name -> number dictionaries.

    Every data row is expected to start with three <td> cells: the surah
    number, a primary name and an alternative name. Rows with fewer than three
    <td> cells are skipped. Names are lower-cased before being used as keys.

    Raises:
        ValueError: if the page has no <table>, or a number cell is not an integer.
    """
    table = soup.find('table')
    if table is None:
        raise ValueError("no <table> element found in the downloaded page")

    by_primary_name = {}
    by_alternative_name = {}
    # [1:] skips the first row of the table (the header row)
    for row in table.find_all('tr')[1:]:
        cells = [cell.text.lower() for cell in row.find_all('td')[:3]]
        if len(cells) < 3:
            continue
        order, primary_name, alternative_name = cells
        by_primary_name[primary_name] = int(order)
        by_alternative_name[alternative_name] = int(order)
    return by_primary_name, by_alternative_name


def scrape_url(url, get_from_pickle=True, save_to_pickle=True):
    """Scrape one supported page into dictionaries of Surah name -> Surah number.

    The parsing branch is chosen by a substring of ``url`` ('wikiwand',
    'wordofallah' or 'arabicbible'). Any other URL is still downloaded but
    yields a single empty dictionary.

    Args:
        url: Page to download. Must contain '//' and at least one '.' after it,
            because the cache file name is derived from the host part.
        get_from_pickle: If True and the cache file exists in the current
            working directory, return its content without any network access.
        save_to_pickle: If True, write the scraped result to the cache file.

    Returns:
        A list holding one dictionary, or two when the page also provides an
        alternative name column. Keys are lower-cased names, values are ints.

    Raises:
        requests.RequestException: on network failure, timeout or HTTP error status.
        ValueError: if an expected table is missing or a number cell is not an integer.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    # Cache file is named after the second dot-separated label of the URL
    # (e.g. "www.wikiwand.com" -> "wikiwand.pickle"); kept unchanged so existing caches stay valid.
    url_split_domains = url.split('//')[1].split('.')
    pickle_file_name = url_split_domains[1] + '.pickle'

    if get_from_pickle and os.path.exists(pickle_file_name):
        # SECURITY NOTE: pickle.load can execute arbitrary code; only load cache
        # files that were written by this notebook.
        with open(pickle_file_name, 'rb') as f:
            return pickle.load(f)

    # A timeout keeps a stalled server from hanging the notebook forever, and
    # raise_for_status() stops us from parsing an HTTP error page as data.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    surah_order_dict = {}
    surah_order_dict_2 = {}
    if 'wikiwand' in url or 'arabicbible' in url:
        # Both sites expose a table whose first three columns are number, name, alternative name
        surah_order_dict, surah_order_dict_2 = _parse_numbered_table(soup)
    elif 'wordofallah' in url:
        # This page carries no explicit numbers: names are numbered by first
        # appearance (1, 2, 3, ...) and repeated names are ignored.
        for div in soup.find_all('div', class_='en'):
            surah = div.text.split('\n')[0].lower()
            if surah not in surah_order_dict:
                surah_order_dict[surah] = len(surah_order_dict) + 1

    surah_order_dicts = [surah_order_dict]
    if surah_order_dict_2 != {}:
        surah_order_dicts.append(surah_order_dict_2)

    # Open the cache file only when saving was requested
    # (`with False and open(...)` fails because False is not a context manager).
    if save_to_pickle:
        with open(pickle_file_name, 'wb') as f:
            pickle.dump(surah_order_dicts, f)

    return surah_order_dicts

In [112]:
# Run scrape_url over every entry of `urls`, merge the per-URL lists into one flat list,
# and keep only the dictionaries that are not empty
surah_order_dicts = [
    name_to_number
    for name_to_number in flatten([scrape_url(page_url) for page_url in urls])
    if name_to_number != {}
]

In [113]:
# Getting the list of folder names (replace this with the actual path where the folders are located)
base_path = "YOUR PATH HERE"
# Keep directories only: os.listdir also returns plain files, which must not be
# matched against Surah names or renamed as if they were folders.
folder_names = [
    entry for entry in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, entry))
]

# Build one (surah number, original folder name, matched key, score) record per folder
results = []
for folder in folder_names:
    # Best candidate from each dictionary, stored together with ITS number so the
    # number always comes from the dictionary that produced the match
    best_matches = []
    for surah_order_dict in surah_order_dicts:
        if surah_order_dict == {}:
            continue
        best_match, score = process.extractOne(folder.lower(), surah_order_dict.keys())
        best_matches.append((best_match, score, surah_order_dict[best_match]))
    if not best_matches:
        raise ValueError("surah_order_dicts holds no non-empty dictionary to match folder names against")
    # Highest score wins; on a tie max() keeps the candidate from the earliest dictionary
    best_match, best_score, folder_order = max(best_matches, key=lambda x: x[1])
    results.append((folder_order, folder, best_match, best_score))


In [114]:
# Order the match records in place by their first field (the surah number);
# list.sort is stable, so records with equal numbers keep their relative order
results.sort(key=lambda record: record[0])

In [115]:
def rename_folders(results, debuging=False, base_dir=None):
    """Rename each matched folder to "<NNN> - <Matched name>".

    Args:
        results: Iterable of (folder_order, folder_orig_name, best_match,
            best_score) tuples. ``folder_order`` must be an int and
            ``best_match`` a string.
        debuging: If True, only print the planned renames and leave the
            file system untouched (dry run).
        base_dir: Directory that contains the folders. Defaults to the
            module-level ``base_path`` when omitted.

    When the target name already exists, the first free "<name> (n)" variant
    (n = 1, 2, ...) is used instead, so an existing folder is never replaced.

    Raises:
        OSError: if a rename fails for a reason other than a name collision
            (for example, the source folder does not exist).
    """
    root = None
    if not debuging:
        # Resolved only for real runs so a dry run does not require a configured path
        root = base_path if base_dir is None else base_dir

    for folder_order, folder_orig_name, best_match, best_score in results:
        new_folder_name = f"{folder_order:03d} - {best_match.capitalize()}"
        print(f"Renaming {folder_orig_name:<15} to -> {new_folder_name}")
        if debuging:
            continue
        if new_folder_name == folder_orig_name:
            # Already carries the target name (e.g. the cell was run twice)
            continue

        src = os.path.join(root, folder_orig_name)
        dst = os.path.join(root, new_folder_name)
        if os.path.exists(dst):
            # Detect the collision up front instead of relying on os.rename to fail:
            # on POSIX, renaming onto an existing empty directory silently replaces it.
            print(f"WARNING: Seems that two folders have been renamed to the same name: {new_folder_name}")
            counter = 1
            while os.path.exists(os.path.join(root, f"{new_folder_name} ({counter})")):
                counter += 1
            print(f"will add ({counter}) to the second folder...")
            dst = os.path.join(root, f"{new_folder_name} ({counter})")
        os.rename(src, dst)

In [116]:
# Apply the renames on disk; call with debuging=True instead to only print the planned names
rename_folders(results, debuging=False)

Renaming Al-Fatihah      to -> 001 - Al-fatihah
Renaming Al-Baqarah      to -> 002 - Al-baqarah
Renaming Ali 'Imran      to -> 003 - Ali 'imran
Renaming An-Nisa         to -> 004 - An-nisa
Renaming Al-Ma'idah      to -> 005 - Al-ma'idah
Renaming Al-An'am        to -> 006 - Al-an'am
Renaming Al-A'raf        to -> 007 - Al-a'raf
Renaming Al-Anfal        to -> 008 - Al-anfal
Renaming At-Tawbah       to -> 009 - At-tawbah
Renaming Yunus           to -> 010 - Yunus
Renaming Hud             to -> 011 - Hud
Renaming Yusuf           to -> 012 - Yusuf
Renaming Ar-Ra'd         to -> 013 - Ar-ra'd
Renaming Ibrahim         to -> 014 - Ibrahim
Renaming Al-Hijr         to -> 015 - Al-hijr
Renaming An-Nahl         to -> 016 - An-nahl
Renaming Al-Isra         to -> 017 - Al-isra
Renaming Al-Kahf         to -> 018 - Al-kahf
Renaming Maryam          to -> 019 - Maryam
Renaming Taha            to -> 020 - Taha
Renaming Al-Anbya        to -> 021 - Al-anbiya
Renaming Al-Hajj         to -> 022 - Al-hajj
Ren

# Visualization of Variables (for Debugging)

In [117]:
# Inspect the first scraped dictionary (lower-cased name -> number)
surah_order_dicts[0]

{'al-fatihah': 1,
 'al-baqarah': 2,
 "ali 'imran": 3,
 'an-nisa': 4,
 "al-ma'idah": 5,
 "al-an'am": 6,
 "al-a'raf": 7,
 'al-anfal': 8,
 'at-tawbah': 9,
 'yunus': 10,
 'hud': 11,
 'yusuf': 12,
 "ar-ra'd": 13,
 'ibrahim': 14,
 'al-hijr': 15,
 'an-nahl': 16,
 'al-isra': 17,
 'al-kahf': 18,
 'maryam': 19,
 'ta-ha': 20,
 'al-anbiya': 21,
 'al-hajj': 22,
 "al-mu'minun": 23,
 'an-nur': 24,
 'al-furqan': 25,
 "ash-shu'ara": 26,
 'an-naml': 27,
 'al-qasas': 28,
 'al-ankabut': 29,
 'ar-rum': 30,
 'luqmaan': 31,
 'as-sajdah': 32,
 'al-ahzaab': 33,
 'saba': 34,
 'faatir': 35,
 'ya-sin': 36,
 'as-saaffaat': 37,
 'saad': 38,
 'az-zumar': 39,
 'ghafir': 40,
 'fussilat': 41,
 'ash-shura': 42,
 'az-zukhruf': 43,
 'ad-dukhaan': 44,
 'al-jaathiyah': 45,
 'al-ahqaaf': 46,
 'muhammad': 47,
 'al-fath': 48,
 'al-hujuraat': 49,
 'qaaf': 50,
 'adh-dhaariyaat': 51,
 'at-toor': 52,
 'an-najm': 53,
 'al-qamar': 54,
 'ar-rahman': 55,
 "al-waqi'ah": 56,
 'al-hadeed': 57,
 'al-mujadila': 58,
 'al-hashr': 59,
 'al-mu

In [118]:
# Inspect the second scraped dictionary (lower-cased name -> number)
surah_order_dicts[1]

{'ٱلْفَاتِحَةal-fātiḥahal-ḥamd': 1,
 'ٱلْبَقَرَةal-baq̈arah': 2,
 'آلِ عِمْرَانʾāli ʿimrān': 3,
 'ٱلنِّسَاءan-nisāʾ': 4,
 'ٱلْمَائِدَةal-māʾidah': 5,
 'ٱلْأَنْعَامal-ʾanʿām': 6,
 'ٱلْأَعْرَافal-ʾaʿrāf': 7,
 'ٱلْأَنْفَالal-ʾanfāl': 8,
 'ٱلتَّوْبَةat-tawbah': 9,
 'يُونُسyūnus': 10,
 'هُودhūd': 11,
 'يُوسُفyūsuf': 12,
 'ٱلرَّعْدar-raʿd': 13,
 'إِبْرَاهِيمʾibrāhīm': 14,
 'ٱلْحِجْرal-ḥijr': 15,
 'ٱلنَّحْلan-naḥl': 16,
 'ٱلْإِسْرَاءal-ʾisrāʾ': 17,
 'ٱلْكَهْفal-kahf': 18,
 'مَرْيَمmaryam': 19,
 'طهṭāʾ hāʾ': 20,
 'ٱلْأَنْبِيَاءal-ʾanbiyāʾ': 21,
 'ٱلْحَجّal-ḥajj': 22,
 'ٱلْمُؤْمِنُونal-muʾminūn': 23,
 'ٱلنُّورan-nūr': 24,
 'ٱلْفُرْقَانal-furq̈ān': 25,
 'ٱلشُّعَرَاءaš-šuʿarāʾ': 26,
 'ٱلنَّمْلan-naml': 27,
 'ٱلْقَصَصal-q̈aṣaṣ': 28,
 'ٱلْعَنْكَبُوتal-ʿankabūt': 29,
 'ٱلرُّومar-rūm': 30,
 'لُقْمَانluq̈mān': 31,
 'ٱلسَّجْدَةas-sajdah': 32,
 'ٱلْأَحْزَابal-ʾaḥzāb': 33,
 'سَبَأsabaʾ': 34,
 'فَاطِرfāṭir': 35,
 'يسyāʾ sīn': 36,
 'ٱلصَّافَّاتaṣ-ṣāffāt': 37,
 'صṣād': 38,
 'ٱلزُّمَرaz-zumar': 39,
 'غَافِرg

In [119]:
# Inspect the third scraped dictionary (lower-cased name -> number)
surah_order_dicts[2]

{'al-fatiha': 1,
 'al-baqara': 2,
 'aal-imran': 3,
 'an-nisa': 4,
 'al-maeda': 5,
 'al-anaam': 6,
 'al-araf': 7,
 'al-anfal': 8,
 'at-taubah': 9,
 'younus': 10,
 'hood': 11,
 'yusuf': 12,
 "ar-ra'ad": 13,
 'ibrahim': 14,
 'al-hijr': 15,
 'an-nahl': 16,
 'al-isra': 17,
 'al-kahf': 18,
 'maryam': 19,
 'taha': 20,
 'al-anbiya': 21,
 'al-hajj': 22,
 'al-muminoon': 23,
 'al-noor': 24,
 'al-furqan': 25,
 "ash-shu'ara": 26,
 'an-naml': 27,
 'al-qasas': 28,
 'al-ankaboot': 29,
 'ar-room': 30,
 'luqman': 31,
 'as-sajda': 32,
 'al-ahzab': 33,
 'as-saba': 34,
 'fatir': 35,
 'yaseen': 36,
 'as-saaffat': 37,
 'saad': 38,
 'al-zumar': 39,
 'ghafir': 40,
 'fussilat': 41,
 'ash-shura': 42,
 'az-zukhruf': 43,
 'al-dukhan': 44,
 'al-jathiya': 45,
 'al-ahqaf': 46,
 'muhammad': 47,
 'al-fath': 48,
 'al-hujraat': 49,
 'qaaf': 50,
 'al-dhariyaat': 51,
 'at-tur': 52,
 'an-najm': 53,
 'al-qamar': 54,
 'al-rahman': 55,
 'al-waqia': 56,
 'al-hadid': 57,
 'al-mujadilah': 58,
 'al-hashr': 59,
 'al-mumtahina': 60,

In [120]:
# Inspect the match records: (number, original folder name, matched key, fuzzy score)
results

[(1, 'Al-Fatihah', 'al-fatihah', 100),
 (2, 'Al-Baqarah', 'al-baqarah', 100),
 (3, "Ali 'Imran", "ali 'imran", 100),
 (4, 'An-Nisa', 'an-nisa', 100),
 (5, "Al-Ma'idah", "al-ma'idah", 100),
 (6, "Al-An'am", "al-an'am", 100),
 (7, "Al-A'raf", "al-a'raf", 100),
 (8, 'Al-Anfal', 'al-anfal', 100),
 (9, 'At-Tawbah', 'at-tawbah', 100),
 (10, 'Yunus', 'yunus', 100),
 (11, 'Hud', 'hud', 100),
 (12, 'Yusuf', 'yusuf', 100),
 (13, "Ar-Ra'd", "ar-ra'd", 100),
 (14, 'Ibrahim', 'ibrahim', 100),
 (15, 'Al-Hijr', 'al-hijr', 100),
 (16, 'An-Nahl', 'an-nahl', 100),
 (17, 'Al-Isra', 'al-isra', 100),
 (18, 'Al-Kahf', 'al-kahf', 100),
 (19, 'Maryam', 'maryam', 100),
 (20, 'Taha', 'taha', 100),
 (21, 'Al-Anbya', 'al-anbiya', 94),
 (22, 'Al-Hajj', 'al-hajj', 100),
 (23, "Al-Mu'minun", "al-mu'minun", 100),
 (24, 'An-Nur', 'an-nur', 100),
 (25, 'Al-Furqan', 'al-furqan', 100),
 (26, "Ash-Shu'ara", "ash-shu'ara", 100),
 (27, 'An-Naml', 'an-naml', 100),
 (28, 'Al-Qasas', 'al-qasas', 100),
 (29, "Al-'Ankabut", 'al-