### Flash Cards Data Analysis

In [195]:
import pandas as pd
import numpy as np
import re
from functools import reduce
from camel_tools.utils.charmap import CharMapper
from lang_trans.arabic import buckwalter
from pathlib import Path
import shutil

In [196]:
path = f"/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Arabic/Quaran/Flash Cards/Result"

Path(path).mkdir(parents=True, exist_ok=True)

In [197]:
# buckwalter text clean for simple before transString
def clean_ex(text):
    buckwalter_out = re.sub(r'''([PJVG\.:;,!\+\^\]\[@#FNKauio`~"%-])''', "", text) # difference from clean
    return buckwalter_out

In [198]:
# Arabic Encoding Extended
# -*- coding: utf-8 -*-

# Arabic Transliteration based on Buckwalter
# dictionary source is buckwalter2unicode.py http://www.redhat.com/archives/fedora-extras-commits/2007-June/msg03617.html 

buck2uni = {"'": u"\u0621", # hamza-on-the-line
            "|": u"\u0622", # madda
            ">": u"\u0623", # hamza-on-'alif
            "&": u"\u0624", # hamza-on-waaw
            "<": u"\u0625", # hamza-under-'alif
            "}": u"\u0626", # hamza-on-yaa'
            "A": u"\u0627", # bare 'alif
            "b": u"\u0628", # baa'
            "p": u"\u0629", # taa' marbuuTa
            "t": u"\u062A", # taa'
            "v": u"\u062B", # thaa'
            "j": u"\u062C", # jiim
            "H": u"\u062D", # Haa'
            "x": u"\u062E", # khaa'
            "d": u"\u062F", # daal
            "*": u"\u0630", # dhaal
            "r": u"\u0631", # raa'
            "z": u"\u0632", # zaay
            "s": u"\u0633", # siin
            "$": u"\u0634", # shiin
            "S": u"\u0635", # Saad
            "D": u"\u0636", # Daad
            "T": u"\u0637", # Taa'
            "Z": u"\u0638", # Zaa' (DHaa')
            "E": u"\u0639", # cayn
            "g": u"\u063A", # ghayn
            "_": u"\u0640", # taTwiil
            "f": u"\u0641", # faa'
            "q": u"\u0642", # qaaf
            "k": u"\u0643", # kaaf
            "l": u"\u0644", # laam
            "m": u"\u0645", # miim
            "n": u"\u0646", # nuun
            "h": u"\u0647", # haa'
            "w": u"\u0648", # waaw
            "Y": u"\u0649", # 'alif maqSuura
            "y": u"\u064A", # yaa'
            "F": u"\u064B", # fatHatayn
            "N": u"\u064C", # Dammatayn
            "K": u"\u064D", # kasratayn
            "a": u"\u064E", # fatHa
            "u": u"\u064F", # Damma
            "i": u"\u0650", # kasra
            "~": u"\u0651", # shaddah
            "o": u"\u0652", # sukuun
            "^": u"\u0653", # maddah
            "#": u"\u0654", # hamzaabove            
            "`": u"\u0670", # dagger 'alif
            "{": u"\u0671", # waSla
            "P": u"\u067E", # arabicletterpeh
            "J": u"\u0686", 
            "V": u"\u06A4", # arabicletterveh
            "G": u"\u06AF", # arabiclettergaf
            ":": u"\u06DC", # smallhighseen
            "@": u"\u06DF", # smallhighroundedzero
            "\"": u"\u06E0", # smallhighuprightrectangularzero
            "[": u"\u06E2", # smallhighmeemisolatedform
            ";": u"\u06E3", # smalllowseen
            ",": u"\u06E5", # smallwaw
            ".": u"\u06E6", # smallya
            "!": u"\u06E8", # smallhighnoon
            "-": u"\u06EA", # emptycentrelowstop
            "+": u"\u06EB", # emptycentrehighstop
            "%": u"\u06EC", # roundedhighstopwithfilledcentre
            "]": u"\u06ED", # smalllowmeem           
}

def transString(string, reverse=0):
    '''Given a Unicode string, transliterate into Buckwalter. To go from
    Buckwalter back to Unicode, set reverse=1'''

    for k, v in buck2uni.items():
      if not reverse:
            string = string.replace(v, k)
      else:
            string = string.replace(k, v)

    return string

In [199]:
# Simple Technique
# -*- coding: utf-8 -*-

# Arabic Transliteration based on Buckwalter
# dictionary source is buckwalter2unicode.py http://www.redhat.com/archives/fedora-extras-commits/2007-June/msg03617.html 

buck2unisimple = {"'": u"\u0621", # hamza-on-the-line
            "|": u"\u0622", # madda
            ">": u"\u0623", # hamza-on-'alif
            "&": u"\u0624", # hamza-on-waaw
            "<": u"\u0625", # hamza-under-'alif
            "A": u"\u0627", # bare 'alif
            "b": u"\u0628", # baa'
            "p": u"\u0629", # taa' marbuuTa
            "t": u"\u062A", # taa'
            "v": u"\u062B", # thaa'
            "j": u"\u062C", # jiim
            "H": u"\u062D", # Haa'
            "x": u"\u062E", # khaa'
            "d": u"\u062F", # daal
            "*": u"\u0630", # dhaal
            "r": u"\u0631", # raa'
            "z": u"\u0632", # zaay
            "s": u"\u0633", # siin
            "$": u"\u0634", # shiin
            "S": u"\u0635", # Saad
            "D": u"\u0636", # Daad
            "T": u"\u0637", # Taa'
            "Z": u"\u0638", # Zaa' (DHaa')
            "E": u"\u0639", # cayn
            "g": u"\u063A", # ghayn
            "_": u"\u0640", # taTwiil
            "f": u"\u0641", # faa'
            "q": u"\u0642", # qaaf
            "k": u"\u0643", # kaaf
            "l": u"\u0644", # laam
            "m": u"\u0645", # miim
            "n": u"\u0646", # nuun
            "h": u"\u0647", # haa'
            "w": u"\u0648", # waaw
            "Y": u"\u0649", # 'alif maqSuura
            "y": u"\u064A", # yaa'
            "{": u"\u0671", # waSla         
}

def transStringSimple(string, reverse=0):
    '''Given a Unicode string, transliterate into Buckwalter. To go from
    Buckwalter back to Unicode, set reverse=1'''

    for k, v in buck2unisimple.items():
      if not reverse:
            string = string.replace(v, k)
      else:
            string = string.replace(k, v)

    return string

In [200]:
# character clean for simple after transStringSimple 
def clean(text):
    arabic_out = re.sub(r'''([PJVG\.:;,!\+\]\[@#FNKauio`~"%-])''', "", text) # .$^*+
    arabic_out2 = re.sub(r"\^", " ", arabic_out)
    return arabic_out2

#### Quaran Word Translate Select

In [201]:
#df_arabic_translate = pd.read_excel("/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Arabic/Quaran/Flash Cards/Data/All_Surah_Translate_File_Concat.xlsx")  # with asterisk
#df_arabic_translate  # it needs transString(x,0), clean_ex(text), transString(x,1) funcs

In [202]:
df_quaran_master = pd.read_excel("/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Arabic/Quaran/Flash Cards/Data/Master 17052022.xlsx")
df_all_word_translate = df_quaran_master.loc[:,["surah","surah latin","tanzil_clean","tanzil_plain","arabic","translate_english","translate_urdu","translate_hindi","translate_indonesian","translate_bangla","translate_turkish","translate_russian"]]
df_all_word_translate

Unnamed: 0,surah,surah latin,tanzil_clean,tanzil_plain,arabic,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,1,Al-Fatiha,بسم,بِسْمِ,بِسۡمِ,In (the) name,ساتھ نام,साथ नाम,dengan nama,নামে,adıyla,С именем
1,1,Al-Fatiha,الله,اللَّهِ,ٱللَّهِ,(of) Allah,اللہ کے,अल्लाह के,Allah,আল্লাহ (র),Allah'ın,"Аллаха,"
2,1,Al-Fatiha,الرحمن,الرَّحْمَٰنِ,ٱلرَّحۡمَٰنِ,the Most Gracious,جو بے حد مہربان ہے,जो बहुत मेहरबान,Maha Pengasih,পরম করুণাময়,Rahman,"Милостивого,"
3,1,Al-Fatiha,الرحيم,الرَّحِيمِ,ٱلرَّحِيمِ,the Most Merciful,بار بار رحم فرمانے والا ہے,निहायत रहम करने वाला है,Maha Penyayang,অসীম দয়ালু,Rahim,Милосердного!
4,1,Al-Fatiha,الحمد,الْحَمْدُ,ٱلۡحَمۡدُ,All praises and thanks,سب تعریف,सब तारीफ़,pujian,সকল প্রশংসা,hamdolsun,Хвала
...,...,...,...,...,...,...,...,...,...,...,...,...
78242,114,An-Nas,صدور,صُدُورِ,صُدُورِ,(the) breasts,سینوں,सीनों में,dada,অন্তরসমূহের,göğüslerine,грудях
78243,114,An-Nas,الناس,النَّاسِ,ٱلنَّاسِ,(of) mankind,انسانوں کے,लोगों के,manusia,মানুষের,insanların,"людей,"
78244,114,An-Nas,من,مِنَ,مِنَ,From,سے,जिन्नों में से,dari,মধ্য হতে,cinlerden,(будучи) из (числа)
78245,114,An-Nas,الجنة,الْجِنَّةِ,ٱلۡجِنَّةِ,the jinn,جنوں میں,जिन्नों में से,jin,জিনের,cinlerden,джиннов


In [155]:
df_all_word_translate = df_all_word_translate[~df_all_word_translate["arabic"].isnull()]
df_all_word_translate

Unnamed: 0,surah,surah latin,tanzil_clean,tanzil_plain,arabic,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,1,Al-Fatiha,بسم,بِسْمِ,بِسۡمِ,In (the) name,ساتھ نام,साथ नाम,dengan nama,নামে,adıyla,С именем
1,1,Al-Fatiha,الله,اللَّهِ,ٱللَّهِ,(of) Allah,اللہ کے,अल्लाह के,Allah,আল্লাহ (র),Allah'ın,"Аллаха,"
2,1,Al-Fatiha,الرحمن,الرَّحْمَٰنِ,ٱلرَّحۡمَٰنِ,the Most Gracious,جو بے حد مہربان ہے,जो बहुत मेहरबान,Maha Pengasih,পরম করুণাময়,Rahman,"Милостивого,"
3,1,Al-Fatiha,الرحيم,الرَّحِيمِ,ٱلرَّحِيمِ,the Most Merciful,بار بار رحم فرمانے والا ہے,निहायत रहम करने वाला है,Maha Penyayang,অসীম দয়ালু,Rahim,Милосердного!
4,1,Al-Fatiha,الحمد,الْحَمْدُ,ٱلۡحَمۡدُ,All praises and thanks,سب تعریف,सब तारीफ़,pujian,সকল প্রশংসা,hamdolsun,Хвала
...,...,...,...,...,...,...,...,...,...,...,...,...
78242,114,An-Nas,صدور,صُدُورِ,صُدُورِ,(the) breasts,سینوں,सीनों में,dada,অন্তরসমূহের,göğüslerine,грудях
78243,114,An-Nas,الناس,النَّاسِ,ٱلنَّاسِ,(of) mankind,انسانوں کے,लोगों के,manusia,মানুষের,insanların,"людей,"
78244,114,An-Nas,من,مِنَ,مِنَ,From,سے,जिन्नों में से,dari,মধ্য হতে,cinlerden,(будучи) из (числа)
78245,114,An-Nas,الجنة,الْجِنَّةِ,ٱلۡجِنَّةِ,the jinn,جنوں میں,जिन्नों में से,jin,জিনের,cinlerden,джиннов


In [156]:
df_all_word_translate.drop("arabic", axis=1, inplace=True)
df_all_word_translate.rename(columns={"tanzil_plain":"arabic","tanzil_clean":"arabic_simple"}, inplace=True)
df_all_word_translate

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,surah,surah latin,arabic_simple,arabic,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,1,Al-Fatiha,بسم,بِسْمِ,In (the) name,ساتھ نام,साथ नाम,dengan nama,নামে,adıyla,С именем
1,1,Al-Fatiha,الله,اللَّهِ,(of) Allah,اللہ کے,अल्लाह के,Allah,আল্লাহ (র),Allah'ın,"Аллаха,"
2,1,Al-Fatiha,الرحمن,الرَّحْمَٰنِ,the Most Gracious,جو بے حد مہربان ہے,जो बहुत मेहरबान,Maha Pengasih,পরম করুণাময়,Rahman,"Милостивого,"
3,1,Al-Fatiha,الرحيم,الرَّحِيمِ,the Most Merciful,بار بار رحم فرمانے والا ہے,निहायत रहम करने वाला है,Maha Penyayang,অসীম দয়ালু,Rahim,Милосердного!
4,1,Al-Fatiha,الحمد,الْحَمْدُ,All praises and thanks,سب تعریف,सब तारीफ़,pujian,সকল প্রশংসা,hamdolsun,Хвала
...,...,...,...,...,...,...,...,...,...,...,...
78242,114,An-Nas,صدور,صُدُورِ,(the) breasts,سینوں,सीनों में,dada,অন্তরসমূহের,göğüslerine,грудях
78243,114,An-Nas,الناس,النَّاسِ,(of) mankind,انسانوں کے,लोगों के,manusia,মানুষের,insanların,"людей,"
78244,114,An-Nas,من,مِنَ,From,سے,जिन्नों में से,dari,মধ্য হতে,cinlerden,(будучи) из (числа)
78245,114,An-Nas,الجنة,الْجِنَّةِ,the jinn,جنوں میں,जिन्नों में से,jin,জিনের,cinlerden,джиннов


In [173]:
df_arabic_word_count = df_all_word_translate["arabic"].value_counts(ascending=False).reset_index()
df_arabic_word_count.rename(columns={"index":"arabic","arabic":"arabic_frequency"}, inplace=True)
#df_arabic_word_count = df_arabic_word_count.head(300)
df_arabic_word_count

Unnamed: 0,arabic,arabic_frequency
0,مِنْ,1673
1,فِي,1185
2,مَا,1013
3,اللَّهِ,828
4,لَا,812
...,...,...
17611,بِصَوْتِكَ,1
17612,وَأَجْلِبْ,1
17613,بِخَيْلِكَ,1
17614,وَرَجِلِكَ,1


In [158]:
df_word_translate_select = df_all_word_translate.iloc[:,[2,3,4,5,6,7,8,9,10]]
df_word_translate_select.drop_duplicates(inplace=True)
df_word_translate_select

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,arabic_simple,arabic,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,بسم,بِسْمِ,In (the) name,ساتھ نام,साथ नाम,dengan nama,নামে,adıyla,С именем
1,الله,اللَّهِ,(of) Allah,اللہ کے,अल्लाह के,Allah,আল্লাহ (র),Allah'ın,"Аллаха,"
2,الرحمن,الرَّحْمَٰنِ,the Most Gracious,جو بے حد مہربان ہے,जो बहुत मेहरबान,Maha Pengasih,পরম করুণাময়,Rahman,"Милостивого,"
3,الرحيم,الرَّحِيمِ,the Most Merciful,بار بار رحم فرمانے والا ہے,निहायत रहम करने वाला है,Maha Penyayang,অসীম দয়ালু,Rahim,Милосердного!
4,الحمد,الْحَمْدُ,All praises and thanks,سب تعریف,सब तारीफ़,pujian,সকল প্রশংসা,hamdolsun,Хвала
...,...,...,...,...,...,...,...,...,...
78242,صدور,صُدُورِ,(the) breasts,سینوں,सीनों में,dada,অন্তরসমূহের,göğüslerine,грудях
78243,الناس,النَّاسِ,(of) mankind,انسانوں کے,लोगों के,manusia,মানুষের,insanların,"людей,"
78244,من,مِنَ,From,سے,जिन्नों में से,dari,মধ্য হতে,cinlerden,(будучи) из (числа)
78245,الجنة,الْجِنَّةِ,the jinn,جنوں میں,जिन्नों में से,jin,জিনের,cinlerden,джиннов


In [159]:
df_arabic_count_translate_merge = pd.merge(df_arabic_word_count, df_word_translate_select, how="left", on="arabic")
df_arabic_count_translate_merge.drop_duplicates(inplace=True)
df_arabic_count_translate_merge 

Unnamed: 0,arabic,arabic_frequency,arabic_simple,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,مِنْ,1673,من,from,سے,आपसे पहले,dari,থেকে,senden önce,до тебя
1,مِنْ,1673,من,from,سے,अपने रब की तरफ़ से,dari,পক্ষ হতে,Rablerinden,от
2,مِنْ,1673,من,from,سے,तुमसे पहले थे,dari,থেকে,sizden öncekileri,до вас
3,مِنْ,1673,من,[of],اس,इस जैसी,dari,মধ্য হতে,onun gibi,из
4,مِنْ,1673,من,from,سے,सिवाय,dari,দিয়ে,başkadan,помимо
...,...,...,...,...,...,...,...,...,...,...
70087,بِصَوْتِكَ,1,بصوتك,with your voice,اپنی آواز کے ساتھ,साथ अपनी आवाज़ के,dengan suaramu,দিয়ে তোমার কন্ঠস্বর,sesinle,своим голосом
70088,وَأَجْلِبْ,1,وأجلب,and assault,اور چڑھا لا,और चढ़ा ला,dan kerahkanlah,ও চড়াও হও,ve yaygarayı bas,и собирай
70089,بِخَيْلِكَ,1,بخيلك,with your cavalry,سوار اپنے,सवार अपने,dengan pasukan kudamu,নিয়ে তোমার অশ্বারোহী বাহিনী,atlılarınla,твою конницу
70090,وَرَجِلِكَ,1,ورجلك,and infantry,اور پیادے اپنے,और प्यादे अपने,dan pasukanmu yang berjalan kaki,ও তোমার পদাতিক বাহিনী,ve yayalarınla,"и пехоту твою,"


In [None]:
#df_arabic_count_translate_merge.to_excel("Quran_Arabic_Word_Translate_Selected.xlsx", index=False) 

In [None]:
#df_arabic_count_translate_merge["buckwalter"] = df_arabic_count_translate_merge.loc[:,"arabic"].apply(lambda x : transString(x,0))
#df_arabic_count_translate_merge

In [None]:
#df_arabic_count_translate_merge["buckwalter_simple"] = df_arabic_count_translate_merge.loc[:,"buckwalter"].apply(lambda x : clean_ex(x))
#df_arabic_count_translate_merge

In [None]:
#df_arabic_count_translate_merge["arabic_simple"] = df_arabic_count_translate_merge.loc[:,"buckwalter_simple"].apply(lambda x : transString(x,1))
#df_arabic_count_translate_merge

In [162]:
## It is for first file
#df_arabic_count_translate_merge["buckwalter"] = df_arabic_count_translate_merge.loc[:,"arabic"].apply(lambda x : transString(x,0))
#df_arabic_count_translate_merge["buckwalter_simple"] = df_arabic_count_translate_merge.loc[:,"buckwalter"].apply(lambda x : clean_ex(x))# Convert Arabic Simple
#df_arabic_count_translate_merge["arabic_simple"] = df_arabic_count_translate_merge.loc[:,"buckwalter_simple"].apply(lambda x : transString(x,1))
#df_arabic_count_translate_merge.drop(["buckwalter","buckwalter_simple"], axis=1, inplace=True)
#df_arabic_count_translate_merge = df_arabic_count_translate_merge.iloc[:,[0,10,1,2,3,4,5,6,7,8,9]]
#df_arabic_count_translate_merge

In [164]:
df_select_english = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_english'].value_counts(ascending=False).index[0]))
df_select_english.rename(columns={0:"translate_english"}, inplace=True)
df_select_english.reset_index(inplace=True)
df_select_english.sort_values(by="arabic_frequency", ascending=False, inplace=True)
df_select_english.reset_index(drop=True, inplace=True)
df_select_english

Unnamed: 0,arabic,arabic_simple,arabic_frequency,translate_english
0,مِنْ,من,1673,from
1,فِي,في,1185,in
2,مَا,ما,1013,what
3,اللَّهِ,الله,828,Allah
4,لَا,لا,812,not
...,...,...,...,...
17611,شَاعِرٍ,شاعر,1,(of) a poet;
17612,شَافِعِينَ,شافعين,1,intercessors
17613,شَاكِرٌ,شاكر,1,(is) All-Appreciative
17614,شَاكِرُونَ,شاكرون,1,(be) grateful


In [165]:
df_select_urdu = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_urdu'].value_counts(ascending=False).index[0]))
df_select_urdu.rename(columns={0:"translate_urdu"}, inplace=True)
df_select_urdu.reset_index(inplace=True)
df_select_urdu.reset_index(drop=True, inplace=True)
df_select_urdu

Unnamed: 0,arabic,arabic_simple,arabic_frequency,translate_urdu
0,آبَاءَكُمْ,آباءكم,3,اپنے باپ دادا کو
1,آبَاءَنَا,آباءنا,10,اپنے آباؤ اجداد کو
2,آبَاءَهُمُ,آباءهم,1,ان کے آباؤ اجداد کے پاس
3,آبَاءَهُمْ,آباءهم,3,ان کے باپوں کو
4,آبَاءِ,آباء,1,باپوں
...,...,...,...,...
17611,يُوَفِّقِ,يوفق,1,موافقت پیدا کرے گا
17612,يُوَفِّيهِمُ,يوفيهم,1,پورا پورا دے گا ان کو
17613,يُوَلُّوكُمُ,يولوكم,1,وہ پھیر دیں گے تم سے
17614,يُوَلُّونَ,يولون,1,وہ پھیریں گے


In [166]:
df_select_hindi = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_hindi'].value_counts(ascending=False).index[0]))
df_select_hindi.rename(columns={0:"translate_hindi"}, inplace=True)
df_select_hindi.reset_index(inplace=True)
df_select_hindi.sort_values(by="arabic_frequency", ascending=False, inplace=True)
df_select_hindi.reset_index(drop=True, inplace=True)
df_select_hindi

Unnamed: 0,arabic,arabic_simple,arabic_frequency,translate_hindi
0,مِنْ,من,1673,इससे पहले
1,فِي,في,1185,ज़मीन में
2,مَا,ما,1013,जो
3,اللَّهِ,الله,828,अल्लाह के
4,لَا,لا,812,नहीं
...,...,...,...,...
17611,شَاعِرٍ,شاعر,1,किसी शायर का
17612,شَافِعِينَ,شافعين,1,कोई सिफ़ारिशियों में से
17613,شَاكِرٌ,شاكر,1,क़द्रदान है
17614,شَاكِرُونَ,شاكرون,1,शुक्र गुज़ार हो


In [167]:
df_select_indonesian = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_indonesian'].value_counts(ascending=False).index[0]))
df_select_indonesian.rename(columns={0:"translate_indonesian"}, inplace=True)
df_select_indonesian.reset_index(inplace=True)
df_select_indonesian.sort_values(by="arabic_frequency", ascending=False, inplace=True)
df_select_indonesian.reset_index(drop=True, inplace=True)
df_select_indonesian

Unnamed: 0,arabic,arabic_simple,arabic_frequency,translate_indonesian
0,مِنْ,من,1673,dari
1,فِي,في,1185,dalam
2,مَا,ما,1013,apa
3,اللَّهِ,الله,828,Allah
4,لَا,لا,812,tidak
...,...,...,...,...
17611,شَاعِرٍ,شاعر,1,seorang penyair
17612,شَافِعِينَ,شافعين,1,pemberi syafa'at
17613,شَاكِرٌ,شاكر,1,Maha Mensyukuri
17614,شَاكِرُونَ,شاكرون,1,orang-orang yang berterima kasih


In [168]:
df_select_bangla = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_bangla'].value_counts(ascending=False).index[0]))
df_select_bangla.rename(columns={0:"translate_bangla"}, inplace=True)
df_select_bangla.reset_index(inplace=True)
df_select_bangla.sort_values(by="arabic_frequency", ascending=False, inplace=True)
df_select_bangla.reset_index(drop=True, inplace=True)
df_select_bangla

Unnamed: 0,arabic,arabic_simple,arabic_frequency,translate_bangla
0,مِنْ,من,1673,থেকে
1,فِي,في,1185,মধ্যে
2,مَا,ما,1013,যা
3,اللَّهِ,الله,828,আল্লাহর
4,لَا,لا,812,না
...,...,...,...,...
17611,شَاعِرٍ,شاعر,1,কবির
17612,شَافِعِينَ,شافعين,1,সুপারিশকারীদের
17613,شَاكِرٌ,شاكر,1,(তার) মূল্যদানকারী
17614,شَاكِرُونَ,شاكرون,1,কৃতজ্ঞ হবে


In [169]:
df_select_turkish = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_turkish'].value_counts(ascending=False).index[0]))
df_select_turkish.rename(columns={0:"translate_turkish"}, inplace=True)
df_select_turkish.reset_index(inplace=True)
df_select_turkish.sort_values(by="arabic_frequency", ascending=False, inplace=True)
df_select_turkish.reset_index(drop=True, inplace=True)
df_select_turkish

Unnamed: 0,arabic,arabic_simple,arabic_frequency,translate_turkish
0,مِنْ,من,1673,hiçbir
1,فِي,في,1185,içinde
2,مَا,ما,1013,şeyleri
3,اللَّهِ,الله,828,Allah'ın
4,لَا,لا,812,yoktur
...,...,...,...,...
17611,شَاعِرٍ,شاعر,1,bir şa'irin
17612,شَافِعِينَ,شافعين,1,şefa'atçilerimiz
17613,شَاكِرٌ,شاكر,1,karşılığını verir
17614,شَاكِرُونَ,شاكرون,1,şükredenlerden


In [170]:
df_select_russian = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_russian'].value_counts(ascending=False).index[0]))
df_select_russian.rename(columns={0:"translate_russian"}, inplace=True)
df_select_russian.reset_index(inplace=True)
df_select_russian.sort_values(by="arabic_frequency", ascending=False, inplace=True)
df_select_russian.reset_index(drop=True, inplace=True)
df_select_russian

Unnamed: 0,arabic,arabic_simple,arabic_frequency,translate_russian
0,مِنْ,من,1673,из
1,فِي,في,1185,в
2,مَا,ما,1013,"то, что"
3,اللَّهِ,الله,828,Аллаха
4,لَا,لا,812,не
...,...,...,...,...
17611,شَاعِرٍ,شاعر,1,(какого-либо) поэта!
17612,شَافِعِينَ,شافعين,1,заступников
17613,شَاكِرٌ,شاكر,1,"благодарный,"
17614,شَاكِرُونَ,شاكرون,1,благодарными?


In [171]:
dfs = [df_select_english,df_select_urdu,df_select_hindi,df_select_indonesian,df_select_bangla,df_select_turkish,df_select_russian]

In [172]:
df_all_translate_merge = reduce(lambda  left,right: pd.merge(left,right, on=['arabic','arabic_simple','arabic_frequency'], how='inner'), dfs)  # left,right make left to right merge
#df_all_translate_merge = reduce(lambda  right,left: pd.merge(left,right, on=['word'], how='outer'), dfs)  # right,left make right to left merge
df_all_translate_merge.drop_duplicates(inplace=True)
df_all_translate_merge

Unnamed: 0,arabic,arabic_simple,arabic_frequency,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,مِنْ,من,1673,from,سے,इससे पहले,dari,থেকে,hiçbir,из
1,فِي,في,1185,in,میں,ज़मीन में,dalam,মধ্যে,içinde,в
2,مَا,ما,1013,what,جو,जो,apa,যা,şeyleri,"то, что"
3,اللَّهِ,الله,828,Allah,اللہ کے,अल्लाह के,Allah,আল্লাহর,Allah'ın,Аллаха
4,لَا,لا,812,not,نہیں,नहीं,tidak,না,yoktur,не
...,...,...,...,...,...,...,...,...,...,...
17611,شَاعِرٍ,شاعر,1,(of) a poet;,شاعر کا,किसी शायर का,seorang penyair,কবির,bir şa'irin,(какого-либо) поэта!
17612,شَافِعِينَ,شافعين,1,intercessors,سفارشیوں,कोई सिफ़ारिशियों में से,pemberi syafa'at,সুপারিশকারীদের,şefa'atçilerimiz,заступников
17613,شَاكِرٌ,شاكر,1,(is) All-Appreciative,قدر دان ہے,क़द्रदान है,Maha Mensyukuri,(তার) মূল্যদানকারী,karşılığını verir,"благодарный,"
17614,شَاكِرُونَ,شاكرون,1,(be) grateful,شکر گزار ہو,शुक्र गुज़ार हो,orang-orang yang berterima kasih,কৃতজ্ঞ হবে,şükredenlerden,благодарными?


In [None]:
df_all_translate_merge.to_excel("Quran_Word_Translate_All.xlsx", index=False) 

#### Quaran Root With Related Arabic Word

In [None]:
df_all_translate_merge

In [194]:
df_master_root_data = pd.read_excel("/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Arabic/Quaran/Flash Cards/Data/Master 17052022.xlsx")
df_master_root_data = df_master_root_data.loc[:,["surah","ayah","word rank","surah latin","tanzil_clean","tanzil_plain","root_arabic","root"]]
df_master_root_data

Unnamed: 0,surah,ayah,word rank,surah latin,tanzil_clean,tanzil_plain,root_arabic,root
0,1,1,1,Al-Fatiha,بسم,بِسْمِ,سمو,smw
1,1,1,2,Al-Fatiha,الله,اللَّهِ,اله,Alh
2,1,1,3,Al-Fatiha,الرحمن,الرَّحْمَٰنِ,رحم,rHm
3,1,1,4,Al-Fatiha,الرحيم,الرَّحِيمِ,رحم,rHm
4,1,2,1,Al-Fatiha,الحمد,الْحَمْدُ,حمد,Hmd
...,...,...,...,...,...,...,...,...
78242,114,5,4,An-Nas,صدور,صُدُورِ,صدر,Sdr
78243,114,5,5,An-Nas,الناس,النَّاسِ,نوس,nws
78244,114,6,1,An-Nas,من,مِنَ,,
78245,114,6,2,An-Nas,الجنة,الْجِنَّةِ,جنن,jnn


In [186]:
df_arabic_root_count = df_master_root_data["root_arabic"].value_counts(ascending=False).reset_index()
df_arabic_root_count.rename(columns={"index":"root_arabic","root_arabic":"root_arabic_frequency"}, inplace=True)
df_arabic_root_count = df_arabic_root_count.head(99)
df_arabic_root_count

Unnamed: 0,root_arabic,root_arabic_frequency
0,اله,2851
1,قول,1722
2,كون,1390
3,ربب,980
4,امن,879
...,...,...
94,يدي,120
95,عزز,119
96,امم,119
97,جزي,118


In [191]:
df_root_data_merge = pd.merge(df_arabic_root_count, df_master_root_data, how="left", on=["root_arabic"])
df_root_data_merge.rename(columns={"tanzil_plain":"arabic","tanzil_clean":"arabic_simple"}, inplace=True)
#df_root_data_merge.drop_duplicates(inplace=True)
df_root_data_merge = df_root_data_merge.iloc[:,[0,8,1,2,3,4,5,6,7]]
df_root_data_merge 

Unnamed: 0,root_arabic,root,root_arabic_frequency,surah,ayah,word rank,surah latin,arabic_simple,arabic
0,اله,Alh,2851,1,1,2,Al-Fatiha,الله,اللَّهِ
1,اله,Alh,2851,1,2,2,Al-Fatiha,لله,لِلَّهِ
2,اله,Alh,2851,2,7,2,Al-Baqara,الله,اللَّهُ
3,اله,Alh,2851,2,8,6,Al-Baqara,بالله,بِاللَّهِ
4,اله,Alh,2851,2,9,2,Al-Baqara,الله,اللَّهَ
...,...,...,...,...,...,...,...,...,...
30110,ابو,Abw,117,56,48,1,Al-Waqi'a,أوآباؤنا,أَوَآبَاؤُنَا
30111,ابو,Abw,117,58,22,15,Al-Mujadila,آباءهم,آبَاءَهُمْ
30112,ابو,Abw,117,60,4,36,Al-Mumtahina,لأبيه,لِأَبِيهِ
30113,ابو,Abw,117,80,35,2,Abasa,وأبيه,وَأَبِيهِ


In [193]:
df_root_translate_merge = pd.merge(df_root_data_merge, df_all_translate_merge, how="left", on=["arabic_simple","arabic"])
df_root_translate_merge.drop_duplicates(inplace=True)
df_root_translate_merge 

Unnamed: 0,root_arabic,root,root_arabic_frequency,surah,ayah,word rank,surah latin,arabic_simple,arabic,arabic_frequency,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,اله,Alh,2851,1,1,2,Al-Fatiha,الله,اللَّهِ,828,Allah,اللہ کے,अल्लाह के,Allah,আল্লাহর,Allah'ın,Аллаха
1,اله,Alh,2851,1,2,2,Al-Fatiha,لله,لِلَّهِ,116,to Allah,اللہ کے لیے,अल्लाह के लिए,bagi Allah,জন্যে আল্লাহর,Allah'a,"Аллаху,"
2,اله,Alh,2851,2,7,2,Al-Baqara,الله,اللَّهُ,733,Allah,اللہ,अल्लाह,Allah,আল্লাহ,Allah,Аллах
3,اله,Alh,2851,2,8,6,Al-Baqara,بالله,بِاللَّهِ,139,in Allah,اللہ پر,अल्लाह पर,kepada Allah,আল্লাহই,Allah'a,в Аллаха
4,اله,Alh,2851,2,9,2,Al-Baqara,الله,اللَّهَ,592,Allah,اللہ سے,अल्लाह,Allah,আল্লাহকে,Allah,Аллах
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30110,ابو,Abw,117,56,48,1,Al-Waqi'a,أوآباؤنا,أَوَآبَاؤُنَا,2,Or our fathers,کیا بھلا ہمارے آباؤ اجداد,क्या भला आबा ओ अजदाद हमारे,atau bapak-bapak kami,এবং কি (উঠানো হবে) আমাদের পিতৃপুরুষদেরকেও,atalarımız da mı?,Или
30111,ابو,Abw,117,58,22,15,Al-Mujadila,آباءهم,آبَاءَهُمْ,3,their fathers,ان کے باپوں کو,उनके बापों को,bapak-bapak mereka,পিতাদের তাদের(পরিচয়),babalarını,"их отцов,"
30112,ابو,Abw,117,60,4,36,Al-Mumtahina,لأبيه,لِأَبِيهِ,9,to his father,اپنے باپ سے,अपने बाप से,kepada bapaknya,তার পিতাকে,babasına,своему отцу
30113,ابو,Abw,117,80,35,2,Abasa,وأبيه,وَأَبِيهِ,1,and his father,اور اپنے باپ سے,और अपने बाप से,dan bapaknya,ও তার বাপ (হতে),ve babası(ndan),"и отца своего,"


In [None]:
df_root_translate_merge.to_excel("Quran_Root_Word_Translate_All.xlsx", index=False) 

#### Quaran Ayah Frequency