### Flash Cards Data Analysis

In [120]:
import pandas as pd
import numpy as np
import re
from functools import reduce
from camel_tools.utils.charmap import CharMapper
from lang_trans.arabic import buckwalter

In [99]:
# buckwalter text clean for simple before transString
def clean_ex(text):
    buckwalter_out = re.sub(r'''([PJVG\.:;,!\+\^\]\[@#FNKauio`~"%-])''', "", text) # difference from clean
    return buckwalter_out

In [100]:
# Arabic Encoding Extended
# -*- coding: utf-8 -*-

# Arabic Transliteration based on Buckwalter
# dictionary source is buckwalter2unicode.py http://www.redhat.com/archives/fedora-extras-commits/2007-June/msg03617.html 

buck2uni = {"'": u"\u0621", # hamza-on-the-line
            "|": u"\u0622", # madda
            ">": u"\u0623", # hamza-on-'alif
            "&": u"\u0624", # hamza-on-waaw
            "<": u"\u0625", # hamza-under-'alif
            "}": u"\u0626", # hamza-on-yaa'
            "A": u"\u0627", # bare 'alif
            "b": u"\u0628", # baa'
            "p": u"\u0629", # taa' marbuuTa
            "t": u"\u062A", # taa'
            "v": u"\u062B", # thaa'
            "j": u"\u062C", # jiim
            "H": u"\u062D", # Haa'
            "x": u"\u062E", # khaa'
            "d": u"\u062F", # daal
            "*": u"\u0630", # dhaal
            "r": u"\u0631", # raa'
            "z": u"\u0632", # zaay
            "s": u"\u0633", # siin
            "$": u"\u0634", # shiin
            "S": u"\u0635", # Saad
            "D": u"\u0636", # Daad
            "T": u"\u0637", # Taa'
            "Z": u"\u0638", # Zaa' (DHaa')
            "E": u"\u0639", # cayn
            "g": u"\u063A", # ghayn
            "_": u"\u0640", # taTwiil
            "f": u"\u0641", # faa'
            "q": u"\u0642", # qaaf
            "k": u"\u0643", # kaaf
            "l": u"\u0644", # laam
            "m": u"\u0645", # miim
            "n": u"\u0646", # nuun
            "h": u"\u0647", # haa'
            "w": u"\u0648", # waaw
            "Y": u"\u0649", # 'alif maqSuura
            "y": u"\u064A", # yaa'
            "F": u"\u064B", # fatHatayn
            "N": u"\u064C", # Dammatayn
            "K": u"\u064D", # kasratayn
            "a": u"\u064E", # fatHa
            "u": u"\u064F", # Damma
            "i": u"\u0650", # kasra
            "~": u"\u0651", # shaddah
            "o": u"\u0652", # sukuun
            "^": u"\u0653", # maddah
            "#": u"\u0654", # hamzaabove            
            "`": u"\u0670", # dagger 'alif
            "{": u"\u0671", # waSla
            "P": u"\u067E", # arabicletterpeh
            "J": u"\u0686", 
            "V": u"\u06A4", # arabicletterveh
            "G": u"\u06AF", # arabiclettergaf
            ":": u"\u06DC", # smallhighseen
            "@": u"\u06DF", # smallhighroundedzero
            "\"": u"\u06E0", # smallhighuprightrectangularzero
            "[": u"\u06E2", # smallhighmeemisolatedform
            ";": u"\u06E3", # smalllowseen
            ",": u"\u06E5", # smallwaw
            ".": u"\u06E6", # smallya
            "!": u"\u06E8", # smallhighnoon
            "-": u"\u06EA", # emptycentrelowstop
            "+": u"\u06EB", # emptycentrehighstop
            "%": u"\u06EC", # roundedhighstopwithfilledcentre
            "]": u"\u06ED", # smalllowmeem           
}

def transString(string, reverse=0):
    '''Given a Unicode string, transliterate into Buckwalter. To go from
    Buckwalter back to Unicode, set reverse=1'''

    for k, v in buck2uni.items():
      if not reverse:
            string = string.replace(v, k)
      else:
            string = string.replace(k, v)

    return string

In [101]:
# Simple Technique
# -*- coding: utf-8 -*-

# Arabic Transliteration based on Buckwalter
# dictionary source is buckwalter2unicode.py http://www.redhat.com/archives/fedora-extras-commits/2007-June/msg03617.html 

buck2unisimple = {"'": u"\u0621", # hamza-on-the-line
            "|": u"\u0622", # madda
            ">": u"\u0623", # hamza-on-'alif
            "&": u"\u0624", # hamza-on-waaw
            "<": u"\u0625", # hamza-under-'alif
            "A": u"\u0627", # bare 'alif
            "b": u"\u0628", # baa'
            "p": u"\u0629", # taa' marbuuTa
            "t": u"\u062A", # taa'
            "v": u"\u062B", # thaa'
            "j": u"\u062C", # jiim
            "H": u"\u062D", # Haa'
            "x": u"\u062E", # khaa'
            "d": u"\u062F", # daal
            "*": u"\u0630", # dhaal
            "r": u"\u0631", # raa'
            "z": u"\u0632", # zaay
            "s": u"\u0633", # siin
            "$": u"\u0634", # shiin
            "S": u"\u0635", # Saad
            "D": u"\u0636", # Daad
            "T": u"\u0637", # Taa'
            "Z": u"\u0638", # Zaa' (DHaa')
            "E": u"\u0639", # cayn
            "g": u"\u063A", # ghayn
            "_": u"\u0640", # taTwiil
            "f": u"\u0641", # faa'
            "q": u"\u0642", # qaaf
            "k": u"\u0643", # kaaf
            "l": u"\u0644", # laam
            "m": u"\u0645", # miim
            "n": u"\u0646", # nuun
            "h": u"\u0647", # haa'
            "w": u"\u0648", # waaw
            "Y": u"\u0649", # 'alif maqSuura
            "y": u"\u064A", # yaa'
            "{": u"\u0671", # waSla         
}

def transStringSimple(string, reverse=0):
    '''Given a Unicode string, transliterate into Buckwalter. To go from
    Buckwalter back to Unicode, set reverse=1'''

    for k, v in buck2unisimple.items():
      if not reverse:
            string = string.replace(v, k)
      else:
            string = string.replace(k, v)

    return string

In [102]:
# character clean for simple after transStringSimple 
def clean(text):
    arabic_out = re.sub(r'''([PJVG\.:;,!\+\]\[@#FNKauio`~"%-])''', "", text) # .$^*+
    arabic_out2 = re.sub(r"\^", " ", arabic_out)
    return arabic_out2

#### Arabic Word Select

In [103]:
df_arabic_translate = pd.read_excel("/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Arabic/Quaran/Flash Cards/Data/Quran_All_Translate_Fill_Asterisks.xlsx")
#df_arabic_translate = pd.read_excel("/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Arabic/Quaran/Flash Cards/Data/All_Surah_Translate_File_Concat.xlsx")
df_arabic_translate

Unnamed: 0,surah,surah latin,arabic,transliterate,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian,index
0,1,Al-Fatiha,بِسۡمِ,bis'mi,In (the) name,ساتھ نام,साथ नाम,dengan nama,নামে,adıyla,С именем,0
1,1,Al-Fatiha,ٱللَّهِ,al-lahi,(of) Allah,اللہ کے,अल्लाह के,Allah,আল্লাহ (র),Allah'ın,"Аллаха,",1
2,1,Al-Fatiha,ٱلرَّحۡمَٰنِ,al-rahmani,the Most Gracious,جو بے حد مہربان ہے,जो बहुत मेहरबान,Maha Pengasih,পরম করুণাময়,Rahman,"Милостивого,",2
3,1,Al-Fatiha,ٱلرَّحِيمِ,al-rahimi,the Most Merciful,بار بار رحم فرمانے والا ہے,निहायत रहम करने वाला है,Maha Penyayang,অসীম দয়ালু,Rahim,Милосердного!,3
4,1,Al-Fatiha,ٱلۡحَمۡدُ,al-hamdu,All praises and thanks,سب تعریف,सब तारीफ़,pujian,সকল প্রশংসা,hamdolsun,Хвала,4
...,...,...,...,...,...,...,...,...,...,...,...,...
77424,114,An-Nas,صُدُورِ,suduri,(the) breasts,سینوں,सीनों में,dada,অন্তরসমূহের,*,грудях,78244
77425,114,An-Nas,ٱلنَّاسِ,al-nasi,(of) mankind,انسانوں کے,लोगों के,manusia,মানুষের,insanların,"людей,",78245
77426,114,An-Nas,مِنَ,mina,From,سے,*,dari,মধ্য হতে,cinlerden,(будучи) из (числа),78246
77427,114,An-Nas,ٱلۡجِنَّةِ,al-jinati,the jinn,جنوں میں,जिन्नों में से,jin,জিনের,*,джиннов,78247


In [104]:
df_arabic_word_count = df_arabic_translate["arabic"].value_counts(ascending=False).reset_index()
df_arabic_word_count.rename(columns={"index":"arabic","arabic":"arabic_frequency"}, inplace=True)
#df_arabic_word_count = df_arabic_word_count.head(300)
df_arabic_word_count

Unnamed: 0,arabic,arabic_frequency
0,فِي,1096
1,ٱلَّذِينَ,810
2,مِن,728
3,مَا,709
4,ٱللَّهِ,667
...,...,...
21304,ٱلۡجُبِّ,1
21305,يَلۡتَقِطۡهُ,1
21306,ٱلسَّيَّارَةِ,1
21307,تَأۡمَ۬نَّا,1


In [105]:
df_word_translate_select = df_arabic_translate.iloc[:,[2,3,4,5,6,7,8,9,10]]
df_word_translate_select.drop_duplicates(inplace=True)
df_word_translate_select

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,arabic,transliterate,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,بِسۡمِ,bis'mi,In (the) name,ساتھ نام,साथ नाम,dengan nama,নামে,adıyla,С именем
1,ٱللَّهِ,al-lahi,(of) Allah,اللہ کے,अल्लाह के,Allah,আল্লাহ (র),Allah'ın,"Аллаха,"
2,ٱلرَّحۡمَٰنِ,al-rahmani,the Most Gracious,جو بے حد مہربان ہے,जो बहुत मेहरबान,Maha Pengasih,পরম করুণাময়,Rahman,"Милостивого,"
3,ٱلرَّحِيمِ,al-rahimi,the Most Merciful,بار بار رحم فرمانے والا ہے,निहायत रहम करने वाला है,Maha Penyayang,অসীম দয়ালু,Rahim,Милосердного!
4,ٱلۡحَمۡدُ,al-hamdu,All praises and thanks,سب تعریف,सब तारीफ़,pujian,সকল প্রশংসা,hamdolsun,Хвала
...,...,...,...,...,...,...,...,...,...
77424,صُدُورِ,suduri,(the) breasts,سینوں,सीनों में,dada,অন্তরসমূহের,*,грудях
77425,ٱلنَّاسِ,al-nasi,(of) mankind,انسانوں کے,लोगों के,manusia,মানুষের,insanların,"людей,"
77426,مِنَ,mina,From,سے,*,dari,মধ্য হতে,cinlerden,(будучи) из (числа)
77427,ٱلۡجِنَّةِ,al-jinati,the jinn,جنوں میں,जिन्नों में से,jin,জিনের,*,джиннов


In [106]:
df_arabic_count_translate_merge = pd.merge(df_arabic_word_count, df_word_translate_select, how="left", on="arabic")
df_arabic_count_translate_merge.drop_duplicates(inplace=True)
df_arabic_count_translate_merge 

Unnamed: 0,arabic,arabic_frequency,transliterate,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,فِي,1096,fi,In,میں,*,di dalam,মধ্যে আছে,onların kablerinde,В
1,فِي,1096,fi,in,میں,*,di,মধ্যে,yeryüzünde,на
2,فِي,1096,fi,in,ان کی,*,dalam,মধ্যে,içinde,в
3,فِي,1096,fi,in,میں,*,dalam,মধ্যে,içinde,во
4,فِي,1096,fi,in,میں,*,dalam,মধ্যে,içinde,в
...,...,...,...,...,...,...,...,...,...,...
69905,ٱلۡجُبِّ,1,al-jubi,(of) the well,کنوئیں میں۔ کنوئیں کی گہرائی میں,कुएँ की,sumur,কূপের,kuyunun,"колодца,–"
69906,يَلۡتَقِطۡهُ,1,yaltaqit'hu,will pick him,اٹھا لے گا اس کو,उठा लेगा उसे,menemuinya,তাকে তুলে নিবে,onu (görüp) alsın,подберёт его
69907,ٱلسَّيَّارَةِ,1,al-sayarati,[the] caravan,قافلہ,क़ाफ़िला,orang-orang yang berjalan,পথযাত্রীদলের,kervanlardan,"караван,"
69908,تَأۡمَ۬نَّا,1,tamanna,trust us,آپ بھروسہ کرتے ہم,नहीं आप भरोसा करते हम पर,kamu mempercayai kami,আমাদের বিশ্বাস করেন,*,доверяешь ты нам


In [107]:
#df_arabic_count_translate_merge.to_excel("Quran_Arabic_Word_Translate_Selected.xlsx", index=False) 

In [108]:
#df_arabic_count_translate_merge["buckwalter"] = df_arabic_count_translate_merge.loc[:,"arabic"].apply(lambda x : transString(x,0))
#df_arabic_count_translate_merge

In [109]:
#df_arabic_count_translate_merge["buckwalter_simple"] = df_arabic_count_translate_merge.loc[:,"buckwalter"].apply(lambda x : clean_ex(x))
#df_arabic_count_translate_merge

In [110]:
#df_arabic_count_translate_merge["arabic_simple"] = df_arabic_count_translate_merge.loc[:,"buckwalter_simple"].apply(lambda x : transString(x,1))
#df_arabic_count_translate_merge

In [111]:
df_arabic_count_translate_merge["buckwalter"] = df_arabic_count_translate_merge.loc[:,"arabic"].apply(lambda x : transString(x,0))
df_arabic_count_translate_merge["buckwalter_simple"] = df_arabic_count_translate_merge.loc[:,"buckwalter"].apply(lambda x : clean_ex(x))# Convert Arabic Simple
df_arabic_count_translate_merge["arabic_simple"] = df_arabic_count_translate_merge.loc[:,"buckwalter_simple"].apply(lambda x : transString(x,1))
df_arabic_count_translate_merge.drop(["buckwalter","buckwalter_simple"], axis=1, inplace=True)
df_arabic_count_translate_merge = df_arabic_count_translate_merge.iloc[:,[0,10,1,2,3,4,5,6,7,8,9]]
df_arabic_count_translate_merge

Unnamed: 0,arabic,arabic_simple,arabic_frequency,transliterate,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,فِي,في,1096,fi,In,میں,*,di dalam,মধ্যে আছে,onların kablerinde,В
1,فِي,في,1096,fi,in,میں,*,di,মধ্যে,yeryüzünde,на
2,فِي,في,1096,fi,in,ان کی,*,dalam,মধ্যে,içinde,в
3,فِي,في,1096,fi,in,میں,*,dalam,মধ্যে,içinde,во
4,فِي,في,1096,fi,in,میں,*,dalam,মধ্যে,içinde,в
...,...,...,...,...,...,...,...,...,...,...,...
69905,ٱلۡجُبِّ,ٱلۡجب,1,al-jubi,(of) the well,کنوئیں میں۔ کنوئیں کی گہرائی میں,कुएँ की,sumur,কূপের,kuyunun,"колодца,–"
69906,يَلۡتَقِطۡهُ,يلۡتقطۡه,1,yaltaqit'hu,will pick him,اٹھا لے گا اس کو,उठा लेगा उसे,menemuinya,তাকে তুলে নিবে,onu (görüp) alsın,подберёт его
69907,ٱلسَّيَّارَةِ,ٱلسيارة,1,al-sayarati,[the] caravan,قافلہ,क़ाफ़िला,orang-orang yang berjalan,পথযাত্রীদলের,kervanlardan,"караван,"
69908,تَأۡمَ۬نَّا,تأۡمنا,1,tamanna,trust us,آپ بھروسہ کرتے ہم,नहीं आप भरोसा करते हम पर,kamu mempercayai kami,আমাদের বিশ্বাস করেন,*,доверяешь ты нам


In [112]:
df_select_english = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency","transliterate"]).apply(lambda x: x['translate_english'].value_counts(ascending=False).index[0]))
df_select_english.rename(columns={0:"translate_english"}, inplace=True)
df_select_english.reset_index(inplace=True)
df_select_english.sort_values(by="arabic_frequency", ascending=False, inplace=True)
df_select_english.reset_index(drop=True, inplace=True)
df_select_english

Unnamed: 0,arabic,arabic_simple,arabic_frequency,transliterate,translate_english
0,فِي,في,1096,fi,in
1,ٱلَّذِينَ,ٱلذين,810,alladhina,those who
2,مِن,من,728,min,from
3,مَا,ما,709,ma,what
4,ٱللَّهِ,ٱلله,667,al-lahi,Allah
...,...,...,...,...,...
21306,فَذُو,فذو,1,fadhu,then (he is) full
21307,فَذُوقُوهُ,فذوقوه,1,fadhuquhu,So taste it
21308,فَرَأَوۡهُ,فرأوۡه,1,fara-awhu,and they see it
21309,فَرَبُّكُمۡ,فربكمۡ,1,farabbukum,but your Lord


In [113]:
df_select_urdu = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency","transliterate"]).apply(lambda x: x['translate_urdu'].value_counts(ascending=False).index[0]))
df_select_urdu.rename(columns={0:"translate_urdu"}, inplace=True)
df_select_urdu.reset_index(inplace=True)
df_select_urdu.reset_index(drop=True, inplace=True)
df_select_urdu

Unnamed: 0,arabic,arabic_simple,arabic_frequency,transliterate,translate_urdu
0,ءَأَتَّخِذُ,ءأتخذ,1,a-attakhidhu,کیا میں بنالوں
1,ءَأَرۡبَابٞ,ءأرۡبابٞ,1,a-arbabun,کیا بہت سے رب
2,ءَأَسۡجُدُ,ءأسۡجد,1,a-asjudu,کیا میں سجدہ کروں
3,ءَأَسۡلَمۡتُمۡۚ,ءأسۡلمۡتمۡۚ,1,a-aslamtum,اسلام لائے تم۔ سپرد کردیا تم نے
4,ءَأَشۡفَقۡتُمۡ,ءأشۡفقۡتمۡ,1,a-ashfaqtum,کیا ڈر گئے تم
...,...,...,...,...,...
21306,۞يَـٰٓأَيُّهَا,۞يـأيها,8,yaayyuha,اے
21307,۞يَوۡمَ,۞يوۡم,2,yawma,جس دن
21308,۞يَٰبَنِيٓ,۞يبني,1,yabani,اے بنی
21309,۞ٱحۡشُرُواْ,۞ٱحۡشروا,1,uh'shuru,گھیر لاؤ


In [114]:
df_select_hindi = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency","transliterate"]).apply(lambda x: x['translate_hindi'].value_counts(ascending=False).index[0]))
df_select_hindi.rename(columns={0:"translate_hindi"}, inplace=True)
df_select_hindi.reset_index(inplace=True)
df_select_hindi.sort_values(by="arabic_frequency", ascending=False, inplace=True)
df_select_hindi.reset_index(drop=True, inplace=True)
df_select_hindi

Unnamed: 0,arabic,arabic_simple,arabic_frequency,transliterate,translate_hindi
0,فِي,في,1096,fi,*
1,ٱلَّذِينَ,ٱلذين,810,alladhina,वो जो
2,مِن,من,728,min,*
3,مَا,ما,709,ma,जो
4,ٱللَّهِ,ٱلله,667,al-lahi,अल्लाह के
...,...,...,...,...,...
21306,فَذُو,فذو,1,fadhu,*
21307,فَذُوقُوهُ,فذوقوه,1,fadhuquhu,पस चख़ो इसे
21308,فَرَأَوۡهُ,فرأوۡه,1,fara-awhu,फिर वो देखें उस (खेती) को
21309,فَرَبُّكُمۡ,فربكمۡ,1,farabbukum,तो रब तुम्हारा


In [115]:
df_select_indonesian = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency","transliterate"]).apply(lambda x: x['translate_indonesian'].value_counts(ascending=False).index[0]))
df_select_indonesian.rename(columns={0:"translate_indonesian"}, inplace=True)
df_select_indonesian.reset_index(inplace=True)
df_select_indonesian.sort_values(by="arabic_frequency", ascending=False, inplace=True)
df_select_indonesian.reset_index(drop=True, inplace=True)
df_select_indonesian

Unnamed: 0,arabic,arabic_simple,arabic_frequency,transliterate,translate_indonesian
0,فِي,في,1096,fi,dalam
1,ٱلَّذِينَ,ٱلذين,810,alladhina,orang-orang yang
2,مِن,من,728,min,dari
3,مَا,ما,709,ma,apa
4,ٱللَّهِ,ٱلله,667,al-lahi,Allah
...,...,...,...,...,...
21306,فَذُو,فذو,1,fadhu,maka ia mempunyai
21307,فَذُوقُوهُ,فذوقوه,1,fadhuquhu,maka rasakanlah ia
21308,فَرَأَوۡهُ,فرأوۡه,1,fara-awhu,maka mereka melihatnya
21309,فَرَبُّكُمۡ,فربكمۡ,1,farabbukum,maka Tuhanmu


In [116]:
df_select_bangla = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency","transliterate"]).apply(lambda x: x['translate_bangla'].value_counts(ascending=False).index[0]))
df_select_bangla.rename(columns={0:"translate_bangla"}, inplace=True)
df_select_bangla.reset_index(inplace=True)
df_select_bangla.sort_values(by="arabic_frequency", ascending=False, inplace=True)
df_select_bangla.reset_index(drop=True, inplace=True)
df_select_bangla

Unnamed: 0,arabic,arabic_simple,arabic_frequency,transliterate,translate_bangla
0,فِي,في,1096,fi,মধ্যে
1,ٱلَّذِينَ,ٱلذين,810,alladhina,যারা
2,مِن,من,728,min,থেকে
3,مَا,ما,709,ma,যা
4,ٱللَّهِ,ٱلله,667,al-lahi,আল্লাহর
...,...,...,...,...,...
21306,فَذُو,فذو,1,fadhu,তখন
21307,فَذُوقُوهُ,فذوقوه,1,fadhuquhu,"""তাই তোমরা স্বাদ নাও তার"""
21308,فَرَأَوۡهُ,فرأوۡه,1,fara-awhu,ফলে তারা দেখে তা
21309,فَرَبُّكُمۡ,فربكمۡ,1,farabbukum,অতঃপর তোমাদের রবই


In [117]:
df_select_turkish = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency","transliterate"]).apply(lambda x: x['translate_turkish'].value_counts(ascending=False).index[0]))
df_select_turkish.rename(columns={0:"translate_turkish"}, inplace=True)
df_select_turkish.reset_index(inplace=True)
df_select_turkish.sort_values(by="arabic_frequency", ascending=False, inplace=True)
df_select_turkish.reset_index(drop=True, inplace=True)
df_select_turkish

Unnamed: 0,arabic,arabic_simple,arabic_frequency,transliterate,translate_turkish
0,فِي,في,1096,fi,içinde
1,ٱلَّذِينَ,ٱلذين,810,alladhina,kimseler
2,مِن,من,728,min,hiçbir
3,مَا,ما,709,ma,şeyleri
4,ٱللَّهِ,ٱلله,667,al-lahi,Allah'ın
...,...,...,...,...,...
21306,فَذُو,فذو,1,fadhu,hemen
21307,فَذُوقُوهُ,فذوقوه,1,fadhuquhu,şimdi tadın onu
21308,فَرَأَوۡهُ,فرأوۡه,1,fara-awhu,ve (ekini) görseler
21309,فَرَبُّكُمۡ,فربكمۡ,1,farabbukum,Rabbiniz


In [118]:
df_select_russian = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency","transliterate"]).apply(lambda x: x['translate_russian'].value_counts(ascending=False).index[0]))
df_select_russian.rename(columns={0:"translate_russian"}, inplace=True)
df_select_russian.reset_index(inplace=True)
df_select_russian.sort_values(by="arabic_frequency", ascending=False, inplace=True)
df_select_russian.reset_index(drop=True, inplace=True)
df_select_russian

Unnamed: 0,arabic,arabic_simple,arabic_frequency,transliterate,translate_russian
0,فِي,في,1096,fi,в
1,ٱلَّذِينَ,ٱلذين,810,alladhina,"тех, которые"
2,مِن,من,728,min,из
3,مَا,ما,709,ma,"то, что"
4,ٱللَّهِ,ٱلله,667,al-lahi,Аллаха
...,...,...,...,...,...
21306,فَذُو,فذو,1,fadhu,то (становится он) обладателем
21307,فَذُوقُوهُ,فذوقوه,1,fadhuquhu,Вкусите же его
21308,فَرَأَوۡهُ,فرأوۡه,1,fara-awhu,и они увидят это
21309,فَرَبُّكُمۡ,فربكمۡ,1,farabbukum,а Господь ваш


In [124]:
dfs = [df_select_english,df_select_urdu,df_select_hindi,df_select_indonesian,df_select_bangla,df_select_turkish,df_select_russian]

In [125]:
df_select_translate_all = reduce(lambda  left,right: pd.merge(left,right, on=['arabic','arabic_simple','arabic_frequency','transliterate'], how='inner'), dfs)  # left,right make left to right merge
#df_select_translate_all = reduce(lambda  right,left: pd.merge(left,right, on=['word'], how='outer'), dfs)  # right,left make right to left merge
df_select_translate_all.drop_duplicates(inplace=True)
df_select_translate_all

Unnamed: 0,arabic,arabic_simple,arabic_frequency,transliterate,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,فِي,في,1096,fi,in,میں,*,dalam,মধ্যে,içinde,в
1,ٱلَّذِينَ,ٱلذين,810,alladhina,those who,وہ لوگ,वो जो,orang-orang yang,যারা,kimseler,"тех, которые"
2,مِن,من,728,min,from,سے,*,dari,থেকে,hiçbir,из
3,مَا,ما,709,ma,what,جو,जो,apa,যা,şeyleri,"то, что"
4,ٱللَّهِ,ٱلله,667,al-lahi,Allah,اللہ کے,अल्लाह के,Allah,আল্লাহর,Allah'ın,Аллаха
...,...,...,...,...,...,...,...,...,...,...,...
21306,فَذُو,فذو,1,fadhu,then (he is) full,تو کرنے والا ہوتا ہے,*,maka ia mempunyai,তখন,hemen,то (становится он) обладателем
21307,فَذُوقُوهُ,فذوقوه,1,fadhuquhu,So taste it,پس چکھو اس کو,पस चख़ो इसे,maka rasakanlah ia,"""তাই তোমরা স্বাদ নাও তার""",şimdi tadın onu,Вкусите же его
21308,فَرَأَوۡهُ,فرأوۡه,1,fara-awhu,and they see it,پھر وہ دیکھیں اس کو,फिर वो देखें उस (खेती) को,maka mereka melihatnya,ফলে তারা দেখে তা,ve (ekini) görseler,и они увидят это
21309,فَرَبُّكُمۡ,فربكمۡ,1,farabbukum,but your Lord,تو رب تمہارا,तो रब तुम्हारा,maka Tuhanmu,অতঃপর তোমাদের রবই,Rabbiniz,а Господь ваш


In [119]:
#df_arabic_count_translate_merge.to_excel("Quran_Arabic_Word_Translate_Selected2.xlsx", index=False) 

#### Arabic Root With Related Arabic Word

In [None]:
df_master = pd.read_excel("")  # read master file