### Flash Cards Data Analysis

In [78]:
import os
import pandas as pd
import numpy as np
import re
import glob
from functools import reduce
from camel_tools.utils.charmap import CharMapper
from lang_trans.arabic import buckwalter
from pathlib import Path
import shutil

In [79]:
path = f"/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Arabic/Quran/Flash Cards/Result"

Path(path).mkdir(parents=True, exist_ok=True)

In [80]:
# buckwalter text clean for simple before transString
def clean_ex(text):
    buckwalter_out = re.sub(r'''([PJVG\.:;,!\+\^\]\[@#FNKauio`~"%-])''', "", text) # difference from clean
    return buckwalter_out

In [81]:
# Arabic Encoding Extended
# -*- coding: utf-8 -*-

# Arabic Transliteration based on Buckwalter
# dictionary source is buckwalter2unicode.py http://www.redhat.com/archives/fedora-extras-commits/2007-June/msg03617.html 

buck2uni = {"'": u"\u0621", # hamza-on-the-line
            "|": u"\u0622", # madda
            ">": u"\u0623", # hamza-on-'alif
            "&": u"\u0624", # hamza-on-waaw
            "<": u"\u0625", # hamza-under-'alif
            "}": u"\u0626", # hamza-on-yaa'
            "A": u"\u0627", # bare 'alif
            "b": u"\u0628", # baa'
            "p": u"\u0629", # taa' marbuuTa
            "t": u"\u062A", # taa'
            "v": u"\u062B", # thaa'
            "j": u"\u062C", # jiim
            "H": u"\u062D", # Haa'
            "x": u"\u062E", # khaa'
            "d": u"\u062F", # daal
            "*": u"\u0630", # dhaal
            "r": u"\u0631", # raa'
            "z": u"\u0632", # zaay
            "s": u"\u0633", # siin
            "$": u"\u0634", # shiin
            "S": u"\u0635", # Saad
            "D": u"\u0636", # Daad
            "T": u"\u0637", # Taa'
            "Z": u"\u0638", # Zaa' (DHaa')
            "E": u"\u0639", # cayn
            "g": u"\u063A", # ghayn
            "_": u"\u0640", # taTwiil
            "f": u"\u0641", # faa'
            "q": u"\u0642", # qaaf
            "k": u"\u0643", # kaaf
            "l": u"\u0644", # laam
            "m": u"\u0645", # miim
            "n": u"\u0646", # nuun
            "h": u"\u0647", # haa'
            "w": u"\u0648", # waaw
            "Y": u"\u0649", # 'alif maqSuura
            "y": u"\u064A", # yaa'
            "F": u"\u064B", # fatHatayn
            "N": u"\u064C", # Dammatayn
            "K": u"\u064D", # kasratayn
            "a": u"\u064E", # fatHa
            "u": u"\u064F", # Damma
            "i": u"\u0650", # kasra
            "~": u"\u0651", # shaddah
            "o": u"\u0652", # sukuun
            "^": u"\u0653", # maddah
            "#": u"\u0654", # hamzaabove            
            "`": u"\u0670", # dagger 'alif
            "{": u"\u0671", # waSla
            "P": u"\u067E", # arabicletterpeh
            "J": u"\u0686", 
            "V": u"\u06A4", # arabicletterveh
            "G": u"\u06AF", # arabiclettergaf
            ":": u"\u06DC", # smallhighseen
            "@": u"\u06DF", # smallhighroundedzero
            "\"": u"\u06E0", # smallhighuprightrectangularzero
            "[": u"\u06E2", # smallhighmeemisolatedform
            ";": u"\u06E3", # smalllowseen
            ",": u"\u06E5", # smallwaw
            ".": u"\u06E6", # smallya
            "!": u"\u06E8", # smallhighnoon
            "-": u"\u06EA", # emptycentrelowstop
            "+": u"\u06EB", # emptycentrehighstop
            "%": u"\u06EC", # roundedhighstopwithfilledcentre
            "]": u"\u06ED", # smalllowmeem           
}

def transString(string, reverse=0):
    '''Given a Unicode string, transliterate into Buckwalter. To go from
    Buckwalter back to Unicode, set reverse=1'''

    for k, v in buck2uni.items():
      if not reverse:
            string = string.replace(v, k)
      else:
            string = string.replace(k, v)

    return string

In [82]:
# Simple Technique
# -*- coding: utf-8 -*-

# Arabic Transliteration based on Buckwalter
# dictionary source is buckwalter2unicode.py http://www.redhat.com/archives/fedora-extras-commits/2007-June/msg03617.html 

buck2unisimple = {"'": u"\u0621", # hamza-on-the-line
            "|": u"\u0622", # madda
            ">": u"\u0623", # hamza-on-'alif
            "&": u"\u0624", # hamza-on-waaw
            "<": u"\u0625", # hamza-under-'alif
            "A": u"\u0627", # bare 'alif
            "b": u"\u0628", # baa'
            "p": u"\u0629", # taa' marbuuTa
            "t": u"\u062A", # taa'
            "v": u"\u062B", # thaa'
            "j": u"\u062C", # jiim
            "H": u"\u062D", # Haa'
            "x": u"\u062E", # khaa'
            "d": u"\u062F", # daal
            "*": u"\u0630", # dhaal
            "r": u"\u0631", # raa'
            "z": u"\u0632", # zaay
            "s": u"\u0633", # siin
            "$": u"\u0634", # shiin
            "S": u"\u0635", # Saad
            "D": u"\u0636", # Daad
            "T": u"\u0637", # Taa'
            "Z": u"\u0638", # Zaa' (DHaa')
            "E": u"\u0639", # cayn
            "g": u"\u063A", # ghayn
            "_": u"\u0640", # taTwiil
            "f": u"\u0641", # faa'
            "q": u"\u0642", # qaaf
            "k": u"\u0643", # kaaf
            "l": u"\u0644", # laam
            "m": u"\u0645", # miim
            "n": u"\u0646", # nuun
            "h": u"\u0647", # haa'
            "w": u"\u0648", # waaw
            "Y": u"\u0649", # 'alif maqSuura
            "y": u"\u064A", # yaa'
            "{": u"\u0671", # waSla         
}

def transStringSimple(string, reverse=0):
    '''Given a Unicode string, transliterate into Buckwalter. To go from
    Buckwalter back to Unicode, set reverse=1'''

    for k, v in buck2unisimple.items():
      if not reverse:
            string = string.replace(v, k)
      else:
            string = string.replace(k, v)

    return string

In [83]:
# character clean for simple after transStringSimple 
def clean(text):
    arabic_out = re.sub(r'''([PJVG\.:;,!\+\]\[@#FNKauio`~"%-])''', "", text) # .$^*+
    arabic_out2 = re.sub(r"\^", " ", arabic_out)
    return arabic_out2

#### Quran Word Translate Select

In [103]:
df_quran_master = pd.read_excel("/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Arabic/Quran/Flash Cards/Data/Master 17052022.xlsx")
df_all_word_translate = df_quran_master.loc[:,["surah","surah latin","tanzil_clean","arabic","translate_english","translate_urdu","translate_hindi","translate_indonesian","translate_bangla","translate_turkish","translate_russian"]]
#df_all_word_translate = df_quran_master.loc[:,["surah","surah latin","tanzil_clean","tanzil_plain","arabic","translate_english","translate_urdu","translate_hindi","translate_indonesian","translate_bangla","translate_turkish","translate_russian"]]  # used for arabic plain
df_all_word_translate

Unnamed: 0,surah,surah latin,tanzil_clean,arabic,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,1,Al-Fatiha,بسم,بِسۡمِ,In (the) name,ساتھ نام,साथ नाम,dengan nama,নামে,adıyla,С именем
1,1,Al-Fatiha,الله,ٱللَّهِ,(of) Allah,اللہ کے,अल्लाह के,Allah,আল্লাহ (র),Allah'ın,"Аллаха,"
2,1,Al-Fatiha,الرحمن,ٱلرَّحۡمَٰنِ,the Most Gracious,جو بے حد مہربان ہے,जो बहुत मेहरबान,Maha Pengasih,পরম করুণাময়,Rahman,"Милостивого,"
3,1,Al-Fatiha,الرحيم,ٱلرَّحِيمِ,the Most Merciful,بار بار رحم فرمانے والا ہے,निहायत रहम करने वाला है,Maha Penyayang,অসীম দয়ালু,Rahim,Милосердного!
4,1,Al-Fatiha,الحمد,ٱلۡحَمۡدُ,All praises and thanks,سب تعریف,सब तारीफ़,pujian,সকল প্রশংসা,hamdolsun,Хвала
...,...,...,...,...,...,...,...,...,...,...,...
78242,114,An-Nas,صدور,صُدُورِ,(the) breasts,سینوں,सीनों में,dada,অন্তরসমূহের,göğüslerine,грудях
78243,114,An-Nas,الناس,ٱلنَّاسِ,(of) mankind,انسانوں کے,लोगों के,manusia,মানুষের,insanların,"людей,"
78244,114,An-Nas,من,مِنَ,From,سے,जिन्नों में से,dari,মধ্য হতে,cinlerden,(будучи) из (числа)
78245,114,An-Nas,الجنة,ٱلۡجِنَّةِ,the jinn,جنوں میں,जिन्नों में से,jin,জিনের,cinlerden,джиннов


In [104]:
df_all_word_translate = df_all_word_translate[~df_all_word_translate["arabic"].isnull()]
df_all_word_translate

Unnamed: 0,surah,surah latin,tanzil_clean,arabic,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,1,Al-Fatiha,بسم,بِسۡمِ,In (the) name,ساتھ نام,साथ नाम,dengan nama,নামে,adıyla,С именем
1,1,Al-Fatiha,الله,ٱللَّهِ,(of) Allah,اللہ کے,अल्लाह के,Allah,আল্লাহ (র),Allah'ın,"Аллаха,"
2,1,Al-Fatiha,الرحمن,ٱلرَّحۡمَٰنِ,the Most Gracious,جو بے حد مہربان ہے,जो बहुत मेहरबान,Maha Pengasih,পরম করুণাময়,Rahman,"Милостивого,"
3,1,Al-Fatiha,الرحيم,ٱلرَّحِيمِ,the Most Merciful,بار بار رحم فرمانے والا ہے,निहायत रहम करने वाला है,Maha Penyayang,অসীম দয়ালু,Rahim,Милосердного!
4,1,Al-Fatiha,الحمد,ٱلۡحَمۡدُ,All praises and thanks,سب تعریف,सब तारीफ़,pujian,সকল প্রশংসা,hamdolsun,Хвала
...,...,...,...,...,...,...,...,...,...,...,...
78242,114,An-Nas,صدور,صُدُورِ,(the) breasts,سینوں,सीनों में,dada,অন্তরসমূহের,göğüslerine,грудях
78243,114,An-Nas,الناس,ٱلنَّاسِ,(of) mankind,انسانوں کے,लोगों के,manusia,মানুষের,insanların,"людей,"
78244,114,An-Nas,من,مِنَ,From,سے,जिन्नों में से,dari,মধ্য হতে,cinlerden,(будучи) из (числа)
78245,114,An-Nas,الجنة,ٱلۡجِنَّةِ,the jinn,جنوں میں,जिन्नों में से,jin,জিনের,cinlerden,джиннов


In [105]:
df_all_word_translate.drop("arabic", axis=1, inplace=True)
#df_all_word_translate.rename(columns={"tanzil_plain":"arabic","tanzil_clean":"arabic_simple"}, inplace=True)  # used for arabic plain
df_all_word_translate.rename(columns={"tanzil_clean":"arabic_simple"}, inplace=True)
df_all_word_translate

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,surah,surah latin,arabic_simple,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,1,Al-Fatiha,بسم,In (the) name,ساتھ نام,साथ नाम,dengan nama,নামে,adıyla,С именем
1,1,Al-Fatiha,الله,(of) Allah,اللہ کے,अल्लाह के,Allah,আল্লাহ (র),Allah'ın,"Аллаха,"
2,1,Al-Fatiha,الرحمن,the Most Gracious,جو بے حد مہربان ہے,जो बहुत मेहरबान,Maha Pengasih,পরম করুণাময়,Rahman,"Милостивого,"
3,1,Al-Fatiha,الرحيم,the Most Merciful,بار بار رحم فرمانے والا ہے,निहायत रहम करने वाला है,Maha Penyayang,অসীম দয়ালু,Rahim,Милосердного!
4,1,Al-Fatiha,الحمد,All praises and thanks,سب تعریف,सब तारीफ़,pujian,সকল প্রশংসা,hamdolsun,Хвала
...,...,...,...,...,...,...,...,...,...,...
78242,114,An-Nas,صدور,(the) breasts,سینوں,सीनों में,dada,অন্তরসমূহের,göğüslerine,грудях
78243,114,An-Nas,الناس,(of) mankind,انسانوں کے,लोगों के,manusia,মানুষের,insanların,"людей,"
78244,114,An-Nas,من,From,سے,जिन्नों में से,dari,মধ্য হতে,cinlerden,(будучи) из (числа)
78245,114,An-Nas,الجنة,the jinn,جنوں میں,जिन्नों में से,jin,জিনের,cinlerden,джиннов


In [106]:
#df_arabic_word_count = df_all_word_translate["arabic"].value_counts(ascending=False).reset_index()  # used for arabic plain
#df_arabic_word_count.rename(columns={"index":"arabic","arabic":"arabic_frequency"}, inplace=True)  # used for arabic plain

df_arabic_word_count = df_all_word_translate["arabic_simple"].value_counts(ascending=False).reset_index()
df_arabic_word_count.rename(columns={"index":"arabic_simple","arabic_simple":"arabic_simple_frequency"}, inplace=True)
#df_arabic_word_count = df_arabic_word_count.head(300)
df_arabic_word_count

Unnamed: 0,arabic_simple,arabic_simple_frequency
0,من,2763
1,الله,2153
2,في,1185
3,ما,1013
4,إن,966
...,...,...
14862,نفقد,1
14863,صواع,1
14864,لنفسد,1
14865,سارقين,1


In [107]:
#df_word_translate_select = df_all_word_translate.iloc[:,[2,3,4,5,6,7,8,9,10]]  # used for arabic plain
df_word_translate_select = df_all_word_translate.iloc[:,[2,3,4,5,6,7,8,9]]
df_word_translate_select.drop_duplicates(inplace=True)
df_word_translate_select

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,arabic_simple,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,بسم,In (the) name,ساتھ نام,साथ नाम,dengan nama,নামে,adıyla,С именем
1,الله,(of) Allah,اللہ کے,अल्लाह के,Allah,আল্লাহ (র),Allah'ın,"Аллаха,"
2,الرحمن,the Most Gracious,جو بے حد مہربان ہے,जो बहुत मेहरबान,Maha Pengasih,পরম করুণাময়,Rahman,"Милостивого,"
3,الرحيم,the Most Merciful,بار بار رحم فرمانے والا ہے,निहायत रहम करने वाला है,Maha Penyayang,অসীম দয়ালু,Rahim,Милосердного!
4,الحمد,All praises and thanks,سب تعریف,सब तारीफ़,pujian,সকল প্রশংসা,hamdolsun,Хвала
...,...,...,...,...,...,...,...,...
78242,صدور,(the) breasts,سینوں,सीनों में,dada,অন্তরসমূহের,göğüslerine,грудях
78243,الناس,(of) mankind,انسانوں کے,लोगों के,manusia,মানুষের,insanların,"людей,"
78244,من,From,سے,जिन्नों में से,dari,মধ্য হতে,cinlerden,(будучи) из (числа)
78245,الجنة,the jinn,جنوں میں,जिन्नों में से,jin,জিনের,cinlerden,джиннов


In [108]:
#df_arabic_count_translate_merge = pd.merge(df_arabic_word_count, df_word_translate_select, how="left", on="arabic")  # used for arabic plain
df_arabic_count_translate_merge = pd.merge(df_arabic_word_count, df_word_translate_select, how="left", on="arabic_simple")
df_arabic_count_translate_merge.drop_duplicates(inplace=True)
df_arabic_count_translate_merge 

Unnamed: 0,arabic_simple,arabic_simple_frequency,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,من,2763,from,سے,आपसे पहले,dari,থেকে,senden önce,до тебя
1,من,2763,from,سے,अपने रब की तरफ़ से,dari,পক্ষ হতে,Rablerinden,от
2,من,2763,(are some) who,جو,जो,orang yang,যারা,öyleleri de,"тот, кто"
3,من,2763,from,سے,आसमान से,dari,থেকে,gökten,с
4,من,2763,from,سے,बिजली के कड़ाकों से,dari,কারনে,yıldırım seslerinden,от
...,...,...,...,...,...,...,...,...,...
69898,نفقد,1,We are missing,ہم گم پاتے ہیں,हम गुम पाते हैं,kami kehilangan,"""আমরা হারিয়েছি",kaybettik,«Ищем мы
69899,صواع,1,(the) cup,پیمانہ,पैमाना,piala,পানপাত্র,su tasını,чашу
69900,لنفسد,1,that we cause corruption,کہ ہم فساد کریں,कि हम फ़साद करें,untuk membuat kerusakan,জন্যে আমরা খারাপ কাজ করার,bozgunculuk yapmak için,чтобы сеять беспорядок
69901,سارقين,1,thieves,چور,चोरी करने वाले,orang-orang yang mencuri,"চোর""",hırsız,ворами».


In [109]:
#df_select_english = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_english'].value_counts(ascending=False).index[0]))  # used for arabic plain
df_select_english = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic_simple","arabic_simple_frequency"]).apply(lambda x: x['translate_english'].value_counts(ascending=False).index[0]))
df_select_english.rename(columns={0:"translate_english"}, inplace=True)
df_select_english.reset_index(inplace=True)
#df_select_english.sort_values(by="arabic_frequency", ascending=False, inplace=True)  # used for arabic plain
df_select_english.sort_values(by="arabic_simple_frequency", ascending=False, inplace=True)
df_select_english.reset_index(drop=True, inplace=True)
df_select_english

Unnamed: 0,arabic_simple,arabic_simple_frequency,translate_english
0,من,2763,from
1,الله,2153,Allah
2,في,1185,in
3,ما,1013,what
4,إن,966,Indeed
...,...,...,...
14862,طردتهم,1,I drove them away
14863,طرف,1,a glance
14864,طرفا,1,a part
14865,طرفك,1,your glance


In [110]:
#df_select_urdu = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_urdu'].value_counts(ascending=False).index[0]))  # used for arabic plain
df_select_urdu = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic_simple","arabic_simple_frequency"]).apply(lambda x: x['translate_urdu'].value_counts(ascending=False).index[0]))
df_select_urdu.rename(columns={0:"translate_urdu"}, inplace=True)
df_select_urdu.reset_index(inplace=True)
#df_select_urdu.sort_values(by="arabic_frequency", ascending=False, inplace=True)  # used for arabic plain
df_select_urdu.sort_values(by="arabic_simple_frequency", ascending=False, inplace=True)
df_select_urdu.reset_index(drop=True, inplace=True)
df_select_urdu

Unnamed: 0,arabic_simple,arabic_simple_frequency,translate_urdu
0,من,2763,سے
1,الله,2153,اللہ
2,في,1185,میں
3,ما,1013,جو
4,إن,966,بیشک
...,...,...,...
14862,طردتهم,1,میں نے دور پھینک دیا ان کو
14863,طرف,1,نظر
14864,طرفا,1,ایک گروہ کو۔ ایک حصے کو
14865,طرفك,1,نگاہ تیری


In [112]:
#df_select_hindi = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_hindi'].value_counts(ascending=False).index[0]))  # used for arabic plain
df_select_hindi = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic_simple","arabic_simple_frequency"]).apply(lambda x: x['translate_hindi'].value_counts(ascending=False).index[0]))
df_select_hindi.rename(columns={0:"translate_hindi"}, inplace=True)
df_select_hindi.reset_index(inplace=True)
#df_select_hindi.sort_values(by="arabic_frequency", ascending=False, inplace=True)  # used for arabic plain
df_select_hindi.sort_values(by="arabic_simple_frequency", ascending=False, inplace=True)
df_select_hindi.reset_index(drop=True, inplace=True)
df_select_hindi

Unnamed: 0,arabic_simple,arabic_simple_frequency,translate_hindi
0,من,2763,जो
1,الله,2153,अल्लाह
2,في,1185,ज़मीन में
3,ما,1013,जो
4,إن,966,बेशक
...,...,...,...
14862,طردتهم,1,दूर कर दिया मैंने उन्हें
14863,طرف,1,झुकी आँख
14864,طرفا,1,एक हिस्सा
14865,طرفك,1,नज़र आपकी


In [113]:
#df_select_indonesian = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_indonesian'].value_counts(ascending=False).index[0]))  # used for arabic plain
df_select_indonesian = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic_simple","arabic_simple_frequency"]).apply(lambda x: x['translate_indonesian'].value_counts(ascending=False).index[0]))
df_select_indonesian.rename(columns={0:"translate_indonesian"}, inplace=True)
df_select_indonesian.reset_index(inplace=True)
#df_select_indonesian.sort_values(by="arabic_frequency", ascending=False, inplace=True)  # used for arabic plain
df_select_indonesian.sort_values(by="arabic_simple_frequency", ascending=False, inplace=True)
df_select_indonesian.reset_index(drop=True, inplace=True)
df_select_indonesian

Unnamed: 0,arabic_simple,arabic_simple_frequency,translate_indonesian
0,من,2763,dari
1,الله,2153,Allah
2,في,1185,dalam
3,ما,1013,apa
4,إن,966,sesungguhnya
...,...,...,...
14862,طردتهم,1,aku mengusir mereka
14863,طرف,1,pandangan mata
14864,طرفا,1,golongan
14865,طرفك,1,matamu


In [114]:
#df_select_bangla = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_bangla'].value_counts(ascending=False).index[0]))  # used for arabic plain
df_select_bangla = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic_simple","arabic_simple_frequency"]).apply(lambda x: x['translate_bangla'].value_counts(ascending=False).index[0]))
df_select_bangla.rename(columns={0:"translate_bangla"}, inplace=True)
df_select_bangla.reset_index(inplace=True)
#df_select_bangla.sort_values(by="arabic_frequency", ascending=False, inplace=True)  # used for arabic plain
df_select_bangla.sort_values(by="arabic_simple_frequency", ascending=False, inplace=True)
df_select_bangla.reset_index(drop=True, inplace=True)
df_select_bangla

Unnamed: 0,arabic_simple,arabic_simple_frequency,translate_bangla
0,من,2763,থেকে
1,الله,2153,আল্লাহ
2,في,1185,মধ্যে
3,ما,1013,যা
4,إن,966,নিশ্চয়ই
...,...,...,...
14862,طردتهم,1,তাদেরকে আমি তাড়িয়ে দিই
14863,طرف,1,দৃষ্টি
14864,طرفا,1,একটি অংশকে
14865,طرفك,1,"আপনার চোখের পলক"""


In [115]:
#df_select_turkish = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_turkish'].value_counts(ascending=False).index[0]))  # used for arabic plain
df_select_turkish = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic_simple","arabic_simple_frequency"]).apply(lambda x: x['translate_turkish'].value_counts(ascending=False).index[0]))
df_select_turkish.rename(columns={0:"translate_turkish"}, inplace=True)
df_select_turkish.reset_index(inplace=True)
#df_select_turkish.sort_values(by="arabic_frequency", ascending=False, inplace=True)  # used for arabic plain
df_select_turkish.sort_values(by="arabic_simple_frequency", ascending=False, inplace=True)
df_select_turkish.reset_index(drop=True, inplace=True)
df_select_turkish

Unnamed: 0,arabic_simple,arabic_simple_frequency,translate_turkish
0,من,2763,hiçbir
1,الله,2153,Allah
2,في,1185,içinde
3,ما,1013,şeyleri
4,إن,966,şüphesiz
...,...,...,...
14862,طردتهم,1,onları kovsam
14863,طرف,1,göz ucuyla
14864,طرفا,1,bir kısmını
14865,طرفك,1,gözünü


In [116]:
#df_select_russian = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic","arabic_simple","arabic_frequency"]).apply(lambda x: x['translate_russian'].value_counts(ascending=False).index[0]))  # used for arabic plain
df_select_russian = pd.DataFrame(df_arabic_count_translate_merge.groupby(["arabic_simple","arabic_simple_frequency"]).apply(lambda x: x['translate_russian'].value_counts(ascending=False).index[0]))
df_select_russian.rename(columns={0:"translate_russian"}, inplace=True)
df_select_russian.reset_index(inplace=True)
#df_select_russian.sort_values(by="arabic_frequency", ascending=False, inplace=True)  # used for arabic plain
df_select_russian.sort_values(by="arabic_simple_frequency", ascending=False, inplace=True)
df_select_russian.reset_index(drop=True, inplace=True)
df_select_russian

Unnamed: 0,arabic_simple,arabic_simple_frequency,translate_russian
0,من,2763,из
1,الله,2153,Аллах
2,في,1185,в
3,ما,1013,"то, что"
4,إن,966,"Поистине,"
...,...,...,...
14862,طردتهم,1,я прогоню их?
14863,طرف,1,скрытыми
14864,طرفا,1,одну часть
14865,طرفك,1,твой взор».


In [117]:
dfs = [df_select_english,df_select_urdu,df_select_hindi,df_select_indonesian,df_select_bangla,df_select_turkish,df_select_russian]

In [118]:
#df_all_translate_merge = reduce(lambda  left,right: pd.merge(left,right, on=['arabic','arabic_simple','arabic_simple_frequency'], how='inner'), dfs)  # left,right make left to right merge, used for arabic plain
df_all_translate_merge = reduce(lambda  left,right: pd.merge(left,right, on=['arabic_simple','arabic_simple_frequency'], how='inner'), dfs)  # left,right make left to right merge
#df_all_translate_merge = reduce(lambda  right,left: pd.merge(left,right, on=['word'], how='outer'), dfs)  # right,left make right to left merge
df_all_translate_merge.drop_duplicates(inplace=True)
df_all_translate_merge

Unnamed: 0,arabic_simple,arabic_simple_frequency,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,من,2763,from,سے,जो,dari,থেকে,hiçbir,из
1,الله,2153,Allah,اللہ,अल्लाह,Allah,আল্লাহ,Allah,Аллах
2,في,1185,in,میں,ज़मीन में,dalam,মধ্যে,içinde,в
3,ما,1013,what,جو,जो,apa,যা,şeyleri,"то, что"
4,إن,966,Indeed,بیشک,बेशक,sesungguhnya,নিশ্চয়ই,şüphesiz,"Поистине,"
...,...,...,...,...,...,...,...,...,...
14862,طردتهم,1,I drove them away,میں نے دور پھینک دیا ان کو,दूर कर दिया मैंने उन्हें,aku mengusir mereka,তাদেরকে আমি তাড়িয়ে দিই,onları kovsam,я прогоню их?
14863,طرف,1,a glance,نظر,झुकी आँख,pandangan mata,দৃষ্টি,göz ucuyla,скрытыми
14864,طرفا,1,a part,ایک گروہ کو۔ ایک حصے کو,एक हिस्सा,golongan,একটি অংশকে,bir kısmını,одну часть
14865,طرفك,1,your glance,نگاہ تیری,नज़र आपकी,matamu,"আপনার চোখের পলক""",gözünü,твой взор».


In [119]:
df_all_translate_merge.to_excel("Quran_Word_Translate_All.xlsx", index=False)

In [120]:
df_all_translate_merge_select = df_all_translate_merge.head(305)

In [121]:
df_all_translate_merge_select.to_excel("Quran_Word_Translate_300.xlsx", index=False) 

#### Quran Root With Related Arabic Word Translate

In [122]:
df_all_translate_merge = pd.read_excel("Quran_Word_Translate_All.xlsx")  # read previous output data
df_all_translate_merge

Unnamed: 0,arabic_simple,arabic_simple_frequency,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,من,2763,from,سے,जो,dari,থেকে,hiçbir,из
1,الله,2153,Allah,اللہ,अल्लाह,Allah,আল্লাহ,Allah,Аллах
2,في,1185,in,میں,ज़मीन में,dalam,মধ্যে,içinde,в
3,ما,1013,what,جو,जो,apa,যা,şeyleri,"то, что"
4,إن,966,Indeed,بیشک,बेशक,sesungguhnya,নিশ্চয়ই,şüphesiz,"Поистине,"
...,...,...,...,...,...,...,...,...,...
14862,طردتهم,1,I drove them away,میں نے دور پھینک دیا ان کو,दूर कर दिया मैंने उन्हें,aku mengusir mereka,তাদেরকে আমি তাড়িয়ে দিই,onları kovsam,я прогоню их?
14863,طرف,1,a glance,نظر,झुकी आँख,pandangan mata,দৃষ্টি,göz ucuyla,скрытыми
14864,طرفا,1,a part,ایک گروہ کو۔ ایک حصے کو,एक हिस्सा,golongan,একটি অংশকে,bir kısmını,одну часть
14865,طرفك,1,your glance,نگاہ تیری,नज़र आपकी,matamu,"আপনার চোখের পলক""",gözünü,твой взор».


In [123]:
df_quran_master = pd.read_excel("/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Arabic/Quran/Flash Cards/Data/Master 17052022.xlsx")
df_master_root_data = df_quran_master.loc[:,["surah","ayah","word rank","surah latin","tanzil_clean","tanzil_plain","root_arabic","root"]]
df_master_root_data

Unnamed: 0,surah,ayah,word rank,surah latin,tanzil_clean,tanzil_plain,root_arabic,root
0,1,1,1,Al-Fatiha,بسم,بِسْمِ,سمو,smw
1,1,1,2,Al-Fatiha,الله,اللَّهِ,اله,Alh
2,1,1,3,Al-Fatiha,الرحمن,الرَّحْمَٰنِ,رحم,rHm
3,1,1,4,Al-Fatiha,الرحيم,الرَّحِيمِ,رحم,rHm
4,1,2,1,Al-Fatiha,الحمد,الْحَمْدُ,حمد,Hmd
...,...,...,...,...,...,...,...,...
78242,114,5,4,An-Nas,صدور,صُدُورِ,صدر,Sdr
78243,114,5,5,An-Nas,الناس,النَّاسِ,نوس,nws
78244,114,6,1,An-Nas,من,مِنَ,,
78245,114,6,2,An-Nas,الجنة,الْجِنَّةِ,جنن,jnn


In [127]:
df_arabic_root_count = df_master_root_data["root_arabic"].value_counts(ascending=False).reset_index()
df_arabic_root_count.rename(columns={"index":"root_arabic","root_arabic":"root_arabic_frequency"}, inplace=True)
df_arabic_root_count_select = df_arabic_root_count.iloc[1:100,]
df_arabic_root_count_select

Unnamed: 0,root_arabic,root_arabic_frequency
1,قول,1722
2,كون,1390
3,ربب,980
4,امن,879
5,علم,854
...,...,...
95,عزز,119
96,امم,119
97,جزي,118
98,ابو,117


In [128]:
df_arabic_root_count_select.to_excel("Quran_Root_99.xlsx", index=False)

In [129]:
df_root_select_data_merge = pd.merge(df_arabic_root_count_select, df_master_root_data, how="left", on=["root_arabic"])
df_root_select_data_merge.rename(columns={"tanzil_plain":"arabic","tanzil_clean":"arabic_simple"}, inplace=True)
df_root_select_data_merge.drop_duplicates(inplace=True)
df_root_select_data_merge = df_root_select_data_merge.iloc[:,[0,8,1,2,3,4,5,6,7]]
df_root_select_data_merge 

Unnamed: 0,root_arabic,root,root_arabic_frequency,surah,ayah,word rank,surah latin,arabic_simple,arabic
0,قول,qwl,1722,2,8,4,Al-Baqara,يقول,يَقُولُ
1,قول,qwl,1722,2,11,2,Al-Baqara,قيل,قِيلَ
2,قول,qwl,1722,2,11,8,Al-Baqara,قالوا,قَالُوا
3,قول,qwl,1722,2,13,2,Al-Baqara,قيل,قِيلَ
4,قول,qwl,1722,2,13,8,Al-Baqara,قالوا,قَالُوا
...,...,...,...,...,...,...,...,...,...
27372,نهر,nhr,113,85,11,11,Al-Burooj,الأنهار,الْأَنْهَارُ
27373,نهر,nhr,113,91,3,1,Ash-Shams,والنهار,وَالنَّهَارِ
27374,نهر,nhr,113,92,2,1,Al-Lail,والنهار,وَالنَّهَارِ
27375,نهر,nhr,113,93,10,4,Ad-Dhuha,تنهر,تَنْهَرْ


In [131]:
#df_root_select_translate_merge = pd.merge(df_root_select_data_merge, df_all_translate_merge, how="left", on=["arabic_simple","arabic"])  # used for arabic
df_root_select_translate_merge = pd.merge(df_root_select_data_merge, df_all_translate_merge, how="left", on=["arabic_simple"])
df_root_select_translate_merge.drop_duplicates(inplace=True)
df_root_select_translate_merge 

Unnamed: 0,root_arabic,root,root_arabic_frequency,surah,ayah,word rank,surah latin,arabic_simple,arabic,arabic_simple_frequency,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,قول,qwl,1722,2,8,4,Al-Baqara,يقول,يَقُولُ,39,say,وہ کہتا ہے,वो कहता है,berkata,বলেন,der,говорит:
1,قول,qwl,1722,2,11,2,Al-Baqara,قيل,قِيلَ,34,it is said,کہا گیا,कहा जाता है,dikatakan,বলা হয়,dendiği,говорится
2,قول,qwl,1722,2,11,8,Al-Baqara,قالوا,قَالُوا,250,They said,انہوں نے کہا,उन्होंने कहा,mereka berkata,তারা বললো,dediler,Сказали они:
3,قول,qwl,1722,2,13,2,Al-Baqara,قيل,قِيلَ,34,it is said,کہا گیا,कहा जाता है,dikatakan,বলা হয়,dendiği,говорится
4,قول,qwl,1722,2,13,8,Al-Baqara,قالوا,قَالُوا,250,They said,انہوں نے کہا,उन्होंने कहा,mereka berkata,তারা বললো,dediler,Сказали они:
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27372,نهر,nhr,113,85,11,11,Al-Burooj,الأنهار,الْأَنْهَارُ,43,the rivers,نہریں,नहरें,sungai-sungai,ঝর্ণাসমূহ,ırmaklar,"реки,"
27373,نهر,nhr,113,91,3,1,Ash-Shams,والنهار,وَالنَّهَارِ,26,and the day,اور دن کو,और दिन को,dan siang,ও দিনের,ve gündüzü,"и день,"
27374,نهر,nhr,113,92,2,1,Al-Lail,والنهار,وَالنَّهَارِ,26,and the day,اور دن کو,और दिन को,dan siang,ও দিনের,ve gündüzü,"и день,"
27375,نهر,nhr,113,93,10,4,Ad-Dhuha,تنهر,تَنْهَرْ,1,repel,جھڑکو,आप झिड़कये (उसे),kamu menghardik,তিরস্কার করো,azarlama,отгоняй!


In [132]:
df_root_select_translate_merge.to_excel("Quran_Root_Word_Translate_99.xlsx", index=False) 

In [38]:
df_root_data_merge = pd.merge(df_arabic_root_count, df_master_root_data, how="left", on=["root_arabic"])
df_root_data_merge.rename(columns={"tanzil_plain":"arabic","tanzil_clean":"arabic_simple"}, inplace=True)
df_root_data_merge.drop_duplicates(inplace=True)
df_root_data_merge = df_root_data_merge.iloc[:,[0,8,1,2,3,4,5,6,7]]
df_root_data_merge 

Unnamed: 0,root_arabic,root,root_arabic_frequency,surah,ayah,word rank,surah latin,arabic_simple,arabic
0,اله,Alh,2851,1,1,2,Al-Fatiha,الله,اللَّهِ
1,اله,Alh,2851,1,2,2,Al-Fatiha,لله,لِلَّهِ
2,اله,Alh,2851,2,7,2,Al-Baqara,الله,اللَّهُ
3,اله,Alh,2851,2,8,6,Al-Baqara,بالله,بِاللَّهِ
4,اله,Alh,2851,2,9,2,Al-Baqara,الله,اللَّهَ
...,...,...,...,...,...,...,...,...,...
49963,هشش,h$$,1,20,18,6,Ta-Ha,وأهش,وَأَهُشُّ
49964,نعل,nEl,1,20,12,5,Ta-Ha,نعليك,نَعْلَيْكَ
49965,خلع,xlE,1,20,12,4,Ta-Ha,فاخلع,فَاخْلَعْ
49966,ثري,vry,1,20,6,12,Ta-Ha,الثرى,الثَّرَىٰ


In [133]:
#df_root_translate_merge = pd.merge(df_root_data_merge, df_all_translate_merge, how="left", on=["arabic_simple","arabic"])  # used for arabic
df_root_translate_merge = pd.merge(df_root_data_merge, df_all_translate_merge, how="left", on=["arabic_simple"])
df_root_translate_merge.drop_duplicates(inplace=True)
df_root_translate_merge

Unnamed: 0,root_arabic,root,root_arabic_frequency,surah,ayah,word rank,surah latin,arabic_simple,arabic,arabic_simple_frequency,translate_english,translate_urdu,translate_hindi,translate_indonesian,translate_bangla,translate_turkish,translate_russian
0,اله,Alh,2851,1,1,2,Al-Fatiha,الله,اللَّهِ,2153,Allah,اللہ,अल्लाह,Allah,আল্লাহ,Allah,Аллах
1,اله,Alh,2851,1,2,2,Al-Fatiha,لله,لِلَّهِ,116,to Allah,اللہ کے لیے,अल्लाह के लिए,bagi Allah,জন্যে আল্লাহর,Allah'a,"Аллаху,"
2,اله,Alh,2851,2,7,2,Al-Baqara,الله,اللَّهُ,2153,Allah,اللہ,अल्लाह,Allah,আল্লাহ,Allah,Аллах
3,اله,Alh,2851,2,8,6,Al-Baqara,بالله,بِاللَّهِ,139,in Allah,اللہ پر,अल्लाह पर,kepada Allah,আল্লাহই,Allah'a,в Аллаха
4,اله,Alh,2851,2,9,2,Al-Baqara,الله,اللَّهَ,2153,Allah,اللہ,अल्लाह,Allah,আল্লাহ,Allah,Аллах
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49963,هشش,h$$,1,20,18,6,Ta-Ha,وأهش,وَأَهُشُّ,1,and I bring down leaves,اور میں پتے جھاڑتا ہوں,और मैं पत्ते झाड़ता हूँ,dan aku memukul,এবং পাতা পাড়ি আমি,ve yaprak silkeliyorum,и сбиваю листья
49964,نعل,nEl,1,20,12,5,Ta-Ha,نعليك,نَعْلَيْكَ,1,your shoes,اپنے جوتوں کو,अपने दोनों जूते,kedua terompahmu,তোমার জুতা জোড়া,pabuçlarını,свои сандалии!
49965,خلع,xlE,1,20,12,4,Ta-Ha,فاخلع,فَاخْلَعْ,1,so remove,پس اتار دو,पस उतार दो,maka lepaskan,অতএব খুলে ফেলো তুমি,çıkar,Сними же
49966,ثري,vry,1,20,6,12,Ta-Ha,الثرى,الثَّرَىٰ,1,the soil,زمین کے۔ مٹی کے,गीली मिट्टी के,tanah,সিক্ত মাটির,toprağın,землей.


In [134]:
df_root_translate_merge.to_excel("Quran_Root_Word_Translate_All.xlsx", index=False) 

#### Quran Ayah Frequency

In [136]:
ar2bw = CharMapper.builtin_mapper('ar2bw')

In [137]:
type_file = "clean" # plain, minimal, clean

In [138]:
with open(f"/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Arabic/Quran/Text/Data/Quran Simple {type_file.capitalize()}.txt", "r", encoding="utf8") as file: # Quran Simple Clean.txt from Tanzil website
    arabic_text_simple =file.readlines()

In [139]:
df_tanzil_sent = pd.DataFrame(arabic_text_simple)
df_tanzil_sent

Unnamed: 0,0
0,1|1|بسم الله الرحمن الرحيم\n
1,1|2|الحمد لله رب العالمين\n
2,1|3|الرحمن الرحيم\n
3,1|4|مالك يوم الدين\n
4,1|5|إياك نعبد وإياك نستعين\n
...,...
6231,114|2|ملك الناس\n
6232,114|3|إله الناس\n
6233,114|4|من شر الوسواس الخناس\n
6234,114|5|الذي يوسوس في صدور الناس\n


In [140]:
df_tanzil_sent = pd.DataFrame(df_tanzil_sent.iloc[:,0].apply(lambda x: x.strip("\n")))
df_tanzil_sent

Unnamed: 0,0
0,1|1|بسم الله الرحمن الرحيم
1,1|2|الحمد لله رب العالمين
2,1|3|الرحمن الرحيم
3,1|4|مالك يوم الدين
4,1|5|إياك نعبد وإياك نستعين
...,...
6231,114|2|ملك الناس
6232,114|3|إله الناس
6233,114|4|من شر الوسواس الخناس
6234,114|5|الذي يوسوس في صدور الناس


In [141]:
df_tanzil_sent_nav = df_tanzil_sent.iloc[:,0].str.split("|", n=-1, expand=True)
df_tanzil_sent_nav.rename(columns={0:"num_1",1:"num_2",2:f"arabic_sent_tanzil_{type_file.lower()}"}, inplace=True)
df_tanzil_sent_nav

Unnamed: 0,num_1,num_2,arabic_sent_tanzil_clean
0,1,1,بسم الله الرحمن الرحيم
1,1,2,الحمد لله رب العالمين
2,1,3,الرحمن الرحيم
3,1,4,مالك يوم الدين
4,1,5,إياك نعبد وإياك نستعين
...,...,...,...
6231,114,2,ملك الناس
6232,114,3,إله الناس
6233,114,4,من شر الوسواس الخناس
6234,114,5,الذي يوسوس في صدور الناس


In [142]:
df_tanzil_sent_count = df_tanzil_sent_nav.loc[:,f"arabic_sent_tanzil_{type_file.lower()}"].value_counts(ascending=False).reset_index()
df_tanzil_sent_count.rename(columns={"index":f"arabic_sent_tanzil_{type_file.lower()}",f"arabic_sent_tanzil_{type_file.lower()}":"arabic_sent_frequency"}, inplace=True)
df_tanzil_sent_count

Unnamed: 0,arabic_sent_tanzil_clean,arabic_sent_frequency
0,فبأي آلاء ربكما تكذبان,31
1,ويل يومئذ للمكذبين,11
2,وإن ربك لهو العزيز الرحيم,8
3,فاتقوا الله وأطيعون,8
4,بسم الله الرحمن الرحيم حم,7
...,...,...
6050,وكل إنسان ألزمناه طائره في عنقه ونخرج له يوم ا...,1
6051,وجعلنا الليل والنهار آيتين فمحونا آية الليل وج...,1
6052,ويدع الإنسان بالشر دعاءه بالخير وكان الإنسان ع...,1
6053,وأن الذين لا يؤمنون بالآخرة أعتدنا لهم عذابا أ...,1


In [143]:
df_tanzil_sent_count.to_excel(f"Quran_{type_file.lower().capitalize()}_Ayahs_Frequency.xlsx", index=False)

#### Quran Twogram Threegram Select

In [42]:
df_quran_master = pd.read_excel("/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Arabic/Quran/Flash Cards/Data/Master 17052022.xlsx")
df_master_twogram_data = df_quran_master.loc[:,["twogram_string","twogram_english","twogram_turkish","twogram_urdu","twogram_hindi","twogram_indonesian","twogram_bangla","twogram_russian"]]
df_master_twogram_data

Unnamed: 0,twogram_string,twogram_english,twogram_turkish,twogram_urdu,twogram_hindi,twogram_indonesian,twogram_bangla,twogram_russian
0,بسم الله,In the name of God,Bismillah,خدا کے نام پر,भगवान के नाम पर,Atas nama tuhan,আল্লাহর নামে,Во имя Бога
1,الله الرحمن,God the Most Gracious,Tanrı en merhametli,خدا سب سے زیادہ مہربان,भगवान सबसे दयालु,Tuhan Yang Maha Pemurah,পরম করুণাময় ঈশ্বর,Бог Милостивый
2,الرحمن الرحيم,Most Merciful,Rahman ve Rahim,سب سے زیادہ رحم کرنے والا,परम दयालु,Maha Penyayang,পরম করুণাময়,Самый Милосердный
3,,,,,,,,
4,الحمد لله,Thank God,Allah'a şükür,خدا کا شکر ہے,सुकर है,Terima kasih Tuhan,সৃষ্টিকর্তাকে ধন্যবাদ,Слава Богу
...,...,...,...,...,...,...,...,...
78242,,,,,,,,
78243,,,,,,,,
78244,,,,,,,,
78245,,,,,,,,


In [43]:
df_master_twogram_count = df_master_twogram_data["twogram_string"].value_counts(ascending=False).reset_index()
df_master_twogram_count.rename(columns={"index":"twogram_string", "twogram_string":"twogram_string_frequency"}, inplace=True)
df_master_twogram_count

Unnamed: 0,twogram_string,twogram_string_frequency
0,إن الله,205
1,الذين آمنوا,184
2,في الأرض,176
3,الذين كفروا,134
4,السماوات والأرض,133
...,...,...
195,من بعدهم,17
196,ولو شاء,17
197,إن كان,17
198,رسول الله,17


In [44]:
df_master_twogram_count_translate_merge = pd.merge(df_master_twogram_count, df_master_twogram_data, how="left", on="twogram_string")
df_master_twogram_count_translate_merge.drop_duplicates(inplace=True)
df_master_twogram_count_translate_merge.reset_index(drop=True, inplace=True)
df_master_twogram_count_translate_merge

Unnamed: 0,twogram_string,twogram_string_frequency,twogram_english,twogram_turkish,twogram_urdu,twogram_hindi,twogram_indonesian,twogram_bangla,twogram_russian
0,إن الله,205,God is,Allah,خدا ہے,भगवान है,Tuhan adalah,উপাস্য নেই,Бог это
1,الذين آمنوا,184,who believed,kim inandı,جو ایمان لائے,जो विश्वास करता था,siapa yang percaya?,যারা বিশ্বাস করেছিল,кто верил
2,في الأرض,176,in Earth,Dünya'da,زمین میں,धरती में,di Bumi,পৃথিবীতে,на Земле
3,الذين كفروا,134,who disbelieved,kim inanmadı,جنہوں نے کفر کیا۔,जिसने अविश्वास किया,siapa yang tidak percaya?,যারা অবিশ্বাস করেছিল,кто не поверил
4,السماوات والأرض,133,Heavens and Earth,Gökler ve Dünya,آسمان اور زمین,स्वर्ग और पृथ्वी,Langit dan Bumi,আকাশ ও পৃথিবী,Небеса и Земля
...,...,...,...,...,...,...,...,...,...
195,من بعدهم,17,after them,onlardan sonra,ان کے بعد,उनके बाद,setelah mereka,তাদের পরে,после них
196,ولو شاء,17,if he wants,eğer isterse,اگر وہ چاہتا ہے,यदि वह चाहता है,jika dia mau,যদি সে চায়,если он хочет
197,إن كان,17,if,Eğer,اگر,अगर,jika,যদি,если
198,رسول الله,17,Messenger of God,Resulullah,خدا کے رسول,भगवान के दूत,Utusan Tuhan,রসূল সা,Посланник Бога


In [45]:
df_master_twogram_count_translate_merge.to_excel(f"Quran_Twogram_Translate_Frequency.xlsx", index=False)

In [46]:
df_master_threegram_data = df_quran_master.loc[:,["threegram_string","threegram_english","threegram_turkish","threegram_urdu","threegram_hindi","threegram_indonesian","threegram_bangla","threegram_russian"]]
df_master_threegram_data

Unnamed: 0,threegram_string,threegram_english,threegram_turkish,threegram_urdu,threegram_hindi,threegram_indonesian,threegram_bangla,threegram_russian
0,بسم الله الرحمن,"in the name of ALLAH, the most mercifull",merhametli olan Allah adıyla,اللہ کے نام سے جو سب سے زیادہ رحم کرنے والا ہے۔,"अल्लाह के नाम पर, सबसे दयालु",dengan menyebut nama ALLAH yang maha penyayang,পরম করুণাময় আল্লাহর নামে,во имя АЛЛАХА Милостивого
1,,,,,,,,
2,,,,,,,,
3,,,,,,,,
4,,,,,,,,
...,...,...,...,...,...,...,...,...
78242,,,,,,,,
78243,,,,,,,,
78244,,,,,,,,
78245,,,,,,,,


In [47]:
df_master_threegram_count = df_master_threegram_data["threegram_string"].value_counts(ascending=False).reset_index()
df_master_threegram_count.rename(columns={"index":"threegram_string", "threegram_string":"threegram_string_frequency"}, inplace=True)
df_master_threegram_count

Unnamed: 0,threegram_string,threegram_string_frequency
0,بسم الله الرحمن,114
1,يا أيها الذين,92
2,أيها الذين آمنوا,89
3,من دون الله,71
4,على كل شيء,52
...,...,...
195,الأرض بعد موتها,8
196,إن وعد الله,8
197,إن الله هو,8
198,إلا أن يشاء,8


In [48]:
df_master_threegram_count_translate_merge = pd.merge(df_master_threegram_count, df_master_threegram_data, how="left", on="threegram_string")
df_master_threegram_count_translate_merge.drop_duplicates(inplace=True)
df_master_threegram_count_translate_merge.reset_index(drop=True, inplace=True)
df_master_threegram_count_translate_merge

Unnamed: 0,threegram_string,threegram_string_frequency,threegram_english,threegram_turkish,threegram_urdu,threegram_hindi,threegram_indonesian,threegram_bangla,threegram_russian
0,بسم الله الرحمن,114,"in the name of ALLAH, the most mercifull",merhametli olan Allah adıyla,اللہ کے نام سے جو سب سے زیادہ رحم کرنے والا ہے۔,"अल्लाह के नाम पर, सबसे दयालु",dengan menyebut nama ALLAH yang maha penyayang,পরম করুণাময় আল্লাহর নামে,во имя АЛЛАХА Милостивого
1,يا أيها الذين,92,O you who,Ey sen kim,اے جو,हे तुम कौन,Wahai kamu siapa,হে আপনি যারা,"О ты, кто"
2,أيها الذين آمنوا,89,O you who believe,Ey inananlar,اے ایمان والو!,हे तुम जो विश्वास करते हो,Wahai orang-orang yang beriman,হে ঈমানদারগণ,"О вы, кто верит"
3,من دون الله,71,without God,Tanrı olmadan,خدا کے بغیر,भगवान के बिना,tanpa Tuhan,ঈশ্বর ছাড়া,без Бога
4,على كل شيء,52,for everything,herşey için,ہر چیز کے لئے,प्रत्येक वस्तु के लिए,untuk semuanya,সব কিছুর জন্য,За все
...,...,...,...,...,...,...,...,...,...
195,الأرض بعد موتها,8,earth after death,ölümden sonra dünya,موت کے بعد زمین,मृत्यु के बाद पृथ्वी,bumi setelah kematian,মৃত্যুর পরে পৃথিবী,земля после смерти
196,إن وعد الله,8,God's promise,Tanrı'nın vaadi,خدا کا وعدہ,भगवान का वादा,janji tuhan,ঈশ্বরের প্রতিশ্রুতি,обещание Бога
197,إن الله هو,8,God is,Tanrı,خدا ہے,भगवान है,Tuhan adalah,উপাস্য নেই,Бог это
198,إلا أن يشاء,8,except that he wills,onun dilemesi dışında,سوائے اس کے کہ وہ چاہے,सिवाय इसके कि वह चाहता है,kecuali dia berkehendak,ব্যতীত যে সে ইচ্ছা করে,"кроме того, что он хочет"


In [49]:
df_master_threegram_count_translate_merge.to_excel(f"Quran_Threegram_Translate_Frequency.xlsx", index=False)

#### Copy Move And Delete

In [144]:
output_file = glob.glob(f"Quran_*.xlsx")
output_file

['Quran_Word_Translate_All.xlsx',
 'Quran_Word_Translate_300.xlsx',
 'Quran_Root_99.xlsx',
 'Quran_Root_Word_Translate_99.xlsx',
 'Quran_Root_Word_Translate_All.xlsx',
 'Quran_Clean_Ayahs_Frequency.xlsx']

In [145]:
for y in output_file:
    source = y # source directory
    destination = path
    shutil.copy2(source, destination)

In [146]:
for z in output_file:
    try:
        os.remove(z)
    except:
        pass