### Morphology And Tanzil Analysis

In [4]:
# Optional one-time setup, intentionally left commented out: uncomment to
# install the third-party packages behind the `lang_trans` and `camel_tools`
# imports used by the import cell of this notebook.
#!pip install lang-trans
#!pip install camel-tools

In [1]:
import pandas as pd
import numpy as np
import re
from camel_tools.utils.charmap import CharMapper
from lang_trans.arabic import buckwalter
import nltk
from nltk import word_tokenize
from nltk import ngrams

In [2]:
# Word-level Tanzil (simple) table: one row per word, keyed by the positional
# columns (num_1, num_2, num_3), with the Arabic form and its Buckwalter
# transliteration.
df_tanzil_word = pd.read_excel(
    io="Arabic_And_Buckwalter_Word_Tanzil_Simple.xlsx",
)
df_tanzil_word

Unnamed: 0,num_1,num_2,num_3,arabic_word_tanzil_simple,buckwalter_word_tanzil_simple
0,1,1,1,بسم,bsm
1,1,1,2,الله,Allh
2,1,1,3,الرحمن,AlrHmn
3,1,1,4,الرحيم,AlrHym
4,1,2,1,الحمد,AlHmd
...,...,...,...,...,...
78243,114,5,4,صدور,Sdwr
78244,114,5,5,الناس,AlnAs
78245,114,6,1,من,mn
78246,114,6,2,الجنة,Aljnp


In [3]:
# Word-level morphology table: same positional key columns
# (num_1, num_2, num_3) as the Tanzil word table, but holding the
# diacritised Arabic form and its Buckwalter transliteration.
df_morp_word = pd.read_excel(
    io="Arabic_And_Buckwalter_Word_Morphology.xlsx",
)
df_morp_word

Unnamed: 0,num_1,num_2,num_3,arabic_word_morphology,buckwalter_word_morphology
0,1,1,1,بِسْمِ,bisomi
1,1,1,2,ٱللَّهِ,{ll~ahi
2,1,1,3,ٱلرَّحْمَٰنِ,{lr~aHoma`ni
3,1,1,4,ٱلرَّحِيمِ,{lr~aHiymi
4,1,2,1,ٱلْحَمْدُ,{loHamodu
...,...,...,...,...,...
77424,114,5,4,صُدُورِ,Suduwri
77425,114,5,5,ٱلنَّاسِ,{ln~aAsi
77426,114,6,1,مِنَ,mina
77427,114,6,2,ٱلْجِنَّةِ,{lojin~api


In [4]:
# Attach the morphology columns to every Tanzil word by matching on the
# positional key. A left join keeps all Tanzil rows; rows with no morphology
# match get NaN in the morphology columns.
# NOTE(review): the two word tables show different row counts in their
# displayed outputs, so the per-verse word positions may not line up
# everywhere - confirm before relying on the pairing.
word_key_columns = ["num_1", "num_2", "num_3"]
df_tanzil_morp_word_merge = df_tanzil_word.merge(
    df_morp_word,
    on=word_key_columns,
    how="left",
)
df_tanzil_morp_word_merge

Unnamed: 0,num_1,num_2,num_3,arabic_word_tanzil_simple,buckwalter_word_tanzil_simple,arabic_word_morphology,buckwalter_word_morphology
0,1,1,1,بسم,bsm,بِسْمِ,bisomi
1,1,1,2,الله,Allh,ٱللَّهِ,{ll~ahi
2,1,1,3,الرحمن,AlrHmn,ٱلرَّحْمَٰنِ,{lr~aHoma`ni
3,1,1,4,الرحيم,AlrHym,ٱلرَّحِيمِ,{lr~aHiymi
4,1,2,1,الحمد,AlHmd,ٱلْحَمْدُ,{loHamodu
...,...,...,...,...,...,...,...
78243,114,5,4,صدور,Sdwr,صُدُورِ,Suduwri
78244,114,5,5,الناس,AlnAs,ٱلنَّاسِ,{ln~aAsi
78245,114,6,1,من,mn,مِنَ,mina
78246,114,6,2,الجنة,Aljnp,ٱلْجِنَّةِ,{lojin~api


In [5]:
# Sentence-level Tanzil (simple) table: one row per (num_1, num_2) pair with
# the Arabic text and its Buckwalter transliteration.
df_tanzil_sent = pd.read_excel(
    io="Arabic_And_Buckwalter_Sent_Tanzil_Simple.xlsx",
)
df_tanzil_sent

Unnamed: 0,num_1,num_2,arabic_sent_tanzil_simple,buckwalter_sent_tanzil_simple
0,1,1,بسم الله الرحمن الرحيم,bsm Allh AlrHmn AlrHym
1,1,2,الحمد لله رب العالمين,AlHmd llh rb AlEAlmyn
2,1,3,الرحمن الرحيم,AlrHmn AlrHym
3,1,4,مالك يوم الدين,mAlk ywm Aldyn
4,1,5,إياك نعبد وإياك نستعين,<yAk nEbd w<yAk nstEyn
...,...,...,...,...
6231,114,2,ملك الناس,mlk AlnAs
6232,114,3,إله الناس,<lh AlnAs
6233,114,4,من شر الوسواس الخناس,mn $r AlwswAs AlxnAs
6234,114,5,الذي يوسوس في صدور الناس,Al*y ywsws fy Sdwr AlnAs


In [6]:
# Sentence-level morphology table: one row per (num_1, num_2) pair with the
# diacritised Arabic text and its Buckwalter transliteration.
df_morp_sent = pd.read_excel(
    io="Arabic_And_Buckwalter_Sent_Morphology.xlsx",
)
df_morp_sent

Unnamed: 0,num_1,num_2,arabic_sent_morphology,buckwalter_sent_morphology
0,1,1,بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ,bisomi {ll~ahi {lr~aHoma`ni {lr~aHiymi
1,1,2,ٱلْحَمْدُ لِلَّهِ رَبِّ ٱلْعَٰلَمِينَ,{loHamodu lil~ahi rab~i {loEa`lamiyna
2,1,3,ٱلرَّحْمَٰنِ ٱلرَّحِيمِ,{lr~aHoma`ni {lr~aHiymi
3,1,4,مَٰلِكِ يَوْمِ ٱلدِّينِ,ma`liki yawomi {ld~iyni
4,1,5,إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ,<iy~aAka naEobudu wa<iy~aAka nasotaEiynu
...,...,...,...,...
6231,114,2,مَلِكِ ٱلنَّاسِ,maliki {ln~aAsi
6232,114,3,إِلَٰهِ ٱلنَّاسِ,<ila`hi {ln~aAsi
6233,114,4,مِن شَرِّ ٱلْوَسْوَاسِ ٱلْخَنَّاسِ,min $ar~i {lowasowaAsi {loxan~aAsi
6234,114,5,ٱلَّذِى يُوَسْوِسُ فِى صُدُورِ ٱلنَّاسِ,{l~a*iY yuwasowisu fiY Suduwri {ln~aAsi


In [7]:
# Pair each Tanzil sentence with its morphology counterpart on the
# (num_1, num_2) key. An inner join keeps only keys present in both tables.
sent_key_columns = ["num_1", "num_2"]
df_tanzil_morp_sent_merge = df_tanzil_sent.merge(
    df_morp_sent,
    on=sent_key_columns,
    how="inner",
)
df_tanzil_morp_sent_merge

Unnamed: 0,num_1,num_2,arabic_sent_tanzil_simple,buckwalter_sent_tanzil_simple,arabic_sent_morphology,buckwalter_sent_morphology
0,1,1,بسم الله الرحمن الرحيم,bsm Allh AlrHmn AlrHym,بِسْمِ ٱللَّهِ ٱلرَّحْمَٰنِ ٱلرَّحِيمِ,bisomi {ll~ahi {lr~aHoma`ni {lr~aHiymi
1,1,2,الحمد لله رب العالمين,AlHmd llh rb AlEAlmyn,ٱلْحَمْدُ لِلَّهِ رَبِّ ٱلْعَٰلَمِينَ,{loHamodu lil~ahi rab~i {loEa`lamiyna
2,1,3,الرحمن الرحيم,AlrHmn AlrHym,ٱلرَّحْمَٰنِ ٱلرَّحِيمِ,{lr~aHoma`ni {lr~aHiymi
3,1,4,مالك يوم الدين,mAlk ywm Aldyn,مَٰلِكِ يَوْمِ ٱلدِّينِ,ma`liki yawomi {ld~iyni
4,1,5,إياك نعبد وإياك نستعين,<yAk nEbd w<yAk nstEyn,إِيَّاكَ نَعْبُدُ وَإِيَّاكَ نَسْتَعِينُ,<iy~aAka naEobudu wa<iy~aAka nasotaEiynu
...,...,...,...,...,...,...
6231,114,2,ملك الناس,mlk AlnAs,مَلِكِ ٱلنَّاسِ,maliki {ln~aAsi
6232,114,3,إله الناس,<lh AlnAs,إِلَٰهِ ٱلنَّاسِ,<ila`hi {ln~aAsi
6233,114,4,من شر الوسواس الخناس,mn $r AlwswAs AlxnAs,مِن شَرِّ ٱلْوَسْوَاسِ ٱلْخَنَّاسِ,min $ar~i {lowasowaAsi {loxan~aAsi
6234,114,5,الذي يوسوس في صدور الناس,Al*y ywsws fy Sdwr AlnAs,ٱلَّذِى يُوَسْوِسُ فِى صُدُورِ ٱلنَّاسِ,{l~a*iY yuwasowisu fiY Suduwri {ln~aAsi
