<a href="https://colab.research.google.com/github/Achmad96/information-retrieval/blob/master/IR_Ayat_Alqur'an_id_en.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
from pathlib import Path

drive_path = Path('/content/drive')
drive.mount(str(drive_path))
drive_path /= 'MyDrive'

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
dataset_root = drive_path   / 'datasets'
dataset_path = dataset_root / 'quran_surah_dataset'

In [4]:
id_df = pd.read_csv(f'{dataset_path}/id.indonesian.txt', sep='|',names=['surah','verse','translations'])
id_df.head()

Unnamed: 0,surah,verse,translations
0,1,1,Dengan menyebut nama Allah Yang Maha Pemurah l...
1,1,2,"Segala puji bagi Allah, Tuhan semesta alam."
2,1,3,Maha Pemurah lagi Maha Penyayang.
3,1,4,Yang menguasai di Hari Pembalasan.
4,1,5,"Hanya Engkaulah yang kami sembah, dan hanya ke..."


In [5]:
en_df = pd.read_csv(f'{dataset_path}/en.sahih.txt', sep='|', names=['surah','verse','translations'])
en_df.head()

Unnamed: 0,surah,verse,translations
0,1,1,"In the name of Allah, the Entirely Merciful, t..."
1,1,2,"[All] praise is [due] to Allah, Lord of the wo..."
2,1,3,"The Entirely Merciful, the Especially Merciful,"
3,1,4,Sovereign of the Day of Recompense.
4,1,5,It is You we worship and You we ask for help.


In [6]:
df = pd.concat([id_df, en_df]).sort_values(by=['surah','verse']).reset_index(drop=True)
df.head()

Unnamed: 0,surah,verse,translations
0,1,1,Dengan menyebut nama Allah Yang Maha Pemurah l...
1,1,1,"In the name of Allah, the Entirely Merciful, t..."
2,1,2,"Segala puji bagi Allah, Tuhan semesta alam."
3,1,2,"[All] praise is [due] to Allah, Lord of the wo..."
4,1,3,Maha Pemurah lagi Maha Penyayang.


In [7]:
vectorizer = TfidfVectorizer()

In [8]:
translations = df['translations']
vectorized_translations = vectorizer.fit_transform(translations)

In [9]:
queries = ['[All] praise is [due] to Allah','Dengan menyebut nama Allah Yang Maha Pemurah']

In [10]:
vectorized_input = vectorizer.transform(queries)

In [11]:
print('Calculate cosine similarity...')
similarity_scores = cosine_similarity(vectorized_input, vectorized_translations)

Calculate cosine similarity...


In [12]:
import textwrap
THRESHOLD = 0.3

for query_index, query_text in enumerate(queries):
  high_score_indices = np.where(similarity_scores[query_index] >= THRESHOLD)
  high_score_indices = high_score_indices[0]
  print(f"--- Results for Query {query_index + 1} ({len(high_score_indices)}) ---")

  if len(high_score_indices) == 0:
    print(f"No entries found with a similarity score greater or equals to {THRESHOLD} for this query.")
  else:
    for idx in high_score_indices:
      surah = df.loc[idx, 'surah']
      verse = df.loc[idx, 'verse']
      translation = df.loc[idx, 'translations']
      score = similarity_scores[query_index, idx]
      translation_text = textwrap.fill(translation, width=130)
      print(f'Surah: {surah}')
      print(f'Verse: {verse}')
      print(f'Translations: {translation_text}')
      print(f'Score: {score:.4f}')
      print()
  print()

--- Results for Query 1 (18) ---
Surah: 1
Verse: 2
Translations: [All] praise is [due] to Allah, Lord of the worlds -
Score: 0.8204

Surah: 6
Verse: 1
Translations: [All] praise is [due] to Allah, who created the heavens and the earth and made the darkness and the light. Then those who
disbelieve equate [others] with their Lord.
Score: 0.4272

Surah: 16
Verse: 52
Translations: And to Him belongs whatever is in the heavens and the earth, and to Him is [due] worship constantly. Then is it other than Allah
that you fear?
Score: 0.3016

Surah: 18
Verse: 1
Translations: [All] praise is [due] to Allah, who has sent down upon His Servant the Book and has not made therein any deviance.
Score: 0.4816

Surah: 27
Verse: 15
Translations: And We had certainly given to David and Solomon knowledge, and they said, "Praise [is due] to Allah, who has favored us over many
of His believing servants."
Score: 0.3809

Surah: 27
Verse: 93
Translations: And say, "[All] praise is [due] to Allah. He will show yo