In [None]:
import numpy as np
import pandas as pd
import time
import seaborn as sns
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModel
import torch
import faiss
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, models
import torch.nn.functional as F


# Data Load

In [2]:
df = pd.read_csv('../quran_id_full.csv')
df = df[['id', 'suraId', 'verseID', 'indoText']]

In [20]:
query = "apa itu jalan yang lurus"

In [7]:
corpus = df.indoText.tolist()

# Model Init

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [44]:
model1_name = 'sentence-transformers/msmarco-distilbert-base-dot-prod-v3'
model2_name = 'denaya/indoSBERT-large'

In [6]:
models = [model1_name, model2_name]

# SBERT Method Compare

difference between sbert embedding with classic method

## SBERT embedding

In [8]:
sbert_model = SentenceTransformer(model1_name, device=device)
encoded_data = sbert_model.encode(corpus, convert_to_tensor=True)
# encoded_data = np.asarray(encoded_data.astype('float32'))

In [21]:
encoded_query = sbert_model.encode(query, convert_to_tensor=True)
similarity_scores = sbert_model.similarity(encoded_query, encoded_data)[0]
scores, indices = torch.topk(similarity_scores, k=10)

In [22]:
for score, idx in zip(scores, indices):
    print(f"Ayat: {corpus[idx]}\nScore: {score}\n")

Ayat: (yang berada) di atas jalan yang lurus,
Score: 55.79570007324219

Ayat: Dan hak Allah menerangkan jalan yang lurus, dan di antaranya ada (jalan) yang menyimpang. Dan jika Dia menghendaki, tentu Dia memberi petunjuk kamu semua (ke jalan yang benar).
Score: 53.78508758544922

Ayat: (yaitu) bagi siapa di antara kamu yang menghendaki menempuh jalan yang lurus.
Score: 52.73957061767578

Ayat: (Isa berkata), “Dan sesungguhnya Allah itu Tuhanku dan Tuhanmu, maka sembahlah Dia. Ini adalah jalan yang lurus.”
Score: 52.0804328918457

Ayat: Dan di antara kami ada yang Islam dan ada yang menyimpang dari kebenaran. Siapa yang Islam, maka mereka itu telah memilih jalan yang lurus.
Score: 51.20023727416992

Ayat: Sesungguhnya orang-orang yang kafir itu, menginfakkan harta mereka untuk menghalang-halangi (orang) dari jalan Allah. Mereka akan (terus) menginfakkan harta itu, kemudian mereka akan menyesal sendiri, dan akhirnya mereka akan dikalahkan. Ke dalam neraka Jahanamlah orang-orang kafir itu

## Denaya

In [47]:
sbert_model2 = SentenceTransformer(model2_name, device=device)
encoded_data = sbert_model2.encode(corpus, convert_to_tensor=True)

model.safetensors:   5%|4         | 62.9M/1.34G [00:00<?, ?B/s]

In [49]:
encoded_query = sbert_model2.encode(query, convert_to_tensor=True)
similarity_scores = sbert_model2.similarity(encoded_query, encoded_data)[0]
scores, indices = torch.topk(similarity_scores, k=10)

In [50]:
for score, idx in zip(scores, indices):
    print(f"Ayat: {corpus[idx]}\nScore: {score}\n")

Ayat: (yang berada) di atas jalan yang lurus,
Score: 0.8601018190383911

Ayat: Dia (Allah) berfirman, “Ini adalah jalan yang lurus (menuju) kepada-Ku.”
Score: 0.7590444087982178

Ayat: Tunjukilah kami jalan yang lurus,
Score: 0.7579336166381836

Ayat: (yaitu) bagi siapa di antara kamu yang menghendaki menempuh jalan yang lurus.
Score: 0.7277039289474487

Ayat: Demi langit yang mempunyai jalan-jalan,
Score: 0.7072363495826721

Ayat: dan pasti Kami tunjukkan kepada mereka jalan yang lurus.
Score: 0.6908457279205322

Ayat: Dan hak Allah menerangkan jalan yang lurus, dan di antaranya ada (jalan) yang menyimpang. Dan jika Dia menghendaki, tentu Dia memberi petunjuk kamu semua (ke jalan yang benar).
Score: 0.6743772625923157

Ayat: Dan sesungguhnya engkau pasti telah menyeru mereka kepada jalan yang lurus.
Score: 0.667270302772522

Ayat: di dalamnya terdapat (isi) kitab-kitab yang lurus (benar).
Score: 0.66031813621521

Ayat: Maka berpegang teguhlah engkau kepada (agama) yang telah diwahyuka

## AutoTokenizer 

In [11]:
tokenizer = AutoTokenizer.from_pretrained(model1_name)
model = AutoModel.from_pretrained(model1_name)

In [12]:
def get_embedding(text, tokenizer, model):
    # Tokenizing the input text
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=128)
    
    # Print the tokenized components
    # tokens = tokenizer.tokenize(text)  # Get tokens as text
    # token_ids = inputs['input_ids'][0].numpy()  # Convert tensor to numpy array for readability
    
    # # Display tokenization details
    # print(f"Original Text: {text}")
    # print(f"Tokens: {tokens}")
    # print(f"Token IDs: {token_ids}")
    # print(f"Attention Mask: {inputs['attention_mask'][0].numpy()}")
    # print("\n" + "="*50 + "\n")
    
    # Pass through model without computing gradients
    with torch.no_grad():
        outputs = model(**inputs)
    
    # Return the embedding from [CLS] token representation
    return outputs.last_hidden_state[:, 0, :].squeeze(0) 

In [31]:
ayat_embeddings_sbert = [get_embedding(ayat, tokenizer, model) for ayat in corpus]

In [30]:
query_embedding_sbert = get_embedding(query, tokenizer, model)

In [32]:
# Calculate cosine similarity
# similarities = [cosine_similarity(query_embedding, ayat_embedding)[0][0] for ayat_embedding in ayat_embeddings]
similarities = [torch.dot(query_embedding_sbert, ayat_embedding).item() for ayat_embedding in ayat_embeddings_sbert]

# Rank ayat by similarity
ranking = sorted(zip(corpus, similarities), key=lambda x: x[1], reverse=True)

# Print ranked ayat
for ayat, score in ranking:
    print(f"Ayat: {ayat}\nScore: {score}\n")

Ayat: Bukankah Kami telah menjadikan untuknya sepasang mata,
Score: 428.4288024902344

Ayat: kemudian Dia mematikannya lalu menguburkannya,
Score: 428.1026916503906

Ayat: sedang dia takut (kepada Allah),
Score: 424.635986328125

Ayat: Dan gunung-gunung bagaimana ditegakkan?
Score: 419.95794677734375

Ayat: Kamukah yang menciptakannya, ataukah Kami penciptanya?
Score: 414.4361877441406

Ayat: Apabila bumi diguncangkan sedahsyat-dahsyatnya,
Score: 406.6830139160156

Ayat: kemudian Kami belah bumi dengan sebaik-baiknya,
Score: 406.1723327636719

Ayat: Lalu para malaikat itu bersujud semuanya,
Score: 402.7583312988281

Ayat: dan langit, bagaimana ditinggikan?
Score: 400.5941162109375

Ayat: mereka itu memperoleh rezeki yang sudah ditentukan,
Score: 400.21728515625

Ayat: Mereka tinggal di sana dalam masa yang lama,
Score: 398.1305847167969

Ayat: Di sana ada mata air yang mengalir.
Score: 398.0472412109375

Ayat: Bahkan manusia menjadi saksi atas dirinya sendiri,
Score: 396.1717834472656


# SBERT vs indobert

## Indobert

In [33]:
tokenizer = AutoTokenizer.from_pretrained(model2_name)
model = AutoModel.from_pretrained(model2_name)

In [None]:
ayat_embeddings_indobert = [get_embedding(ayat, tokenizer, model) for ayat in corpus]
ayat_embeddings_indobert = [ayat.reshape(1, -1) for ayat in ayat_embeddings_indobert]

In [38]:
query_embedding_indobert = get_embedding(query, tokenizer, model)
query_embedding_indobert.reshape(1, -1)

tensor([[ 1.2724e+00, -1.4689e+00, -5.6605e-01, -5.6759e-02,  5.1447e-01,
          5.4387e-01, -1.3386e+00, -6.6781e-02,  9.3850e-01, -8.2752e-02,
         -1.0156e+00, -3.2174e-01,  4.1641e-01,  2.2798e-01, -9.6003e-01,
         -5.2328e-01, -5.8380e-01,  1.0278e+00,  1.0091e+00, -1.4218e+00,
          7.0414e-01, -5.1171e-03,  8.1370e-01,  2.6872e-02, -1.4076e+00,
          1.6010e-02,  7.8466e-01,  6.7906e-01, -3.6127e-01,  1.3430e+00,
         -6.9057e-01, -8.1474e-01,  2.6058e-01, -5.7250e-01,  7.7683e-01,
         -4.5440e-01,  3.7197e-01, -1.5467e+00, -6.2999e-01,  1.9202e-01,
          7.5064e-02,  4.4564e-01,  5.7052e-01,  6.6837e-01,  7.7885e-01,
          1.9194e-01,  1.2612e-01, -3.4817e-02,  7.8642e-01, -2.0393e-01,
         -1.2191e-01, -1.0682e+00, -1.6077e+00,  7.5807e-01, -7.1166e-01,
         -2.7974e-01,  1.5111e+00, -2.3163e-01, -7.0402e-01,  1.0442e+00,
          1.4910e+00, -1.0404e+00, -6.6112e-01, -2.1417e+00,  8.0576e-01,
         -2.0699e-01, -4.6933e-01,  1.

In [40]:
similarities_indobert = [cosine_similarity(query_embedding_indobert, ayat_embedding)[0][0] for ayat_embedding in ayat_embeddings_indobert]

ValueError: Expected 2D array, got 1D array instead:
array=[ 1.27240944e+00 -1.46888101e+00 -5.66045165e-01 -5.67594469e-02
  5.14466405e-01  5.43865681e-01 -1.33861673e+00 -6.67814761e-02
  9.38498914e-01 -8.27517435e-02 -1.01555431e+00 -3.21737260e-01
  4.16410863e-01  2.27979332e-01 -9.60025609e-01 -5.23281515e-01
 -5.83797455e-01  1.02775407e+00  1.00913143e+00 -1.42180252e+00
  7.04140007e-01 -5.11714816e-03  8.13704014e-01  2.68715322e-02
 -1.40764546e+00  1.60099491e-02  7.84663737e-01  6.79059267e-01
 -3.61270428e-01  1.34299457e+00 -6.90567672e-01 -8.14739585e-01
  2.60584146e-01 -5.72502375e-01  7.76827812e-01 -4.54397351e-01
  3.71966541e-01 -1.54666591e+00 -6.29993677e-01  1.92015395e-01
  7.50638172e-02  4.45643067e-01  5.70520163e-01  6.68367565e-01
  7.78851569e-01  1.91935897e-01  1.26116097e-01 -3.48169804e-02
  7.86417365e-01 -2.03933969e-01 -1.21911287e-01 -1.06816077e+00
 -1.60765433e+00  7.58074045e-01 -7.11655080e-01 -2.79739499e-01
  1.51109600e+00 -2.31631815e-01 -7.04019368e-01  1.04415250e+00
  1.49100900e+00 -1.04040766e+00 -6.61124170e-01 -2.14169383e+00
  8.05761874e-01 -2.06985503e-01 -4.69327062e-01  1.57120967e+00
 -9.92583334e-02 -2.89215595e-01  6.90314293e-01  1.29372764e+00
 -1.08620620e+00  8.94558549e-01 -5.42346239e-01  8.04382861e-01
 -9.74725634e-02  1.07201064e+00 -1.21343717e-01 -3.89460027e-02
 -6.59585476e-01  1.09507000e+00 -1.22683473e-01 -7.63670057e-02
  2.10214421e-01 -6.84131265e-01 -1.66125655e+00 -2.41768613e-01
 -8.14232707e-01  1.29741931e+00  1.84860155e-01  3.57257962e-01
 -7.67478764e-01 -6.23469472e-01  1.00845599e+00  1.66684106e-01
  5.06150424e-01  1.62201636e-02 -2.76423335e-01 -1.07048094e+00
  3.34137142e-01  3.10913801e-01 -7.83368647e-01 -1.56574368e+00
  3.29761684e-01 -6.89708233e-01  1.32866383e-01 -9.04206932e-01
 -1.46368635e+00 -4.21308607e-01 -1.56722367e-01 -3.04050326e-01
 -1.06656218e+00 -9.45373327e-02 -6.84240937e-01 -1.23413968e+00
  5.77824473e-01  1.26819938e-01  3.89412403e-01 -9.53653753e-02
 -7.07364738e-01 -1.05739009e+00  8.95432234e-01  1.78350723e+00
  8.77555311e-01 -5.21908641e-01 -6.38274610e-01  1.30766571e+00
  2.23490286e+00 -5.81456542e-01 -1.31876254e+00 -3.71311903e-01
  3.39785665e-02  8.98855776e-02  4.26007539e-01 -5.31984642e-02
 -4.17977154e-01  1.46146894e+00  1.32129893e-01 -9.32524145e-01
  3.68037373e-01 -3.26392204e-01  1.34413406e-01  5.17501712e-01
 -1.20633113e+00 -9.03187394e-02 -1.19393587e+00  2.30203867e+00
 -8.60539496e-01  1.93792582e-03 -1.94938278e+00 -2.99391866e-01
  8.57413650e-01 -3.42200994e-01  1.53142750e-01  3.14116263e+00
 -1.59188807e-02 -2.93178260e-01 -1.77137449e-01  1.01768231e+00
 -3.63961548e-01 -3.47532690e-01 -2.45231819e+00  2.19741774e+00
  6.74932957e-01  1.26713920e+00 -8.08143020e-01 -3.02400321e-01
  2.44308263e-01 -5.52667618e-01 -5.77937305e-01  1.14717491e-01
 -4.76638377e-01  2.91467488e-01 -4.16568369e-01  6.56544328e-01
  3.76012146e-01  8.03977132e-01 -8.79561305e-01  2.04343706e-01
 -1.71937421e-02 -2.56686300e-01 -9.24449742e-01 -1.27867401e-01
 -6.87688172e-01 -2.66893804e-01 -1.68004274e-01  3.44244003e-01
 -2.14406705e+00  1.44721496e+00 -4.55171168e-02 -9.82181057e-02
  6.65988982e-01  5.06344318e-01  3.32151324e-01 -1.28679723e-02
 -7.31790721e-01  2.15432978e+00  1.49649322e+00  1.82296872e-01
  6.41757965e-01 -1.01072299e+00  7.90665209e-01  1.83861601e+00
  3.03861886e-01 -1.56201705e-01  6.32334948e-01  2.19522625e-01
 -7.80175209e-01  2.16018155e-01  2.20596528e+00  1.39372957e+00
 -4.10557538e-01  1.46808505e-01 -2.09799156e-01  1.56820202e+00
  3.55605364e-01 -1.17105794e+00 -5.49220741e-01  2.13065577e+00
  8.85681629e-01  3.58409762e-01 -3.66161078e-01  2.28414202e+00
  9.95571375e-01 -8.38343322e-01 -7.81679392e-01 -8.25249314e-01
 -5.43285549e-01  3.22871506e-01 -1.61349446e-01  6.60444438e-01
 -3.79454017e-01  1.26128435e+00 -1.26129329e+00 -4.08308506e-02
  5.63535035e-01 -1.91834882e-01  1.02993155e+00  4.02309537e-01
 -8.47590983e-01  4.16439176e-01 -6.89782381e-01  1.60823655e+00
  3.81191850e-01  1.59551501e-01  1.49075603e+00 -1.50390422e+00
  2.41007417e-01  1.51362643e-01 -3.27135384e-01  1.90519559e+00
 -8.68180543e-02  1.47508705e+00  3.05373162e-01  1.45983231e+00
  1.08817625e+00  1.91376552e-01  1.47592694e-01 -3.07234913e-01
 -9.04023290e-01  1.63841450e+00  4.32811052e-01  1.71985492e-01
 -1.14673471e+00 -2.61361659e-01  1.65613234e-01  1.07687271e+00
  4.08253938e-01 -3.96714568e-01  4.74454999e-01 -1.13417792e+00
  2.71470606e-01  2.97994316e-01  4.10875529e-01  7.56067693e-01
 -1.41584432e+00 -1.47032559e+00 -7.32013524e-01  3.79030138e-01
  4.00391966e-01  4.84373063e-01  1.47278941e+00  1.03364360e+00
 -5.98440945e-01  9.25497770e-01  1.57638228e+00 -4.61880028e-01
  9.22325253e-01 -1.80843186e+00  3.65154684e-01 -1.84691936e-01
 -8.70514095e-01 -1.30292583e+00  6.15695238e-01  2.10133031e-01
 -2.34024119e+00 -1.39913344e+00  4.69509840e-01 -4.52057034e-01
  3.93858016e-01 -5.83501577e-01 -1.85836256e-02  6.68444097e-01
 -1.10479973e-01  7.83261895e-01  3.28287244e-01 -5.91419600e-02
  8.91142428e-01  4.13797259e-01  1.12610117e-01 -1.32263049e-01
 -9.51565802e-02 -3.00930798e-01  1.56350684e+00 -7.27937937e-01
  1.52805531e+00  2.12580109e+00 -3.03106084e-02  2.17270553e-02
 -2.60456741e-01  1.62510681e+00  3.58186632e-01 -2.61325300e-01
 -3.14607956e-02 -2.01902241e-01  4.42763478e-01 -2.81574279e-01
 -6.35967672e-01  8.17023039e-01 -6.02736712e-01  8.43875855e-02
  2.49854714e-01 -1.23138070e+00  8.38553786e-01  1.76376868e-02
  1.79772902e+00 -4.24044609e-01  8.84693742e-01 -6.06730133e-02
  3.81901085e-01 -4.51281577e-01  1.49828166e-01 -8.84143412e-01
 -1.07670426e+00  9.19988513e-01 -4.08773273e-01  1.26999509e+00
  1.66261506e+00 -1.31157592e-01 -4.98769373e-01  1.20995402e+00
  4.49568391e-01  4.98465896e-01 -1.03401911e+00  4.75188375e-01
  3.92419040e-01  3.87335598e-01 -4.08662677e-01 -4.17234033e-01
  3.88444096e-01 -1.14807975e+00  4.00561333e-01 -2.12384129e+00
 -7.52786458e-01 -6.39781296e-01 -1.67763159e-02 -1.52104950e+00
 -4.05955672e-01  2.08232254e-01  2.23269150e-01  8.33619654e-01
  8.46753538e-01  4.65875447e-01  1.15065598e+00 -1.05148113e+00
  1.27714193e+00  3.24958205e-01  1.31359720e+00 -5.91145270e-02
 -1.94794685e-02 -1.01840878e+00  1.27365172e+00  3.27217519e-01
 -1.33889627e+00 -1.15958869e-01  4.97696847e-02  6.56851590e-01
 -8.58012974e-01 -6.55359149e-01 -4.35349569e-02 -1.68213993e-01
  9.42489386e-01  8.20301890e-01  3.08974743e-01  2.39238799e-01
 -6.03752077e-01 -9.16961670e-01 -1.09020674e+00  1.49876565e-01
 -6.69819832e-01  7.34825194e-01  6.42791569e-01 -4.33704793e-01
  1.00733387e+00 -5.11524856e-01  5.78469872e-01  3.52394462e-01
 -1.18082702e+00  1.27241164e-01 -1.84154403e+00  5.58908939e-01
  2.20568404e-01 -1.08252239e+00  5.77332318e-01 -9.32605714e-02
 -2.52214491e-01  8.99082482e-01  5.43672800e-01  3.73684049e-01
 -5.75837183e+00 -5.33575952e-01  9.66267765e-01 -4.32005912e-01
  8.21748257e-01  1.72939312e+00 -1.93584010e-01  7.87145734e-01
  5.13549149e-03  8.06203187e-01  1.11095607e+00  9.91536498e-01
 -1.80202156e-01  1.20806599e+00  9.67185080e-01  4.81448352e-01
 -1.97238863e-01 -8.20659518e-01  4.00503367e-01  2.13144255e+00
 -3.55701059e-01  1.18022966e+00 -4.65305150e-01 -9.17420566e-01
 -3.95362467e-01  9.60678875e-01  2.61235654e-01 -9.55581248e-01
 -1.67772865e+00 -5.94003677e-01  2.06702447e+00  3.11105758e-01
  8.76694798e-01 -5.18991828e-01  2.11525488e+00 -1.07357085e+00
 -1.18857872e+00 -1.20133483e+00 -1.52034476e-01  1.54204771e-01
  1.82081878e+00  1.04170310e+00 -9.71836507e-01  4.85454798e-01
  2.14281827e-01 -6.03941530e-02  2.81589210e-01 -3.58236283e-01
  6.11299157e-01  8.68483901e-01 -6.74367487e-01 -2.79557586e-01
  1.96839392e+00 -8.56061339e-01 -2.65026003e-01  1.69649875e+00
 -9.56690133e-01 -6.41088039e-02  1.82220912e+00  2.77138084e-01
 -7.30737567e-01 -7.97676146e-01  1.81433082e-01 -1.34773159e+00
  1.99987561e-01 -9.88395870e-01  2.10421279e-01  2.21549779e-01
 -1.55959472e-01  2.58879006e-01 -9.44476426e-01  5.68247557e-01
 -1.19326138e+00  7.45080590e-01 -1.03030574e+00  4.37778950e-01
 -9.90743220e-01  2.21055806e-01  7.76925534e-02  8.17345828e-02
 -1.24345946e+00  2.59623706e-01  5.38395882e-01 -1.68816292e+00
 -5.62931478e-01 -1.08074391e+00  4.29695040e-01 -9.82938170e-01
 -9.84866977e-01  2.02469528e-02  5.49722910e-01 -7.32129693e-01
  8.04141462e-02 -1.71702981e-01  1.07659805e+00  1.54419824e-01
  9.20385599e-01 -9.89325166e-01 -3.83531690e-01 -6.46334529e-01
  5.09355009e-01 -5.02797425e-01 -2.76969254e-01  1.28672391e-01
  3.57774526e-01  1.44779909e+00 -4.06831801e-01  3.90516043e-01
 -3.02811265e-01 -9.51112628e-01 -5.76218188e-01  1.55108899e-01
 -3.97488661e-02 -2.28681135e+00  1.81590354e+00 -8.09069574e-01
  1.33655578e-01 -2.42921019e+00 -1.35493374e+00  2.42509782e-01
 -8.59206796e-01  1.00805357e-01  5.51464379e-01  3.92519057e-01
 -4.13162261e-01  1.02940619e-01  1.87757957e+00  1.58723131e-01
  7.97986910e-02  2.07273126e-01  5.41967094e-01  1.49886501e+00
 -6.01796389e-01  1.04955101e+00 -1.00721374e-01 -2.03806162e+00
  1.48016915e-01 -1.20024216e+00  9.48600411e-01  1.92578638e+00
 -8.24490368e-01  2.26517230e-01 -3.30676496e-01 -4.61932898e-01
 -5.65325379e-01 -1.38281584e-01 -8.61206800e-02 -3.80072773e-01
  1.12620068e+00 -1.01258981e+00  3.51794571e-01 -6.71529531e-01
  1.13573015e+00  4.42027092e-01  1.28422451e+00  9.75192666e-01
 -4.64052022e-01 -4.46249425e-01 -1.51130223e+00  1.67983520e+00
  3.66829991e-01  3.71629715e-01 -1.04377067e+00 -8.38887870e-01
 -9.00894046e-01  2.65373886e-01  1.24132633e+00 -9.42788363e-01
  3.37646663e-01 -1.21764076e+00 -1.20406604e+00  1.13221788e+00
 -1.28301930e+00 -9.68711972e-01  5.18070102e-01  1.70303655e+00
  2.12083769e+00 -8.86532187e-01  2.01021209e-01 -9.62902546e-01
 -3.32727253e-01 -2.58459300e-01  3.29278976e-01  1.06090724e+00
  1.81940213e-01  5.76823771e-01 -1.69214636e-01 -7.04725146e-01
  4.15326446e-01  1.78504050e-01 -1.04073346e+00  2.90748030e-01
  2.70246530e+00  7.82758474e-01 -7.37160206e-01  5.71981132e-01
 -2.21107036e-01  2.13891596e-01 -5.34987569e-01 -4.65050906e-01
  1.56264484e-01 -9.52406079e-02 -3.14213187e-02  7.57812917e-01
 -3.59902322e-01  3.74483138e-01 -1.48967755e+00 -4.81890082e-01
  2.22513467e-01  5.57259142e-01  4.37042505e-01  1.25643790e-01
 -3.74193490e-01  6.25535071e-01 -7.84776881e-02 -7.72247434e-01
 -2.56599367e-01  1.21183388e-01 -1.32848501e-01  7.43575454e-01
  3.61083657e-01  6.22196972e-01  7.36890852e-01 -1.08712471e+00
 -8.24139535e-01 -1.03319836e+00  6.17523313e-01 -1.48206127e+00
 -1.33878386e+00  2.89109975e-01  2.88449109e-01 -1.14079416e+00
 -1.22260499e+00 -6.20453835e-01  3.28158170e-01 -2.79644430e-01
  1.13700485e+00 -1.72942206e-01 -3.87393944e-02  7.68415332e-02
 -2.58282757e+00  1.32788211e-01  5.95720351e-01  8.51961493e-01
  1.60244381e+00 -3.90224576e-01  1.10542941e+00  1.57289171e+00
  1.76595747e-01 -1.33335650e+00 -1.30948925e+00 -2.72618413e-01
 -8.24548125e-01 -2.14767650e-01  1.05144843e-01 -4.35485542e-01
  2.22887546e-01 -5.17130435e-01  7.88531065e-01 -1.77286506e-01
  5.54013550e-01 -2.34023005e-01  3.48525077e-01 -3.59971821e-01
 -9.50042069e-01 -1.61267266e-01  1.99672431e-01  6.92940056e-01
 -4.96716291e-01  1.76912442e-01 -1.09056436e-01  2.17507631e-01
  7.55139440e-02  1.15804863e+00  3.39423269e-01  1.10947204e+00
 -8.85710299e-01  1.42573702e+00 -1.06390893e-01  5.76226294e-01
 -6.73193932e-01 -3.05442493e-02 -3.93723130e-01  3.74297768e-01
 -1.30155098e+00 -6.03697971e-02 -1.56669712e+00 -3.45792681e-01
  9.21484947e-01  1.34789574e+00 -7.83489585e-01  3.91056091e-01
 -8.17545891e-01  6.74918175e-01 -8.56577158e-02  2.24126488e-01
  1.75446796e+00  7.13750720e-01  3.68587583e-01  5.19217491e-01
  1.23393345e+00 -1.11699551e-01 -3.80230963e-01  9.48484719e-01
  3.02225798e-02  1.11497509e+00  1.03948212e+00 -1.96318090e-01
  7.28718862e-02 -5.35455525e-01 -9.21985209e-01 -2.25174141e+00
  5.30727506e-01  1.16539848e+00 -1.39914584e+00  2.69374520e-01
 -1.87153116e-01 -2.81176567e-01  1.82094026e+00 -9.44165468e-01
  1.34091973e-01 -2.48406553e+00 -5.37436843e-01  7.15227187e-01
 -2.91047740e+00  2.00792536e-01  1.39731455e+00 -3.72997940e-01
 -1.56943369e+00 -4.54575777e-01  9.03748572e-01  3.23534578e-01
  6.70896947e-01  1.33094215e+00 -1.88835084e+00  8.57850760e-02
  3.87519926e-01 -1.07765198e-02  4.72889602e-01 -4.80112195e-01
  2.73400331e+00 -1.50857556e+00 -1.88178158e+00 -9.95804906e-01
  4.95517254e-01  4.78936374e-01  2.25985155e-01 -6.55329108e-01].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [None]:
# Rank ayat by similarity
ranking = sorted(zip(corpus, similarities_indobert), key=lambda x: x[1], reverse=True)

# Print ranked ayat
for ayat, score in ranking:
    print(f"Ayat: {ayat}\nScore: {score}\n")