Installing the required package

In [6]:
!pip install tensorflow-hub==0.7.0


Collecting tensorflow-hub==0.7.0
  Downloading tensorflow_hub-0.7.0-py2.py3-none-any.whl (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.3/89.3 kB[0m [31m631.7 kB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow-hub
  Attempting uninstall: tensorflow-hub
    Found existing installation: tensorflow-hub 0.14.0
    Uninstalling tensorflow-hub-0.14.0:
      Successfully uninstalled tensorflow-hub-0.14.0
Successfully installed tensorflow-hub-0.7.0


Importing the required Libraries

In [7]:
import sys
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

import os
import numpy as np
import pandas as pd
import tensorflow as tf;
import tensorflow_hub as hub
from sklearn import preprocessing

import spacy
from spacy.lang.en import English
from spacy import displacy
nlp = spacy.load('en_core_web_sm')

import logging
logging.getLogger('tensorflow').disabled = True #OPTIONAL - to disable outputs from Tensorflow

from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA



In [8]:
print("tensorflow version:", tf.__version__)

tensorflow version: 2.12.0


In [9]:
print('tensorflow-hub version:', hub.__version__)

tensorflow-hub version: 0.7.0


Loading the data

In [12]:
df = pd.read_excel('Marvel Datastore.xlsx')
df.head()

Unnamed: 0,Description
0,"Thor Odinson is the Asgardian God of Thunder, ..."
1,"Upon being welcomed back to Asgard as a hero, ..."
2,Thor returned to Asgard having defeated his br...
3,Loki Laufeyson was the biological son of Laufe...
4,"Transported by the wormhole to Sanctuary, Loki..."


Loading the ELMo model


In [13]:
# Clear the TensorFlow session
tf.compat.v1.reset_default_graph()

# disable eaeger execution
tf.compat.v1.disable_eager_execution()

url = "https://tfhub.dev/google/elmo/3"
embed = hub.Module(url)

In [14]:
text = ' '.join(df.Description) # the column name is description
text = text.lower().replace('\n', ' ').replace('\t', ' ').replace('\xa0',' ')
text = ' '.join(text.split())
doc = nlp(text)
sentences = []
for i in doc.sents:
  if len(i)>1:
    sentences.append(i.text.strip())

sentences[0:5]

['thor odinson is the asgardian god of thunder, the former king of asgard and new asgard, and a founding member of the avengers.',
 'when his irresponsible and impetuous behavior reignited a conflict between asgard and jotunheim, thor was denied the right to become king, stripped of his power, and banished to earth by odin.',
 'while exiled on earth, thor learned humility, finding love with jane foster, and helped save his new friends from the destroyer sent by loki.',
 "due to his selfless act of sacrifice, thor redeemed himself in his father's eyes and was granted his power once more, which he then used to defeat loki's schemes of genocide.",
 'upon being welcomed back to asgard as a hero, thor was forced to return to earth in order to retrieve loki after he had survived to fall through a black hole and had begun his attempt at world domination, having taken possession of the tesseract.']

Lets embed the data (make sure the sentences are in list)

In [15]:
embeddings = embed(
    sentences,
    signature="default",
    as_dict=True)["default"]

with tf.compat.v1.Session() as sess:
  sess.run(tf.compat.v1.global_variables_initializer())
  sess.run(tf.compat.v1.tables_initializer())
  x = sess.run(embeddings)

In [16]:
x # its nothing but some array of numbers, or we can say it as a embedding

array([[-0.00733614, -0.18304347,  0.16322094, ..., -0.22172855,
         0.624712  ,  0.07290838],
       [ 0.00165509, -0.26581827,  0.16643186, ...,  0.02585867,
         0.34442008,  0.05207236],
       [ 0.13634636, -0.17421655, -0.00072701, ...,  0.13014363,
         0.41827026, -0.03455783],
       ...,
       [ 0.10434115, -0.05767896, -0.1373919 , ..., -0.04191431,
         0.15608196, -0.02752784],
       [ 0.43052787, -0.33813584,  0.00736421, ..., -0.19393805,
         0.5453068 , -0.04744105],
       [ 0.08791256, -0.12528047, -0.02484905, ..., -0.22180447,
         0.436184  ,  0.0747583 ]], dtype=float32)

In [17]:
x.shape #the embedding dimension is 1024, that means it converted each words/sentence into a 1024 dimension vector

(54, 1024)

Implementation

In [18]:
search_string = "what is thor's weapon"
results_returned = '3'

Lets embed this search word

In [19]:
embeddings2  = embed(
    [search_string],
    signature = 'default',
    as_dict=True
)['default']
with tf.compat.v1.Session() as sess:
  sess.run(tf.compat.v1.global_variables_initializer())
  sess.run(tf.compat.v1.tables_initializer())
  search_vect = sess.run(embeddings2)

In [20]:
search_vect.shape

(1, 1024)

Now we have the vectos for search word and for all the sentences. Next just find the cosine similarity

In [21]:
cosine_similarities = pd.Series(cosine_similarity(search_vect, x).flatten())

In [22]:
cosine_similarities

0     0.464203
1     0.519069
2     0.453402
3     0.476232
4     0.441831
5     0.446083
6     0.459262
7     0.473089
8     0.453213
9     0.445878
10    0.494961
11    0.447674
12    0.392118
13    0.435588
14    0.494164
15    0.480234
16    0.482558
17    0.457480
18    0.421674
19    0.415203
20    0.472791
21    0.433930
22    0.467802
23    0.449041
24    0.372141
25    0.476295
26    0.424813
27    0.478101
28    0.387470
29    0.528599
30    0.492825
31    0.383729
32    0.425509
33    0.454564
34    0.399482
35    0.471002
36    0.491498
37    0.445816
38    0.415831
39    0.389129
40    0.406797
41    0.339351
42    0.428758
43    0.454969
44    0.512350
45    0.496426
46    0.446618
47    0.485352
48    0.456896
49    0.434216
50    0.449379
51    0.417961
52    0.440169
53    0.424129
dtype: float32

In [23]:
output =""
for i,j in cosine_similarities.nlargest(int(results_returned)).iteritems():
  for i in sentences[i].split():
    if i.lower() in search_string:
      output += " "+str(i)+ ","
    else:
      output += " "+str(i)

output_list = list(output.split("."))

  for i,j in cosine_similarities.nlargest(int(results_returned)).iteritems():


In [24]:
print(output_list)

[' stormbreaker is, an enchanted axe used by thor', ' when his irresponsible and impetuous behavior reignited a, conflict between asgard and jotunheim, thor, was denied the right to become king, stripped of his power, and banished to earth by odin', " when all the people of asgard refused to bow to her, hela set about massacring odin's armies and enslaving their people, while recruiting skurge to be her own executioner in the process", '']


In [None]:
!pip install allennlp_models

In [None]:
import allennlp_models
from allennlp.predictors.predictor import Predictor

predictor = Predictor.from_path("hf://allenai/bidaf-elmo")
predictor_input = {"passage": "My name is Wolfgang and I live in Berlin", "question": "Where do I live?"}
predictions = predictor.predict_json(predictor_input)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


Fetching 11 files:   0%|          | 0/11 [00:00<?, ?it/s]

Downloading:   0%|          | 0.00/1.22k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/3.85k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/137k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/4.93k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.03k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/450M [00:00<?, ?B/s]

Downloading: 0.00B [00:00, ?B/s]

Downloading:   0%|          | 0.00/14.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/842k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/450M [00:00<?, ?B/s]



Output()

Output()

In [None]:
!pip install mteb

In [None]:
from mteb import MTEB

evaluation = MTEB(tasks=["Banking77Classification"])
results = evaluation.run(predictor, output_folder=f"results/bidaf-elmo")

Downloading and preparing dataset json/mteb--banking77 to /root/.cache/huggingface/datasets/mteb___json/mteb--banking77-e49f5b530115f301/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...


ERROR:mteb.evaluation.MTEB:Error while evaluating Banking77Classification: 'ReadingComprehensionPredictor' object has no attribute 'encode'


Dataset json downloaded and prepared to /root/.cache/huggingface/datasets/mteb___json/mteb--banking77-e49f5b530115f301/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.

Task: Banking77Classification, split: test. Running...


AttributeError: ignored