In [1]:
from jina import Flow, Executor, requests, Document, DocumentArray
import pandas as pd
import numpy as np 

### The 3 fundamental Jina concepts

* document
* executor 
* flow

In [2]:
# Load the lyrics CSV into a DataFrame and preview its first rows.
df = pd.read_csv('lyrics-JINA - Sheet1.csv')
df.head()

Unnamed: 0,Index,Singer,Song,Lyrics,Collected By
0,1,Bruno Mars,24 K Magic,Tonight\nI just want to take you higher\nThrow...,Ishita
1,2,Taylor Swift,Wildest Dreams,"He said, ""Let's get out of this town\nDrive ou...",Ishita
2,3,Green Day,Numb,I'm tired of being what you want me to be\nFee...,Ishita
3,4,Macklemore,Glorius,You know I'm back like I never left\nAnother s...,Ishita
4,5,Ariana Grande,POV,It's like you got superpowers\nTurn my minutes...,Ishita


In [3]:
# Clean the frame in two explicit steps: remove exact duplicate rows first,
# then remove every row that still has a missing value.
deduplicated = df.drop_duplicates()
df = deduplicated.dropna()
df.shape

(33, 5)

### sample songs

In [4]:
# Print the first three songs: a centred header, then the full lyrics.
for row_pos in range(3):
    # Columns at positions 1..3 hold the singer, the song title and the lyrics.
    singer, song, lyrics = df.iloc[row_pos, 1:4]
    header_lines = (
        f" Song name : {song} ",
        f" Singer Name : {singer} ",
    )

    print()
    for header in header_lines:
        print(header.center(80, ' '))
    print()
    print(lyrics)
    print()
    print('-' * 80)


                             Song name : 24 K Magic                             
                            Singer Name : Bruno Mars                            

Tonight
I just want to take you higher
Throw your hands up in the sky
Let's set this party off right
Players, put yo' pinky rings up to the moon
Girls, what y'all trying to do?
Twenty four karat magic in the air
Head to toe so player
Look out uh
Pop pop, it's show time (show time)
Show time (show time)
Guess who's back again?
Oh they don't know? (Go on tell 'em)
Oh they don't know? (Go on tell 'em)
I bet they know soon as we walk in (showin' up)
Wearing Cuban links (ya)
Designer minks (ya)
Inglewood's finest shoes (whoop, whoop)
Don't look too hard might hurt ya'self
Known to give the color red the blues
Oh shit, I'm a dangerous man with some money in my pocket (keep up)
So many pretty girls around me and they waking up the rocket (keep up)
Why you mad, fix ya face, ain't my fault y'all be jocking (keep up)
Players only, com

### Creating a Document and a DocumentArray

In [5]:
# basic document 

# Document(text = 'something')  # use it for strings 
# Document(content = 'something') # use it when you don't know which Jina data type the content is
# Document(blob = 'img, vid, audio') # use it for images, audios and videos
# Document(uri = 'local file path or url link') # can be either a local file path or a remote URL

In [6]:
docs = DocumentArray()
# Walk the song-title and lyrics columns (positions 2 and 3) in lockstep and
# wrap each lyric in a Document tagged with its song name.
for song_title, lyrics in zip(df.iloc[:, 2], df.iloc[:, 3]):
    lyric_doc = Document(text=lyrics)
    lyric_doc.tags['song_name'] = song_title
    docs.append(lyric_doc)

docs

<jina.types.arrays.document.DocumentArray (length=33) at 139727250103024>

In [7]:
# Show the first document serialised as a JSON string.
docs[0].json()

'{\n  "id": "5062c091-3e63-11ec-ac72-f469d5c3a4bd",\n  "mime_type": "text/plain",\n  "tags": {\n    "song_name": "24 K Magic"\n  },\n  "text": "Tonight\\nI just want to take you higher\\nThrow your hands up in the sky\\nLet\'s set this party off right\\nPlayers, put yo\' pinky rings up to the moon\\nGirls, what y\'all trying to do?\\nTwenty four karat magic in the air\\nHead to toe so player\\nLook out uh\\nPop pop, it\'s show time (show time)\\nShow time (show time)\\nGuess who\'s back again?\\nOh they don\'t know? (Go on tell \'em)\\nOh they don\'t know? (Go on tell \'em)\\nI bet they know soon as we walk in (showin\' up)\\nWearing Cuban links (ya)\\nDesigner minks (ya)\\nInglewood\'s finest shoes (whoop, whoop)\\nDon\'t look too hard might hurt ya\'self\\nKnown to give the color red the blues\\nOh shit, I\'m a dangerous man with some money in my pocket (keep up)\\nSo many pretty girls around me and they waking up the rocket (keep up)\\nWhy you mad, fix ya face, ain\'t my fault y\'al

In [8]:
# Show the raw lyrics text stored on the first document.
docs[0].text

"Tonight\nI just want to take you higher\nThrow your hands up in the sky\nLet's set this party off right\nPlayers, put yo' pinky rings up to the moon\nGirls, what y'all trying to do?\nTwenty four karat magic in the air\nHead to toe so player\nLook out uh\nPop pop, it's show time (show time)\nShow time (show time)\nGuess who's back again?\nOh they don't know? (Go on tell 'em)\nOh they don't know? (Go on tell 'em)\nI bet they know soon as we walk in (showin' up)\nWearing Cuban links (ya)\nDesigner minks (ya)\nInglewood's finest shoes (whoop, whoop)\nDon't look too hard might hurt ya'self\nKnown to give the color red the blues\nOh shit, I'm a dangerous man with some money in my pocket (keep up)\nSo many pretty girls around me and they waking up the rocket (keep up)\nWhy you mad, fix ya face, ain't my fault y'all be jocking (keep up)\nPlayers only, come on\nPut your pinky rings up to the moon\nGirls, what y'all trying to do?\nTwenty four karat magic in the air\nHead to toe so player\nUh, l

In [9]:
# Display the first document itself.
docs[0]

### creating your own executor

In [10]:
class Clean(Executor):
    """Executor that discards documents whose text is empty."""

    @requests
    def clean(self, docs, parameters, **kwargs):
        """Echo the request parameters and keep only documents with text.

        Args:
            docs: Documents carried by the incoming request.
            parameters: Request parameters; printed unchanged.
            **kwargs: Any further arguments supplied by the caller; unused.

        Returns:
            A new DocumentArray containing the documents whose ``text`` has
            non-zero length.
        """
        print(parameters)
        non_empty_docs = [doc for doc in docs if len(doc.text) > 0]
        return DocumentArray(non_empty_docs)

### Flow

In [11]:
# Hugging Face model id handed to the encoder as `pretrained_model_name_or_path`.
model = "sentence-transformers/paraphrase-distilroberta-base-v1"

# Assemble the flow one executor at a time: the hub encoder first, then the
# hub indexer.
flow = Flow()
flow = flow.add(
    name="lyrics_text_encoder",
    uses="jinahub://TransformerTorchEncoder",
    uses_with={"pretrained_model_name_or_path": model},
)
flow = flow.add(name="lyrics_text_indexer", uses='jinahub://SimpleIndexer')

In [12]:
# Display the assembled flow.
flow

In [13]:
#docs[0]

In [14]:
!rm -rf workspace # Remove workspace in case we've indexed stuff before

In [15]:
# Run the flow for the duration of the block and index every document in `docs`.
with flow:
    flow.index(inputs=docs)



⠋ 0/3 waiting lyrics_text_encoder lyrics_text_indexer gateway to be ready...



⠧ 2/3 waiting lyrics_text_encoder to be ready...



           Flow@69348[I]:🎉 Flow is ready to use!
	🔗 Protocol: 		GRPC
	🏠 Local access:	0.0.0.0:48391
	🔒 Private network:	192.168.111.107:48391




In [16]:
# Sanity check: report every document whose lyrics are empty.
# Printing the empty text itself would only emit blank lines, so identify the
# offending document by its position in `docs` and its song name instead.
for position, doc in enumerate(docs):
    if len(doc.text) == 0:
        print(f"Empty lyrics at position {position}: {doc.tags['song_name']}")

In [39]:

# Hugging Face model id; it is passed to the encoder executor below as
# `pretrained_model_name_or_path`.
model = "sentence-transformers/paraphrase-distilroberta-base-v1" # Any model from Huggingface

In [40]:
from jina import Flow

In [None]:
# Executors to chain, in order; each dict is forwarded to `Flow.add` as
# keyword arguments.
executor_specs = [
    {
        "name": "error_text_encoder",
        "uses": "jinahub://TransformerTorchEncoder",
        "uses_with": {"pretrained_model_name_or_path": model},
    },
    {
        "name": "error_text_indexer",
        "uses": 'jinahub://SimpleIndexer',
    },
]

flow = Flow()
for spec in executor_specs:
    flow = flow.add(**spec)

### indexing 

In [None]:
!rm -rf workspace

with flow:
    flow.index(
      inputs=docs,
        docs = docs,
        parameters = {'name' : 'somethign', 'xyz' : 'fsdfsdfsa'}
  )

### querying 

In [None]:
# Read the search text from the user and wrap it in a Document.
user_query_text = input('Query product : ')
query = Document(text=user_query_text)

# Run the flow for the duration of the block and keep the search results.
with flow:
    response = flow.search(inputs=query, return_results=True)

### finding the best match

In [None]:
# Take the first document of the first response and keep its matches.
first_result_doc = response[0].docs[0]
matches = first_result_doc.matches
matches

In [None]:
# Print every match: a centred header with the song name, then its lyrics.
# The indexed documents carry a single tag, 'song_name', and keep the lyrics
# in `text`, so those are the fields read here.
for match in matches:
    print()
    print(f" Song name : {match.tags['song_name']} ".center(80, ' '))
    print()
    print(match.text)
    print()
    print('-' * 80)