# Bases de datos vectoriales

## Codificar datos a embeddings

In [1]:
!pip install -U sentence-transformers
!pip install chromadb
!pip install openai



In [12]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util

In [13]:
df = pd.read_csv('imdb_top_1000.csv')

In [14]:
df.head()

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,A,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,90.0,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,57300000
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,U,96 min,"Crime, Drama",9.0,A jury holdout attempts to prevent a miscarria...,96.0,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,4360000


In [15]:
df['text'] = df.apply(lambda x : x['Overview']+' '+x['Director']+' '+x['Star1']+' '+x['Star2']+' '+x['Star3']+' '+x['Star4'], axis=1)

In [16]:
df["text"][0]

'Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency. Frank Darabont Tim Robbins Morgan Freeman Bob Gunton William Sadler'

In [17]:
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [18]:
embeddings = model.encode(df['text'],batch_size=64,show_progress_bar=True)

Batches:   0%|          | 0/16 [00:00<?, ?it/s]

In [19]:
df['embeddings'] = embeddings.tolist()

In [20]:
df['ids'] = df.index # llevar el indice del dataframe a una columna
df['ids'] = df['ids'].astype('str') # convert to string

In [21]:
df["embeddings"]

0      [-0.11102542281150818, -0.009452976286411285, ...
1      [-0.10081295669078827, -0.021370654925704002, ...
2      [0.005191349890083075, 0.04237934201955795, -0...
3      [-0.07282990217208862, 0.007155457977205515, -...
4      [-0.08513770252466202, 0.05780964344739914, -0...
                             ...                        
995    [0.028408067300915718, -0.02919202297925949, -...
996    [0.024076657369732857, -0.0367053858935833, 0....
997    [-0.08005402237176895, 0.05756458640098572, -0...
998    [-0.020807620137929916, 0.05352265015244484, -...
999    [-0.04539767652750015, 0.04073355346918106, -0...
Name: embeddings, Length: 1000, dtype: object

## Chroma

In [14]:
import chromadb
from chromadb.utils import embedding_functions

In [15]:
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key='xxxxxxxx',
    model_name = 'text-embedding-ada-002'
)

sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name = 'all-MiniLM-L6-v2') 

In [16]:
# instanciamos un cliente 
chroma_client = chromadb.Client()
# para hacer la conexion peristente
client_persistent = chromadb.PersistentClient(path='./chroma.db')



In [17]:
# crear coleccion 
db = client_persistent.get_collection(name='imdb',embedding_function= sentence_transformer_ef)

In [19]:
# carga de datos o insercion de datos
db.add(
    ids = df['ids'].tolist(), # llevar los ids del dataframe a una lista para la base de datos
    embeddings = df['embeddings'].tolist(), # parametro referente a los embeddings
    metadatas = df.drop(['ids', 'embeddings', 'text'], axis=1, inplace=False).to_dict('records') # para los metadatos
    # con el implace a false no se modifica el dataframe original

    
)

Add of existing embedding ID: 0
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Add of existing embedding ID: 4
Add of existing embedding ID: 5
Add of existing embedding ID: 6
Add of existing embedding ID: 7
Add of existing embedding ID: 8
Add of existing embedding ID: 9
Add of existing embedding ID: 10
Add of existing embedding ID: 11
Add of existing embedding ID: 12
Add of existing embedding ID: 13
Add of existing embedding ID: 14
Add of existing embedding ID: 15
Add of existing embedding ID: 16
Add of existing embedding ID: 17
Add of existing embedding ID: 18
Add of existing embedding ID: 19
Add of existing embedding ID: 20
Add of existing embedding ID: 21
Add of existing embedding ID: 22
Add of existing embedding ID: 23
Add of existing embedding ID: 24
Add of existing embedding ID: 25
Add of existing embedding ID: 26
Add of existing embedding ID: 27
Add of existing embedding ID: 28
Add of existing embedding ID: 29
Add of existing embe

In [20]:
db.peek(1) # para ver el primer registro

{'ids': ['0'],
 'embeddings': [[-0.1110253855586052,
   -0.009453012607991695,
   -0.06168840080499649,
   -0.018297705799341202,
   -0.009833205491304398,
   0.06417807936668396,
   0.08203237503767014,
   -0.0490679033100605,
   0.06276258826255798,
   -0.03238987177610397,
   0.01261426042765379,
   0.039338234812021255,
   -0.025924397632479668,
   0.02284662425518036,
   -0.025803232565522194,
   -0.030675353482365608,
   -0.06829347461462021,
   0.03191167116165161,
   0.025044336915016174,
   0.07082389295101166,
   0.006209637504070997,
   -0.030562784522771835,
   -0.01686890795826912,
   -0.03708736225962639,
   0.004426794126629829,
   0.032468799501657486,
   -0.010899757035076618,
   -0.05131012201309204,
   -0.003628186183050275,
   0.0382995530962944,
   0.028951983898878098,
   0.014759071171283722,
   0.10134261101484299,
   0.04917679727077484,
   0.0685054212808609,
   0.021276891231536865,
   0.007060502655804157,
   0.028189919888973236,
   0.05037761107087135,
   

## Chroma embeddings

In [29]:
# a traves de get collection se puede acceder a la coleccion existente 
db_no_embeddings = client_persistent.get_collection(name='movies_db_no_embeddings')

In [None]:
db_no_embeddings.add(
    ids = df['ids'].tolist(), 
    documents= df['text'].tolist(),
    metadatas = df.drop(['ids', 'embeddings', 'text'], axis=1, inplace=False).to_dict('records')
    
)

In [30]:
db_no_embeddings.peek(1)

{'ids': ['1'],
 'embeddings': [[-0.10081302374601364,
   -0.02137051522731781,
   -0.05492264777421951,
   -0.02907075732946396,
   -0.09806850552558899,
   0.03351396694779396,
   0.033286888152360916,
   0.0022160769440233707,
   -0.0223348718136549,
   0.04345111921429634,
   0.08506180346012115,
   0.0005228392546996474,
   0.018331186845898628,
   0.003509313566610217,
   -0.04528844356536865,
   -0.02041180431842804,
   -0.03603034466505051,
   0.0033596137072890997,
   -0.002141389762982726,
   0.010022429749369621,
   -0.028846027329564095,
   -0.0999264195561409,
   0.012006587348878384,
   0.03996679559350014,
   -0.07681243866682053,
   0.059612859040498734,
   -0.011757124215364456,
   0.020119303837418556,
   -0.028658024966716766,
   -0.015758810564875603,
   -0.03031150996685028,
   0.013745659030973911,
   0.04075117036700249,
   0.04419570043683052,
   0.05254553630948067,
   -0.05740448459982872,
   0.04785593971610069,
   0.027237486094236374,
   0.07748442888259888,

In [28]:
# eliminar el primer documento de la coleccion
db_no_embeddings.delete(ids = ['0'])

In [29]:
db_no_embeddings.peek(1)

{'ids': ['1'],
 'embeddings': [[-0.10081302374601364,
   -0.02137051522731781,
   -0.05492264777421951,
   -0.02907075732946396,
   -0.09806850552558899,
   0.03351396694779396,
   0.033286888152360916,
   0.0022160769440233707,
   -0.0223348718136549,
   0.04345111921429634,
   0.08506180346012115,
   0.0005228392546996474,
   0.018331186845898628,
   0.003509313566610217,
   -0.04528844356536865,
   -0.02041180431842804,
   -0.03603034466505051,
   0.0033596137072890997,
   -0.002141389762982726,
   0.010022429749369621,
   -0.028846027329564095,
   -0.0999264195561409,
   0.012006587348878384,
   0.03996679559350014,
   -0.07681243866682053,
   0.059612859040498734,
   -0.011757124215364456,
   0.020119303837418556,
   -0.028658024966716766,
   -0.015758810564875603,
   -0.03031150996685028,
   0.013745659030973911,
   0.04075117036700249,
   0.04419570043683052,
   0.05254553630948067,
   -0.05740448459982872,
   0.04785593971610069,
   0.027237486094236374,
   0.07748442888259888,

### Chroma Query

In [31]:
results = db_no_embeddings.query(
    query_texts=["a history with a elves and a ring"],# el mismo cliente realiza la conversion de la query a embeddings y y busca el por debajp distancias enrte los vectores p
    n_results=2
)

/Users/ignaciocarrenoromero/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:04<00:00, 19.2MiB/s]


In [32]:
results

{'ids': [['609', '10', '5']],
 'distances': [[0.8863633275032043, 1.046806812286377, 1.1970020532608032]],
 'metadatas': [[{'Certificate': 'UA',
    'Director': 'Peter Jackson',
    'Genre': 'Adventure, Fantasy',
    'Gross': '258,366,855',
    'IMDB_Rating': 7.8,
    'Meta_score': 66.0,
    'No_of_Votes': 601408,
    'Overview': 'The dwarves, along with Bilbo Baggins and Gandalf the Grey, continue their quest to reclaim Erebor, their homeland, from Smaug. Bilbo Baggins is in possession of a mysterious and magical ring.',
    'Poster_Link': 'https://m.media-amazon.com/images/M/MV5BMzU0NDY0NDEzNV5BMl5BanBnXkFtZTgwOTIxNDU1MDE@._V1_UX67_CR0,0,67,98_AL_.jpg',
    'Released_Year': '2013',
    'Runtime': '161 min',
    'Series_Title': 'The Hobbit: The Desolation of Smaug',
    'Star1': 'Ian McKellen',
    'Star2': 'Martin Freeman',
    'Star3': 'Richard Armitage',
    'Star4': 'Ken Stott'},
   {'Certificate': 'U',
    'Director': 'Peter Jackson',
    'Genre': 'Action, Adventure, Drama',
    

In [33]:
results['metadatas'][0][0]

{'Certificate': 'UA',
 'Director': 'Peter Jackson',
 'Genre': 'Adventure, Fantasy',
 'Gross': '258,366,855',
 'IMDB_Rating': 7.8,
 'Meta_score': 66.0,
 'No_of_Votes': 601408,
 'Overview': 'The dwarves, along with Bilbo Baggins and Gandalf the Grey, continue their quest to reclaim Erebor, their homeland, from Smaug. Bilbo Baggins is in possession of a mysterious and magical ring.',
 'Poster_Link': 'https://m.media-amazon.com/images/M/MV5BMzU0NDY0NDEzNV5BMl5BanBnXkFtZTgwOTIxNDU1MDE@._V1_UX67_CR0,0,67,98_AL_.jpg',
 'Released_Year': '2013',
 'Runtime': '161 min',
 'Series_Title': 'The Hobbit: The Desolation of Smaug',
 'Star1': 'Ian McKellen',
 'Star2': 'Martin Freeman',
 'Star3': 'Richard Armitage',
 'Star4': 'Ken Stott'}

### Where

1. Estructura

`
{
    "campo_metadatos": {
        <Operador>: <valor>
    }
}
`

2. Operadores
    

    $eq - igual a (cadena, entero, flotante)

    $ne - no igual a (cadena, entero, flotante)

    $gt - mayor que (entero, flotante)

    $gte - mayor o igual que (entero, flotante)

    $lt - menor que (entero, flotante)

    $lte - menor o igual que (entero, flotante)
    

In [34]:
results = db_no_embeddings.query(
    query_texts=['a history with elves and a ring'],
    n_results=2,
    where = {
        'IMDB_Rating':{
            "$gte": 8
        }
    }
    
)


In [38]:
results

{'ids': [['10', '5', '13']],
 'distances': [[1.0577082633972168, 1.1908094882965088, 1.2519581317901611]],
 'metadatas': [[{'Certificate': 'U',
    'Director': 'Peter Jackson',
    'Genre': 'Action, Adventure, Drama',
    'Gross': '315,544,750',
    'IMDB_Rating': 8.8,
    'Meta_score': 92.0,
    'No_of_Votes': 1661481,
    'Overview': 'A meek Hobbit from the Shire and eight companions set out on a journey to destroy the powerful One Ring and save Middle-earth from the Dark Lord Sauron.',
    'Poster_Link': 'https://m.media-amazon.com/images/M/MV5BN2EyZjM3NzUtNWUzMi00MTgxLWI0NTctMzY4M2VlOTdjZWRiXkEyXkFqcGdeQXVyNDUzOTQ5MjY@._V1_UX67_CR0,0,67,98_AL_.jpg',
    'Released_Year': '2001',
    'Runtime': '178 min',
    'Series_Title': 'The Lord of the Rings: The Fellowship of the Ring',
    'Star1': 'Elijah Wood',
    'Star2': 'Ian McKellen',
    'Star3': 'Orlando Bloom',
    'Star4': 'Sean Bean'},
   {'Certificate': 'U',
    'Director': 'Peter Jackson',
    'Genre': 'Action, Adventure, Drama'

In [41]:
results2 = db_no_embeddings.query(
    query_texts=["science fiction film"],
    n_results=2,
    where= {
        "IMDB_Rating":{
            "$gte": 5
    }
    }
)

In [48]:
# buscamos en los resultados el Series_Title para ver que peliculas nos trae
results2['metadatas'][0][1]['Series_Title']

'King Kong'

In [53]:
results3  = db_no_embeddings.query(
    query_texts=[" a true story about"],
    n_results=2,
    where= {
        "IMDB_Rating":{
            "$gte": 7
        }
    }
    
)

In [61]:
print(results3['metadatas'][0][0]['Series_Title'])
print(results3['metadatas'][0][1]['Series_Title'])
print(results3['metadatas'][0][2]['Series_Title'])

The Fall
Sunrise: A Song of Two Humans
Le scaphandre et le papillon


In [79]:
# operamos con el operador and
results4 = db_no_embeddings.query(
    query_texts=['a history with elves and a ring'],
    n_results=2,
    where= { "$and": [
        {
            'IMDB_Rating':{
                '$gte': 5
            }
        },
        {
            'Released_Year':{
                '$ne': '2001'
            }
        }
    ]}
)

In [81]:
results4['metadatas'][0][0]['Series_Title']

'The Hobbit: The Desolation of Smaug'

### Cargar índice de Chroma previamente creado

In [None]:
client_persistent_2 = chromadb.PersistentClient(path="/content/data_embeddings")

In [None]:
db_2 = client_persistent_2.get_collection('movies_db_no_embeddigs')

In [None]:
db_2.peek(1)

# Pinecone

In [1]:
!pip install pinecone-client



In [40]:
from pinecone import Pinecone,ServerlessSpec,PodSpec
from getpass import getpass

In [3]:
pincone_api = getpass('Enter the secret value: ')

In [10]:
pc = Pinecone(api_key=pincone_api)

In [24]:
dimension = len(df['embeddings'][0])
dimension

384

In [45]:
index = pc.Index("embedding")

In [46]:
df = df.fillna(' ')

In [47]:
df

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross,text,embeddings,ids
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469,Two imprisoned men bond over a number of years...,"[-0.11102542281150818, -0.009452976286411285, ...",0
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411,An organized crime dynasty's aging patriarch t...,"[-0.10081295669078827, -0.021370654925704002, ...",1
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444,When the menace known as the Joker wreaks havo...,"[0.005191349890083075, 0.04237934201955795, -0...",2
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,A,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,90.0,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,57300000,The early life and career of Vito Corleone in ...,"[-0.07282990217208862, 0.007155457977205515, -...",3
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,U,96 min,"Crime, Drama",9.0,A jury holdout attempts to prevent a miscarria...,96.0,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,4360000,A jury holdout attempts to prevent a miscarria...,"[-0.08513770252466202, 0.05780964344739914, -0...",4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,https://m.media-amazon.com/images/M/MV5BNGEwMT...,Breakfast at Tiffany's,1961,A,115 min,"Comedy, Drama, Romance",7.6,A young New York socialite becomes interested ...,76.0,Blake Edwards,Audrey Hepburn,George Peppard,Patricia Neal,Buddy Ebsen,166544,,A young New York socialite becomes interested ...,"[0.028408067300915718, -0.02919202297925949, -...",995
996,https://m.media-amazon.com/images/M/MV5BODk3Yj...,Giant,1956,G,201 min,"Drama, Western",7.6,Sprawling epic covering the life of a Texas ca...,84.0,George Stevens,Elizabeth Taylor,Rock Hudson,James Dean,Carroll Baker,34075,,Sprawling epic covering the life of a Texas ca...,"[0.024076657369732857, -0.0367053858935833, 0....",996
997,https://m.media-amazon.com/images/M/MV5BM2U3Yz...,From Here to Eternity,1953,Passed,118 min,"Drama, Romance, War",7.6,"In Hawaii in 1941, a private is cruelly punish...",85.0,Fred Zinnemann,Burt Lancaster,Montgomery Clift,Deborah Kerr,Donna Reed,43374,30500000,"In Hawaii in 1941, a private is cruelly punish...","[-0.08005402237176895, 0.05756458640098572, -0...",997
998,https://m.media-amazon.com/images/M/MV5BZTBmMj...,Lifeboat,1944,,97 min,"Drama, War",7.6,Several survivors of a torpedoed merchant ship...,78.0,Alfred Hitchcock,Tallulah Bankhead,John Hodiak,Walter Slezak,William Bendix,26471,,Several survivors of a torpedoed merchant ship...,"[-0.020807620137929916, 0.05352265015244484, -...",998


In [None]:
from tqdm.auto import tqdm

# we will use batches of 64
batch_size=64

for i in tqdm(range(0, len(df), batch_size)):

    # find end of batch
    i_end = min(i+batch_size, len(df))
    # extract batch
    batch = df[i:i_end]
    # generate embeddings for batch
    ids = batch['ids']
    emb = batch['embeddings']
    metadata = batch.drop(['ids','embeddings','text'],axis=1).to_dict('records')

    # add all to upsert list
    to_upsert = list(zip(ids, emb, metadata))
    # update/insert these records to pinecone
    _ = index.upsert(to_upsert)

# check that we have all vectors in index
index.describe_index_stats()

In [56]:
from tqdm.auto import tqdm
batch_size = 64
for i in tqdm(range(0, len(df), batch_size)):
    i_end = min(i+batch_size, len(df))
    batch = df[i:i_end]
    ids = batch['ids']
    emb = batch['embeddings']
    metadata = batch.drop(['ids','embeddings','text'],axis=1).to_dict('records')
    to_upsert = list(zip(ids, emb, metadata))
    _ = index.upsert(to_upsert)
index.describe_index_stats()
    

  0%|          | 0/16 [00:00<?, ?it/s]

{'dimension': 384,
 'index_fullness': 0.00999,
 'namespaces': {'': {'vector_count': 999}},
 'total_vector_count': 999}

In [57]:
index.fetch(['0'])

{'namespace': '',
 'usage': {'read_units': 1},
 'vectors': {'0': {'id': '0',
                   'metadata': {'Certificate': 'A',
                                'Director': 'Frank Darabont',
                                'Genre': 'Drama',
                                'Gross': '28,341,469',
                                'IMDB_Rating': 9.3,
                                'Meta_score': 80.0,
                                'No_of_Votes': 2343110.0,
                                'Overview': 'Two imprisoned men bond over a '
                                            'number of years, finding solace '
                                            'and eventual redemption through '
                                            'acts of common decency.',
                                'Poster_Link': 'https://m.media-amazon.com/images/M/MV5BMDFkYTc0MGEtZmNhMC00ZDIzLWFmNTEtODM1ZmRlYWMwMWFmXkEyXkFqcGdeQXVyMTMxODk2OTU@._V1_UX67_CR0,0,67,98_AL_.jpg',
                                'Releas

In [53]:
index.delete(ids=['0'])

{}

### Pinecone query

In [58]:
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [61]:
query = 'a history of time travel'
query_embedding = model.encode(query).tolist()
query_embedding

[-0.0549708791077137,
 0.05026795715093613,
 -0.012098404578864574,
 0.046572159975767136,
 -0.0312655009329319,
 0.001558289397507906,
 0.039730653166770935,
 0.003980984911322594,
 -0.010791687294840813,
 -0.054410990327596664,
 0.055166952311992645,
 0.013507971540093422,
 -0.0647030770778656,
 0.06747356057167053,
 -0.10498882085084915,
 -0.025172339752316475,
 -0.0571795254945755,
 -0.009965495206415653,
 0.05063902959227562,
 -0.06064019352197647,
 -0.07859353721141815,
 -0.03197640925645828,
 -0.002687412314116955,
 0.03949083015322685,
 0.0386492982506752,
 0.07397972047328949,
 0.017026357352733612,
 -0.048958223313093185,
 -0.01474420353770256,
 -0.01744072511792183,
 -0.08089590072631836,
 0.0035485841799527407,
 -0.10366638004779816,
 -5.9719914133893326e-05,
 -0.035256870090961456,
 0.04028409719467163,
 0.07366015017032623,
 0.011518873274326324,
 0.049274198710918427,
 -0.012650789692997932,
 0.008346671238541603,
 0.012107228860259056,
 0.04001319035887718,
 0.047383785

In [62]:
responses = index.query(vector=query_embedding, top_k=5,include_metadata=True)

In [72]:
responses['matches'][0]['metadata']

{'Certificate': 'U',
 'Director': 'Mamoru Hosoda',
 'Genre': 'Animation, Adventure, Comedy',
 'Gross': ' ',
 'IMDB_Rating': 7.7,
 'Meta_score': ' ',
 'No_of_Votes': 60368.0,
 'Overview': 'A high-school girl named Makoto acquires the power to travel back in time, and decides to use it for her own personal benefits. Little does she know that she is affecting the lives of others just as much as she is her own.',
 'Poster_Link': 'https://m.media-amazon.com/images/M/MV5BMzA4ZGM1NjYtMjcxYS00MTdiLWJmNzEtMTUzODY0NDQ0YzUzXkEyXkFqcGdeQXVyMzYwMjQ3OTI@._V1_UY98_CR1,0,67,98_AL_.jpg',
 'Released_Year': '2006',
 'Runtime': '98 min',
 'Series_Title': 'Toki o kakeru shôjo',
 'Star1': 'Riisa Naka',
 'Star2': 'Takuya Ishida',
 'Star3': 'Mitsutaka Itakura',
 'Star4': 'Ayami Kakiuchi'}

### Filter

`
filter={
        "genre": {"$eq": "documentary"},
        "year": 2019
    }
`

The metadata filters can be combined with AND and OR:

$eq - Equal to (number, string, boolean)

$ne - Not equal to (number, string, boolean)

$gt - Greater than (number)

$gte - Greater than or equal to (number)

$lt - Less than (number)

$lte - Less than or equal to (number)

$in - In array (string or number)

$nin - Not in array (string or number)


In [78]:
query = 'a history of time travel'
query_embedding = model.encode(query).tolist()
responses = index.query(vector=query_embedding, top_k=5,include_metadata=True,
                        filters={ 
                            "Genre": {
                                "$eq": "Action, Adventure, Fantasy"
                            }
                        })

In [80]:
responses['matches'][0]['metadata']

{'Certificate': 'U',
 'Director': 'Mamoru Hosoda',
 'Genre': 'Animation, Adventure, Comedy',
 'Gross': ' ',
 'IMDB_Rating': 7.7,
 'Meta_score': ' ',
 'No_of_Votes': 60368.0,
 'Overview': 'A high-school girl named Makoto acquires the power to travel back in time, and decides to use it for her own personal benefits. Little does she know that she is affecting the lives of others just as much as she is her own.',
 'Poster_Link': 'https://m.media-amazon.com/images/M/MV5BMzA4ZGM1NjYtMjcxYS00MTdiLWJmNzEtMTUzODY0NDQ0YzUzXkEyXkFqcGdeQXVyMzYwMjQ3OTI@._V1_UY98_CR1,0,67,98_AL_.jpg',
 'Released_Year': '2006',
 'Runtime': '98 min',
 'Series_Title': 'Toki o kakeru shôjo',
 'Star1': 'Riisa Naka',
 'Star2': 'Takuya Ishida',
 'Star3': 'Mitsutaka Itakura',
 'Star4': 'Ayami Kakiuchi'}

### Load Index

In [None]:
pinecone.init(api_key=pincone_api, environment="_________")

In [None]:
index_2 = pinecone.Index('movies-emebeddings')

In [None]:
query = 'a history of an space journey'



In [None]:
responses