In [1]:
import os
import chromadb
import numpy as np
from PIL import Image
from common.article_scraper import ArticleScraper
from vector_db.utils import ImageEmbeddingFunction, TextEmbeddingFunction, format_document

# Open the persistent Chroma client stored under vector_db/vector_db.
client = chromadb.PersistentClient(
    path=os.path.join("vector_db", "vector_db"),
)

# Text collection, backed by the project's text embedding function.
text_fn = TextEmbeddingFunction()
text_collection = client.get_or_create_collection(
    name="text_collection",
    embedding_function=text_fn,
)

# Image collection, backed by the project's image embedding function.
img_fn = ImageEmbeddingFunction()
img_collection = client.get_or_create_collection(
    name="img_collection",
    embedding_function=img_fn,
)

# Scraper used to fetch the articles that act as queries.
articleScraper = ArticleScraper()

# Sample image, converted to a NumPy array, used for the image query.
test_img = np.array(Image.open("img_2.jpg"))


  from .autonotebook import tqdm as notebook_tqdm
<All keys matched successfully>


In [6]:
# Scrape the article that serves as the text query.
query_payload = articleScraper.scrape(
    "https://timesofmalta.com/article/man-dies-traffic-accident-swieqi.1090488"
)

# Fail fast when scraping reports an error: previously the error was only
# printed and the cell went on to format and query with the failed payload.
if query_payload.error:
    raise RuntimeError(
        f"Could not scrape query article: {query_payload.error}"
    )

query_document = format_document(query_payload.data, query=True)

# Look up the stored articles closest to the query document.
x = text_collection.query(
    query_texts=query_document
)

# Index 0 selects the results for the single query issued above.
for distance, metadata in zip(x["distances"][0], x["metadatas"][0]):
    print(distance, metadata["title"])

0.2238815426826477 Man dies after Swieqi car crash
0.4240127205848694 Man in hospital after traffic accident in Regional Road
0.5495119690895081 Toddler and Transport Malta worker sustain grievous injuries after two separate accidents
0.5752490758895874 Motorcyclist injured in Marsa collision with car
0.5798760056495667 Woman hospitalised after traffic accident involving forklift in Imqabba
0.5967904329299927 Man in critical condition after van toppled over and hit him
0.597992479801178 Accident near St Julian's tunnels paralyses Maltese roads as traffic stretches on to Żejtun
0.6013765335083008 Updated | Elderly woman, 80, dies after getting caught under coach wheels in Ħal Far
0.6172170042991638 Motorcyclist grievously injured in Fgura crash
0.624396562576294 Elderly man grievously injured after being hit by car driven by a 74-year-old woman


In [10]:
# Look up the stored images closest to the test image's embedding.
x = img_collection.query(
    query_embeddings=img_fn(test_img)
)

# Index 0 selects the results for the single query issued above.
image_hits = x["metadatas"][0]
captions = [hit["caption"] for hit in image_hits]
article_ids = [hit["article_id"] for hit in image_hits]

# Resolve the URL of the article each image belongs to. One lookup per id
# keeps `articles` aligned with `captions`, even when ids repeat. The
# comprehension variable avoids rebinding the builtin `id` as a notebook
# global, which the previous `for id in ...` loop did.
articles = [
    text_collection.get(ids=article_id)["metadatas"][0]["url"]
    for article_id in article_ids
]

for caption, url in zip(captions, articles):
    print(f"{caption}\n{url}\n")

Former prime minister Joseph Muscat leaving court earlier this week. Photo: Matthew Mirabelli
https://timesofmalta.com/article/joseph-muscat-asks-police-commissioner-call-questioning.1091890

Ex-prime minister Joseph Muscat, centre, is expected to be charged over the hospitals’ deal. Photo: Matthew Mirabelli
https://timesofmalta.com/article/hospitals-deal-how-get-here.1091958

Former prime minister Joseph Muscat (left) and his chief of staff Keith Schembri (right)
https://www.maltatoday.com.mt//news/national/128573/pn_police_attorney_general_must_prosecute_keith_schembri_and_joseph_muscat


https://www.maltatoday.com.mt//news/national/128877/abela_warns_judiciary_not_to_engage_in_political_terrorism

Mizzi&rsquo;s successor as health minister Chris Fearne as well as ex-finance minister Edward Scicluna are also reportedly facing the prospects of criminal charges.
https://timesofmalta.com/article/hospitals-deal-how-get-here.1091958

Addressing a press conference outside the courthouse on

In [6]:
# Display the full stored record for the article linked to the first image hit.
text_collection.get(ids=article_ids[0])

{'ids': ['6d1a695ac3b4e94b9ab60a5dbcaec544'],
 'embeddings': None,
 'metadatas': [{'body': "Former prime minister Joseph Muscat has written to Police Commissioner Angelo Gafa asking that he is called in for questioning over the hospitals' inquiry.\n In a Facebook post on Friday, Muscat said he was prepared to answer every question related to the inquiry, as tension mounts over the hospitals debacle.\n “I have nothing to hide, and have no qualms with being transparent… I wrote to the police commissioner and told him to call me in as soon as possible and as soon as he has the conclusions of the Attorney General on the hospitals' inquiry, [so I can] answer any question he may have and show the facts,” he said.\n “As I already said this is another Egrant (the secret Panama company he was linked to). This is a political vendetta against me and my family. I will remain determined that the truth will come out,” he added.\n It emerged on Tuesday that a magisterial inquiry into Muscat and other