In [2]:
import chromadb
import polars as pl
import openai
import chromadb.utils.embedding_functions as embedding_functions
import os

In [3]:
articles = pl.read_csv('Articles.csv', encoding='ISO-8859-1').with_row_index(offset=1)
print(articles.head())

shape: (5, 5)
┌───────┬──────────────────────┬──────────┬─────────────────────┬──────────┐
│ index ┆ Article              ┆ Date     ┆ Heading             ┆ NewsType │
│ ---   ┆ ---                  ┆ ---      ┆ ---                 ┆ ---      │
│ u32   ┆ str                  ┆ str      ┆ str                 ┆ str      │
╞═══════╪══════════════════════╪══════════╪═════════════════════╪══════════╡
│ 1     ┆ KARACHI: The Sindh   ┆ 1/1/2015 ┆ sindh govt decides  ┆ business │
│       ┆ government …         ┆          ┆ to cut publ…        ┆          │
│ 2     ┆ HONG KONG: Asian     ┆ 1/2/2015 ┆ asia stocks up in   ┆ business │
│       ┆ markets start…       ┆          ┆ new year tra…       ┆          │
│ 3     ┆ HONG KONG:  Hong     ┆ 1/5/2015 ┆ hong kong stocks    ┆ business │
│       ┆ Kong shares o…       ┆          ┆ open 0.66 per…      ┆          │
│ 4     ┆ HONG KONG: Asian     ┆ 1/6/2015 ┆ asian stocks sink   ┆ business │
│       ┆ markets tumbl…       ┆          ┆ euro near ni…     

In [4]:
client = chromadb.PersistentClient(path='./persistentdb')


In [5]:
collection = client.get_or_create_collection(name="articles")


In [6]:
import chromadb.utils.embedding_functions as embedding_functions
openai_ef=embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.getenv("API_KEY"),
    model_name="text-embedding-ada-002"
)

In [7]:
articles

index,Article,Date,Heading,NewsType
u32,str,str,str,str
1,"""KARACHI: The Sindh government …","""1/1/2015""","""sindh govt decides to cut publ…","""business"""
2,"""HONG KONG: Asian markets start…","""1/2/2015""","""asia stocks up in new year tra…","""business"""
3,"""HONG KONG: Hong Kong shares o…","""1/5/2015""","""hong kong stocks open 0.66 per…","""business"""
4,"""HONG KONG: Asian markets tumbl…","""1/6/2015""","""asian stocks sink euro near ni…","""business"""
5,"""NEW YORK: US oil prices Monday…","""1/6/2015""","""us oil prices slip below 50 a …","""business"""
…,…,…,…,…
2688,"""strong>DUBAI: Dubai Internatio…","""3/25/2017""","""Laptop ban hits Dubai for 11m …","""business"""
2689,"""strong>BEIJING: Former Prime M…","""3/26/2017""","""Pak China relations not agains…","""business"""
2690,"""strong>WASHINGTON: Uber has gr…","""3/26/2017""","""Uber grounds self driving cars…","""business"""
2691,"""strong>BEIJING: The New Develo…","""3/27/2017""","""New Development Bank plans joi…","""business"""


In [8]:
articles=articles[:50]

In [9]:
articles

index,Article,Date,Heading,NewsType
u32,str,str,str,str
1,"""KARACHI: The Sindh government …","""1/1/2015""","""sindh govt decides to cut publ…","""business"""
2,"""HONG KONG: Asian markets start…","""1/2/2015""","""asia stocks up in new year tra…","""business"""
3,"""HONG KONG: Hong Kong shares o…","""1/5/2015""","""hong kong stocks open 0.66 per…","""business"""
4,"""HONG KONG: Asian markets tumbl…","""1/6/2015""","""asian stocks sink euro near ni…","""business"""
5,"""NEW YORK: US oil prices Monday…","""1/6/2015""","""us oil prices slip below 50 a …","""business"""
…,…,…,…,…
46,"""Karachi: Microsoft Devices Pak…","""2/12/2015""","""nokia 215 dual sim launched in…","""business"""
47,"""ISLAMABAD: Federal Finance Min…","""2/12/2015""","""cnic number now tax number onl…","""business"""
48,"""ISLAMABAD: Government has put …","""2/12/2015""","""govt imposes new taxes of rs4 …","""business"""
49,"""Singapore: Oil prices edged hi…","""2/12/2015""","""oil prices rise in asian trad""","""business"""


In [10]:
articles_list = articles['Article'].to_list()

# Generate OpenAI embeddings
vectors = openai_ef(articles_list)

# Generate unique IDs for each article
ids = [f"id{x}" for x in articles['index'].to_list()]

In [11]:
collection.upsert(
    documents=articles_list,
    ids=ids,
    embeddings=vectors,
)

In [12]:
articles['Article'][1]

'HONG KONG: Asian markets started 2015 on an upswing in limited trading on Friday, with mainland Chinese stocks surging in Hong Kong on speculation Beijing may ease monetary policy to boost slowing growth.Hong Kong rose 1.07 percent, closing 252.78 points higher at 23857.82.Seoul closed up 0.57 percent, rising 10.85 points to 1,926.44, while Sydney gained 0.46 percent, or 24.89 points, to close at 5,435.9.Singapore edged up 0.19 percent, gaining 6.39 points to 3,371.54.Markets in mainland China, Japan, Taiwan, New Zealand, the Philippines, and Thailand remained closed for holidays.With mainland bourses shut until January 5, shares in Chinese developers and financial companies surged in Hong Kong, stoked by hopes that Beijing could ease monetary policy to support lagging growth in the world´s second-largest economy.China Vanke, the country´s biggest developer by sales, leapt 10.8 percent and the People´s Insurance Company (Group) of China Ltd. was up 5.51 percent in afternoon trading.Tr

In [13]:
query='HONG KONG: Asian markets started 2015 on an upswing in limited trading on Friday'
query_embeddings=openai_ef([query])
collection.query(
    query_embeddings=query_embeddings,
    n_results=1
    
)

{'ids': [['id2']],
 'embeddings': None,
 'documents': [['HONG KONG: Asian markets started 2015 on an upswing in limited trading on Friday, with mainland Chinese stocks surging in Hong Kong on speculation Beijing may ease monetary policy to boost slowing growth.Hong Kong rose 1.07 percent, closing 252.78 points higher at 23857.82.Seoul closed up 0.57 percent, rising 10.85 points to 1,926.44, while Sydney gained 0.46 percent, or 24.89 points, to close at 5,435.9.Singapore edged up 0.19 percent, gaining 6.39 points to 3,371.54.Markets in mainland China, Japan, Taiwan, New Zealand, the Philippines, and Thailand remained closed for holidays.With mainland bourses shut until January 5, shares in Chinese developers and financial companies surged in Hong Kong, stoked by hopes that Beijing could ease monetary policy to support lagging growth in the world´s second-largest economy.China Vanke, the country´s biggest developer by sales, leapt 10.8 percent and the People´s Insurance Company (Group) o

In [14]:
query='Its private bank, especially its Swiss arm, had undergone "a radical transformation" in recent years, '
query_embeddings=openai_ef([query])
collection.query(
    query_embeddings=query_embeddings,
    n_results=3
)

{'ids': [['id41', 'id27', 'id30']],
 'embeddings': None,
 'documents': [['LONDON: British bank HSBC Holdings Plc admitted on Sunday failings by its Swiss subsidiary, in response to media reports it helped wealthy customers dodge taxes and conceal millions of dollars of assets."We acknowledge and are accountable for past compliance and control failures," HSBC said on Sunday after news outlets including French newspaper Le Monde and Britain\x92s The Guardian published allegations about its Swiss private bank. The Guardian, along with other news outlets, cited documents obtained by the International Consortium of Investigative Journalists (ICIJ) via Le Monde.HSBC said that its Swiss arm had not been fully integrated into HSBC after its purchase in 1999, allowing "significantly lower" standards of compliance and due diligence to persist.The Guardian alleged in its report that the files showed HSBC\'s Swiss bank routinely allowed clients to withdraw \x93bricks\x94 of cash, often in foreign 

In [15]:
query='what is the deal with price of petrol'
query_embeddings=openai_ef([query])
collection.query(
    query_embeddings=query_embeddings,
    n_results=1
    
)

{'ids': [['id24']],
 'embeddings': None,
 'documents': [['ISLAMABAD: In a move to give relief to consumers, sources in the Finance Ministry said on Tuesday that the price of petrol and petroleum products are expected to decrease further from February 1.According to sources, the price of petrol is expected to be slashed by Rs 10 per litre, High Speed Diesel by Rs 8.50 per litre, Light Diesel by Rs 11 per litre, HOBC by Rs 14 per litre, and Kerosene by Rs 12 per litre.Global crude oil prices have fallen by 50 percent since June 2014, and to provide consistent relief to consumers, the Pakistan government has decreased the price of petrol by Rs 29 since the last four months and brought the price of Diesel down by Rs 23 in the same time frame.Fuel crisis in the country began last week when Pakistan State Oil (PSO) was forced to slash imports because banks refused to extend any more credit to the government-owned company, which supplies 80 percent of the country´s oil.The shortfall led to lo

In [16]:
query='ISLAMABAD: In a move to give relief to consumers,'
query_embeddings=openai_ef([query])
collection.query(
    query_embeddings=query_embeddings,
    #n_results=1,
    where={"NewsType":"business"}
)

{'ids': [[]],
 'embeddings': None,
 'documents': [[]],
 'uris': None,
 'data': None,
 'metadatas': [[]],
 'distances': [[]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}