### TESTING DATABASE CREATION AND CONNECTION

In [9]:
from database.neo4jConnection_utils import db_connection, get_credential
import database.create_db as create_db

# Load the project credentials and open the database client that every call
# below goes through.
cred = get_credential()
client_gds = db_connection(cred)

# Create the constraints first, then load the data.
# NOTE(review): presumably the constraints must exist before the load so the
# MERGE-based import (see the logged Cypher in the cell output) stays unique —
# confirm in database/create_db.py.
create_db.create_constraints(client_gds)
create_db.create_db(client_gds)

# Sanity check: the five articles with the most PURCHASED relationships
# coming from customers, highest count first.
query = """
    MATCH (c:Customer)-[p:PURCHASED]->(a:Article)
    RETURN a.articleId AS Article, COUNT(p) AS Purchases
    ORDER BY Purchases DESC
    LIMIT 5
"""

# Run the query through the database client. It is left as the last expression
# of the cell so the notebook displays the returned result.
client_gds.run_cypher(query)




LOG connection to DB:
                                            key       value
0                                    gdsVersion       2.8.0
1                                    gdsEdition  Unlicensed
2                                  neo4jVersion      5.22.0
3                    minimumRequiredJavaVersion          17
4                      unavailableCompatibility  Neo4j 5.16
..                                          ...         ...
90                 server.memory.pagecache.size   536870912
91  server.memory.off_heap.transaction_max_size  2147483648
92            dbms.memory.transaction.total.max  8589934592
93              db.memory.transaction.total.max           0
94                    db.memory.transaction.max           0

[95 rows x 2 columns]
--------------> connection to db is OK
LOG: constraints are created
staging 266 records

Using This Cypher Query:
```
UNWIND $recs AS rec
MERGE(n:Department {departmentNo: rec.departmentNo})
SET n.departmentName = rec.departmentName, n

Unnamed: 0,Article,Purchases
0,706016001,38
1,706016002,36
2,759871002,25
3,573716012,22
4,399256001,21


### TESTING CONNECTION WITH OLLAMA

In [6]:
from database.neo4jConnection_utils import get_credential

# Re-read the credentials (same call as in the database section); the 'LLM'
# entry is read in the next cell to pick the Ollama model.
cred = get_credential()

In [7]:
# OLLAMA: use the xterm console above to check which other models can be pulled.
# Ollama LLM wrapper from the LangChain community package.
from langchain_community.llms import Ollama
# Model name comes from the credentials/config, not from a literal in this cell.
LLM = cred['LLM']
print('--------->', LLM)
# Build the LLM client for the configured model name.
llm = Ollama(model=LLM)

# Set to False to skip the live sanity checks; the embedding check further
# down reads this same flag.
test = True
if test:
  # Round-trip one short prompt to confirm the model answers.
  prompt = "Tell me a joke in two line"
  response = llm.invoke(prompt)
  print(f'***OLLAMA SANITY CHECK***\nprompt:{prompt}', '\nresponse:', response)

---------> llama3.1:8b
***OLLAMA SANITY CHECK***
prompt:Tell me a joke in two line 
response: Here is a two-line joke:

Why couldn't the bicycle stand up by itself?
Because it was two-tired.


In [8]:
from langchain_community.embeddings.ollama import OllamaEmbeddings

# Pick the embedding backend from the configured model name. Only Ollama
# ("llama"-family) models are wired up in this cell.
if 'llama' in LLM:
  embedding_model = OllamaEmbeddings(model=LLM, base_url='http://localhost:11434')
  # Default vector size: this is what llama3.1:8b returns. Other llama models
  # return different sizes, so the value is reconciled with the model below.
  embedding_dimension = 4096
  print('USING OLLAMA')
else:
  # Bind both names explicitly. Without this, the sanity check below (and any
  # later cell using embedding_model) would raise NameError on a fresh kernel,
  # or silently reuse a stale model left over from an earlier run.
  embedding_model = None
  embedding_dimension = None
  print('WARNING not running on llama instance')

# Only run the live check when an embedding model was actually configured.
if test and embedding_model is not None:
  test_embedding = embedding_model.embed_query("My query to look up")
  # Keep embedding_dimension truthful: trust the size the model really
  # returns over the hard-coded default.
  if len(test_embedding) != embedding_dimension:
    print('WARNING embedding_dimension', embedding_dimension,
          'does not match model output', len(test_embedding), '-> using model output')
    embedding_dimension = len(test_embedding)
  print('***OLLAMA EMBEDDING SANITY CHECK*****')
  print('len(test_embedding) -> ', len(test_embedding))
  # Print only the first few components; the full vector is too long to read.
  print('test_embedding -> ', test_embedding[0:10])

USING OLLAMA
***OLLAMA EMBEDDING SANITY CHECK*****
len(test_embedding) ->  4096
test_embedding ->  [2.1036903858184814, -3.066297769546509, 0.3909342885017395, 0.4271194040775299, -0.624779224395752, -1.34486722946167, 2.5913209915161133, 1.1203879117965698, -3.3904855251312256, -0.4280732274055481]


### TESTING DESCRIPTION EMBEDDING

In [9]:
%load_ext autoreload
%autoreload 2
import pandas as pd
from embeddings.utils import get_full_text
from embeddings.embed_description import embed_description

# Load the raw product table; the path is relative to the notebook's working
# directory. This frame is the input to get_full_text in the next cell.
product_df = pd.read_csv('dataset/product.csv')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
# Derive the per-product description frame from the raw product table
# (the preview printed in the next cell shows `productCode` and `text` columns).
description_df = get_full_text(product_df)

In [11]:
# Preview the first rows to eyeball the generated description text.
print(description_df.head())

   productCode                                               text
0       108775  \n##Product\nName: Strap top\nType: Vest top\n...
1       110065  \n##Product\nName: OP T-shirt (Idro)\nType: Br...
2       111565  \n##Product\nName: 20 den 1p Stockings\nType: ...
3       111586  \n##Product\nName: Shape Up 30 den 1p Tights\n...
4       111593  \n##Product\nName: Support 40 den 1p Tights\nT...


In [12]:
# Embed the descriptions with the Ollama embedding model created earlier.
# The recorded run stopped with KeyboardInterrupt after "Embedded 500 of 8018",
# so this cell is apparently long-running.
# NOTE(review): the result is assigned back to `description_df`, so re-running
# this cell feeds its own output back in — confirm embed_description tolerates
# an already-embedded frame, or keep the input under a separate name.
description_df = embed_description(description_df, embedding_model)

Embedded 500 of 8018


KeyboardInterrupt: 