In [16]:
import getpass
import os

api_key = getpass.getpass("Enter your Google API Key: ")
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = api_key
os.environ["GOOGLE_API_KEY"] = getpass.getpass()

print(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"))

from langchain_google_vertexai import ChatVertexAI

model = ChatVertexAI(model="gemini-1.5-flash-002")

D:\datahackaton-projekt-18-281a9f33858f.json


In [17]:
from langchain_core.messages import HumanMessage, SystemMessage

messages = [
    SystemMessage(content="Translate the following from English into Russian"),
    HumanMessage(content="Hello my dear friend! How are you?"),
]

result = model.invoke(messages)

In [3]:
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()
parser.invoke(result)

'Привет, мой дорогой друг! Как дела?\n'

In [4]:
from langchain_core.prompts import ChatPromptTemplate

system_template = "Translate the following into {language}:"

In [5]:
prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", "{text}")]
)

In [6]:
result = prompt_template.invoke({"language": "italian", "text": "hi"})

result

ChatPromptValue(messages=[SystemMessage(content='Translate the following into italian:', additional_kwargs={}, response_metadata={}), HumanMessage(content='hi', additional_kwargs={}, response_metadata={})])

In [7]:
result.to_messages()

[SystemMessage(content='Translate the following into italian:', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='hi', additional_kwargs={}, response_metadata={})]

In [8]:
chain = prompt_template | model | parser

In [9]:
chain.invoke({"language": "italian", "text": "hi"})

'Ciao\n'

In [49]:
from langchain_core.documents import Document

documents = [
    Document(page_content="Dogs are great companions, known for their loyalty and friendliness.", metadata={"source": "mammal-pets-doc", "pet_type": "dog"}),
    Document(page_content="Cats are independent pets that often enjoy their own space.", metadata={"source": "mammal-pets-doc", "pet_type": "cat"}),
    Document(page_content="Goldfish are popular pets for beginners, requiring relatively simple care.", metadata={"source": "fish-pets-doc", "pet_type": "fish"}),
    Document(page_content="Parrots are intelligent birds capable of mimicking human speech.", metadata={"source": "bird-pets-doc", "pet_type": "bird"}),
    Document(page_content="Rabbits are social animals that need plenty of space to hop around.", metadata={"source": "mammal-pets-doc", "pet_type": "rabbit"}),
    Document(page_content="Choosing a pet depends on your lifestyle and living situation.", metadata={"source": "pet-advice-doc", "topic": "pet selection"}),
    Document(page_content="Proper pet care involves providing adequate food, water, and enrichment.", metadata={"source": "pet-advice-doc", "topic": "pet care"})
]

In [50]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

os.environ["GOOGLE_API_KEY"] = ""

embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectorstore = Chroma.from_documents(
    documents,
    embedding=embedding,
)

In [51]:
vectorstore.similarity_search("cat")

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'pet_type': 'cat', 'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'source': 'pet-advice-doc', 'topic': 'pet selection'}, page_content='Choosing a pet depends on your lifestyle and living situation.')]

In [53]:
vectorstore.similarity_search_with_score("cat", k=7)

[(Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  0.6390674114227295),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  0.6390674114227295),
 (Document(metadata={'pet_type': 'cat', 'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  0.6390674114227295),
 (Document(metadata={'source': 'pet-advice-doc', 'topic': 'pet selection'}, page_content='Choosing a pet depends on your lifestyle and living situation.'),
  0.8389304876327515),
 (Document(metadata={'source': 'pet-advice-doc', 'topic': 'pet care'}, page_content='Proper pet care involves providing adequate food, water, and enrichment.'),
  0.8391101360321045),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
  0.8766832947731018),
 (Doc

In [29]:
vectorstore.similarity_search_by_vector(embedding.embed_query("cat"))

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')]

In [54]:
# TODO : Set values as per your requirements
# Project and Storage Constants
PROJECT_ID = "datahackaton-projekt-18"
REGION = "us-central1"
BUCKET = "example-model-1"
BUCKET_URI = f"gs://{BUCKET}"

# The number of dimensions for the textembedding-gecko@003 is 768
# If other embedder is used, the dimensions would probably need to change.
DIMENSIONS = 768

# Index Constants
DISPLAY_NAME = "my_matching_engine_index_id"
DEPLOYED_INDEX_ID = "my_matching_engine_endpoint_id"

In [57]:
gcloud storage buckets create $BUCKET_URI --project=$PROJECT_ID --location=$REGION
from google.cloud import aiplatform
from langchain_google_vertexai import VertexAIEmbeddings
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)
embedding_model = VertexAIEmbeddings(model_name="textembedding-gecko@003")

SyntaxError: invalid syntax (768178236.py, line 1)

In [58]:
my_index = aiplatform.MatchingEngineIndex.create_tree_ah_index(
    display_name=DISPLAY_NAME,
    dimensions=DIMENSIONS,
    approximate_neighbors_count=150,
    distance_measure_type="DOT_PRODUCT_DISTANCE",
    index_update_method="STREAM_UPDATE",  # allowed values BATCH_UPDATE , STREAM_UPDATE
)

Creating MatchingEngineIndex
Create MatchingEngineIndex backing LRO: projects/401672915742/locations/us-central1/indexes/5992861738914021376/operations/5834784574231543808
MatchingEngineIndex created. Resource name: projects/401672915742/locations/us-central1/indexes/5992861738914021376
To use this MatchingEngineIndex in another session:
index = aiplatform.MatchingEngineIndex('projects/401672915742/locations/us-central1/indexes/5992861738914021376')
