# Playground for trying out all sorts of libraries!

## Testing the Cohere Embedding Model

In [12]:
# Step 0) Handle API keys securely: the keys are read from environment variables
# by later cells (via os.environ), so they never appear in the notebook itself.
# load_dotenv() is called first so that, presumably, values from a local .env
# file are available in the environment.
import os
from dotenv import load_dotenv
load_dotenv()

True

In [13]:
# Step 1) Use Cohere's Embed model to create text embeddings

# Set up the Cohere client; the key is taken from the COHERE_API_KEY environment variable.
import cohere
co = cohere.Client(os.environ.get('COHERE_API_KEY'))

# Sample texts to embed: one question and one long-form answer.
to_embed = [
    "I've heard USC is called the University of Spoiled Children. Do you find this to be true?",
    "Not at all! During my time at USC, I have always felt surrounded by an extremely diverse student community. Students come from all types of backgrounds and this really adds to a cohesive and enriching student dynamic. That being said, I think that regardless of a USC student's socio-economic background, everyone at USC works hard and comes together to explore academic and extracurricular interests."
]

In [14]:
# Embed the sample sentences with Cohere.
# TODO: wrap this call in a reusable get_embedding(...) helper.

embed_response = co.embed(
    model="embed-english-v3.0",
    texts=to_embed,
    input_type="search_document",
    embedding_types=["float"],
    truncate="END",
)

# Only the float representation was requested, so take that field of the response.
sentence_embeddings = embed_response.embeddings.float

In [15]:
# Import Pinecone and create a client; the key is taken from the
# PINECONE_API_KEY environment variable.
from pinecone import Pinecone, ServerlessSpec
pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

  from tqdm.autonotebook import tqdm


## Testing the Pinecone Indexing / Endpoints

In [16]:
# Name of the index this section works with.
index_name = "cohere-test-embeddings"

# Names of every index currently listed for this Pinecone client.
existing_indexes = pc.list_indexes().names()
index_exists = index_name in existing_indexes

In [17]:
# Debug check: a membership test against the raw list_indexes() response prints
# False even though an index with this name appears in the listing printed right
# after it. Index names therefore have to be extracted from the response before
# comparing against index_name.
print (index_name in pc.list_indexes())

print(pc.list_indexes())

False
{'indexes': [{'dimension': 8,
              'host': 'cohere-test-embeddings-1efl3et.svc.apw5-4e34-81fa.pinecone.io',
              'metric': 'cosine',
              'name': 'cohere-test-embeddings',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-west-2'}},
              'status': {'ready': True, 'state': 'Ready'}},
             {'dimension': 8,
              'host': 'quickstart-first-1efl3et.svc.apw5-4e34-81fa.pinecone.io',
              'metric': 'euclidean',
              'name': 'quickstart-first',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-west-2'}},
              'status': {'ready': True, 'state': 'Ready'}}]}


In [18]:
# Create an Index
if not index_exists:
    pc.create_index(
        name=index_name,
        dimension=8,
        metric="cosine",
        spec=ServerlessSpec(
            cloud='aws',
            region='us-west-2'
        )
    )

## Understanding LangChain Expression Language (LCEL) under the hood

In [19]:
# https://www.youtube.com/watch?v=O0dUOtOIrfs
# 1st Function | 2nd Function
class MyRunnable:
    """Toy version of an LCEL runnable: wraps a callable and supports `|` chaining."""

    def __init__(self, the_first_function):
        # The wrapped callable; both __call__ and __or__ delegate to it.
        self.first_function = the_first_function

    def __or__(self, second_function):
        """Return a new MyRunnable that pipes this runnable's result into `second_function`."""
        def chained_function(*args, **kwargs):
            # Every argument goes to the first stage; its return value becomes
            # the single positional argument of the second stage.
            return second_function(self.first_function(*args, **kwargs))

        return MyRunnable(chained_function)

    # Note: this example overloads the call operator (`__call__`) rather than
    # exposing an `invoke()` method.
    def __call__(self, *args, **kwargs):
        """Call the wrapped function with the given arguments and return its result."""
        wrapped = self.first_function
        return wrapped(*args, **kwargs)


def add_five(x):
    """Return `x` plus 5."""
    result = x + 5
    return result

def mult_two(x):
    """Return `x` multiplied by 2."""
    result = x * 2
    return result

# Wrap the plain functions so they can be composed.
add_five_runnable = MyRunnable(add_five)
mult_two_runnable = MyRunnable(mult_two)

# `a | b` is dispatched by Python to `a.__or__(b)`, which is exactly how LCEL
# chains are built; use the operator rather than calling the dunder directly.
chain_add_five_mult_two = add_five_runnable | mult_two_runnable

# add_five runs first, then mult_two: (1 + 5) * 2 == 12
chain_add_five_mult_two(1)

12

### Example of LCEL using local Pinecone

In [46]:
import os
# Import Pinecone
from pinecone import Pinecone, ServerlessSpec
from langchain.embeddings import CohereEmbeddings
from langchain_pinecone import PineconeVectorStore

# Connect to Pinecone and pick the index that backs the LangChain vector store.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
index_name = "langchain-test-index-3"
# Alternative index name used earlier in this notebook: "cohere-test-embeddings"

# Names of every index currently listed for this Pinecone client.
existing_indexes = pc.list_indexes().names()
index_exists = index_name in existing_indexes

# Create the index only when it is missing.
# NOTE(review): the index dimension must equal the embedding size; 384 is presumably
# the vector size of "embed-english-light-v3.0" - confirm if the model changes.
if not index_exists:
    pc.create_index(
        name=index_name,
        metric="cosine",
        dimension=384,
        spec=ServerlessSpec(cloud="aws", region="us-west-2"),
    )

# Embedding model LangChain uses for the texts it stores and queries.
embeddings_model = CohereEmbeddings(
    cohere_api_key=os.environ.get("COHERE_API_KEY"),
    model="embed-english-light-v3.0",
)

# Vector store bound to the index above.
vectorstore_a = PineconeVectorStore(index_name=index_name, embedding=embeddings_model)


In [47]:
# Seed the vector store with sample statements about USC; the call is the cell's
# last expression, so its return value is shown as the cell output.
vectorstore_a.add_texts(
    texts=[
        "USC is the best school in the world",
        "USC is known for being awesome because it is very warm during the summer",
        "USC has the best college of Letter's of Arts and Sciences called Dornsife Viterbi",
        "USC has the best dance school, Kauffman, better than any other dance school in the world (Juliard)",
        "USC is known for being bad because it has many hard linguistics and CS classes",
        "USC is bad because Complex Analysis is too Complex, and Accounting is not accounted for",
    ]
)

['f18a82a0-cc1d-4457-aa9e-cf5dec532b5a',
 '7e0f3e02-4bde-4150-b324-57a84a4c896d',
 '9d695282-f69e-45d4-9733-e0d26714e537',
 '4de460d7-7a98-43b2-a655-8bbda4d18f96',
 'a4b3c1fb-a192-4e1c-9d8e-4c96db8916a0']

In [48]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain.prompts import PromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_community.llms import Cohere

# LLM that writes the final answer. No API key is passed here, so it is presumably
# picked up from the environment - confirm.
llm = Cohere(model="command")

# Retriever over the vector store built in the previous cells.
retriever = vectorstore_a.as_retriever()
prompt_str = """
Question: {question}

Context: {context}

Answer:
"""

prompt =  PromptTemplate.from_template(prompt_str)


def _format_docs(docs):
    """Join the text of the retrieved documents into one plain-text context block.

    Without this step the prompt's {context} slot receives the string form of the
    retriever's raw result (a list of Document objects, metadata included) rather
    than just the passages' text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Fan the incoming question out two ways: pass it through unchanged as "question",
# and use it to retrieve documents whose text becomes "context".
retrieve_context_and_get_quality = RunnableParallel(
    {"question": RunnablePassthrough(), "context": retriever | _format_docs}
)

# question -> {question, context} -> filled prompt -> LLM -> plain string
chain = retrieve_context_and_get_quality | prompt | llm  | StrOutputParser()

In [50]:
# Run the whole chain on one question: retrieve context, fill the prompt,
# query the LLM, and parse the result, then print it.
out = chain.invoke("Why is USC bad?")
print(out)

 USC is bad because it has very difficult linguistics and CS courses. However, it is also known to be awesome because of the great weather during the summer. 


In [41]:
# Cleanup: delete the Pinecone index named by the current value of index_name.
# Reading the notebook top to bottom, that is "langchain-test-index-3" from the
# LCEL example, not the "cohere-test-embeddings" index from the earlier section.
# NOTE(review): this cell's execution count (41) is lower than the cells above it,
# so it was last run out of order - confirm it still behaves as intended on a
# fresh Restart & Run All.
pc.delete_index(name=index_name)