- Groq is a company that develops high-performance hardware and software for AI inference. Its core product, the LPU™ Inference Engine, is designed to accelerate AI workloads and offers significant speed improvements. It provides high compute throughput, enabling faster processing of AI models, and the platform is optimized for energy efficiency, which reduces operational costs. Overall, Groq aims to improve AI performance across a range of applications, including machine learning and deep learning workloads.
- https://console.groq.com/playground

In [1]:
import os
from dotenv import load_dotenv
import openai

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Read both provider keys once and fail fast with a clear message when one is
# missing, rather than surfacing an opaque authentication error later when the
# chat models are constructed or invoked.
openai_api_key = os.getenv('OPENAI_API_KEY')
groq_api_key = os.getenv('GROQ_API_KEY')

missing_keys = [
    name
    for name, value in (('OPENAI_API_KEY', openai_api_key), ('GROQ_API_KEY', groq_api_key))
    if not value
]
if missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_keys)
        + ". Define them in the environment or in a .env file."
    )

openai.api_key = openai_api_key

In [2]:
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq

# Groq-hosted chat model, authenticated with the key held in groq_api_key.
groq_model = ChatGroq(
    model="gemma2-9b-it",
    groq_api_key=groq_api_key,
)

# OpenAI chat model; no key is passed explicitly here.
openai_model = ChatOpenAI(
    model="gpt-4o",
)


In [3]:
from langchain_core.messages import SystemMessage,AIMessage,HumanMessage

# One system instruction plus one user utterance, sent unchanged to both models.
system_instruction = SystemMessage("Translate from English to Hindi")
user_utterance = HumanMessage(content="Hello how are you")
message = [system_instruction, user_utterance]

# Query each provider with the same conversation.
response1 = groq_model.invoke(message)
response2 = openai_model.invoke(message)

# Print the full response objects, separated by blank lines.
print(response1, "\n" * 2, response2, sep="\n")


content='नमस्ते, आप कैसे हैं? \n\n(Namaste, aap kaise hain?) \n' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 18, 'total_tokens': 42, 'completion_time': 0.043636364, 'prompt_time': 8.0859e-05, 'queue_time': 0.021173831, 'total_time': 0.043717223}, 'model_name': 'gemma2-9b-it', 'system_fingerprint': 'fp_10c08bf97d', 'finish_reason': 'stop', 'logprobs': None} id='run-92769055-5a5d-47e6-b43e-481892a155fd-0' usage_metadata={'input_tokens': 18, 'output_tokens': 24, 'total_tokens': 42}



content='नमस्ते, आप कैसे हैं?' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 20, 'total_tokens': 30, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_50cad350e4', 'finish_rea

In [4]:
from langchain_core.output_parsers import StrOutputParser

# Parser used to turn a chat-model response into a plain string.
parser = StrOutputParser()

# Left as the cell's final expression so the notebook displays the parsed text.
parser.invoke(response1)

'नमस्ते, आप कैसे हैं? \n\n(Namaste, aap kaise hain?) \n'

In [5]:
# LCEL (LangChain Expression Language) lets us chain components together with the | operator

In [None]:
# Each chain pipes a chat model's reply straight into the string parser.
chain1 = groq_model | parser
chain2 = openai_model | parser

# Run both chains on the same messages, Groq first, printing only the text.
for translation_chain in (chain1, chain2):
    print(translation_chain.invoke(message))


नमस्ते, आप कैसे हैं? 
(Namaste, aap kaise hain?) 


This translates to "Hello, how are you?" in Hindi. 

नमस्ते, आप कैसे हैं?


In [7]:
# Prompt templates avoid hand-building the message list for every request.

from langchain_core.prompts import ChatPromptTemplate

generic_prompt = "Translate into following {language}"

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", generic_prompt),
        ("user", "{text}"),
    ]
)

# Values substituted for the {language} and {text} placeholders.
translation_request = {"language": "Hindi", "text": "Hello this is shanmukh"}

# Render the template on its own; the result is not displayed or reused.
prompt.invoke(translation_request)

# Template -> Groq model -> string parser.
chain3 = prompt | groq_model | parser

# Final expression of the cell, so the notebook shows the translated string.
chain3.invoke(translation_request)



'नमस्ते, मैं शंमूख हूँ। \n\n(Namaste, main Shanmukh hoon.) \n'

In [84]:
%%writefile lama_rag.py

import streamlit as st 
import os
from langchain_groq import ChatGroq
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import FAISS  
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq

load_dotenv()

# The key must come from the environment (or a .env file). A hard-coded
# fallback would either leak a real credential or defer the failure to an
# opaque authentication error on the first request, so stop early instead.
groq_api_key = os.getenv('GROQ_API_KEY')
if not groq_api_key:
    st.error("GROQ_API_KEY is not set. Add it to the environment or a .env file.")
    st.stop()

groq_model = ChatGroq(model_name="llama-3.2-1b-preview",groq_api_key=groq_api_key)

# System prompt: the {context} placeholder is filled by the documents chain.
generic_prompt = """
                    Answer the following questions based on given context only.
                    Please provide most accurate response based on question.
                    <context>
                    {context}
                    </context>
                """

# The user's question fills the {input} placeholder.
prompt = ChatPromptTemplate.from_messages([("system",generic_prompt),("user","{input}")])


def create_vector_embeddings_ollama():
    """Build the FAISS index over the PDFs in ``research`` if it is not built yet.

    Despite the ``_ollama`` suffix, the embeddings come from
    ``OpenAIEmbeddings``. Each intermediate object (embeddings, loader, loaded
    documents, splitter, chunks) and the resulting index are stored on
    ``st.session_state``. When ``vectors`` is already present in the session
    state, the function returns without doing anything.
    """
    state = st.session_state
    if "vectors" in state:
        return

    state.embeddings = OpenAIEmbeddings()
    # Data ingestion: load the PDFs from the "research" directory.
    state.loader = PyPDFDirectoryLoader("research")
    state.docs = state.loader.load()
    # Split the loaded documents into overlapping chunks before embedding.
    state.text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    state.final_documents = state.text_splitter.split_documents(state.docs)
    state.vectors = FAISS.from_documents(state.final_documents, state.embeddings)
        
user_prompt = st.text_input("Enter the query from the documents")
if st.button("Document Embedding"):
    create_vector_embeddings_ollama()
    st.write("Vector db is ready")

if user_prompt:
    # The index only exists after "Document Embedding" has been clicked;
    # reading st.session_state.vectors before that raises an AttributeError.
    if "vectors" not in st.session_state:
        st.warning("Click 'Document Embedding' to build the vector db before asking a question.")
    else:
        # Stuff the retrieved chunks into the prompt and let the model answer.
        document_chain = create_stuff_documents_chain(groq_model,prompt)
        retriever = st.session_state.vectors.as_retriever()
        retrieval_chain = create_retrieval_chain(retriever,document_chain)
        response = retrieval_chain.invoke({'input':user_prompt})
        st.write(response['answer'])
    




Overwriting lama_rag.py


In [85]:
import subprocess

# Launch the Streamlit app as a child process; Popen returns without waiting for it.
streamlit_command = ["streamlit", "run", "lama_rag.py"]
process = subprocess.Popen(streamlit_command)

print(f"process {process}")

process <Popen: returncode: None args: ['streamlit', 'run', 'lama_rag.py']>


In [78]:
# Ask the Streamlit child process to exit, then wait so it is reaped instead
# of lingering as a zombie; escalate to kill() if it ignores the request.
process.terminate()
try:
    process.wait(timeout=10)
except subprocess.TimeoutExpired:
    process.kill()
    process.wait()