In [17]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from dotenv import load_dotenv

In [None]:
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan; files matching the ``*.pdf``
            glob are read with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for those files
        (presumably a list of LangChain ``Document`` objects - confirm
        against the installed loader version).
    """
    return DirectoryLoader(data, loader_cls=PyPDFLoader, glob="*.pdf").load()

In [4]:
# Load the PDFs from the data directory (the relative path is resolved
# against the notebook's working directory).
# NOTE(review): the name looks like a typo for `extracted_data`; it is kept
# because the chunking cell below refers to it by this spelling.
extracted_date = load_pdf_file(data="../../data/")

In [7]:
# extracted_date

In [8]:
def text_split(extracted_date, chunk_size=500, chunk_overlap=30):
    """Split loaded documents into overlapping chunks.

    Args:
        extracted_date: Documents to split; handed unchanged to
            ``split_documents``. The parameter name is kept as-is because
            callers pass it by keyword.
        chunk_size: ``chunk_size`` given to the splitter. Defaults to 500,
            the value this notebook previously hard-coded.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to
            30, the value this notebook previously hard-coded.

    Returns:
        The chunks produced by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_date)

In [9]:
# Split the loaded documents into chunks and report how many were produced.
chunks = text_split(extracted_date=extracted_date)
print("Length of chunks = ", len(chunks))

Length of chunks =  2310


In [18]:
# chunks

Now download the embedding model from Hugging Face to create the vector embeddings

In [8]:
def download_embedding_model(model_name='sentence-transformers/all-MiniLM-L6-v2'):
    """Create the Hugging Face embedding model used to vectorize text.

    Args:
        model_name: Hugging Face model identifier passed to
            ``HuggingFaceEmbeddings``. Defaults to
            'sentence-transformers/all-MiniLM-L6-v2', the model this
            notebook previously hard-coded.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for ``model_name``.

    Note:
        The Pinecone index in this notebook is created with dimension 384,
        which is the vector length observed for the default model. A
        different model may produce a different length - verify before
        switching.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [10]:
# Instantiate the embedding model once; later cells reuse `embeddings` for
# both indexing and querying.
embeddings = download_embedding_model()

In [11]:
# Sanity check: embed a sample string and print the vector length. The
# printed value should equal the `dimension` used when the Pinecone index is
# created (384).
text = "Hi Hassan Chest Press"
vector = embeddings.embed_query(text)
print(len(vector))

384


Now create the vector database (Pinecone index)

In [19]:
import os

# Read variables from a local .env file (if any) into the process environment.
load_dotenv()
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')

# Fail here with a clear message instead of passing None on to the Pinecone
# client and getting a less obvious error in a later cell.
if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY is not set. Add it to your .env file or environment."
    )

In [21]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "gym-chatbot"

# create_index raises when an index with this name already exists, so only
# create it on the first run. This keeps the cell safe under
# "Restart Kernel -> Run All".
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,  # must match the embedding length (all-MiniLM-L6-v2 -> 384)
        metric='cosine',
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1'
        )
    )

# Display the index description as the cell output, whether the index was
# just created or already existed.
pc.describe_index(index_name)

{
    "name": "gym-chatbot",
    "metric": "cosine",
    "host": "gym-chatbot-mapxah6.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 384,
    "deletion_protection": "disabled",
    "tags": null
}

In [27]:
import re

def clean_text(text):
    """Collapse whitespace runs to one space and trim both ends.

    Args:
        text: The string to normalize.

    Returns:
        ``text`` with every run of whitespace (spaces, tabs, newlines, ...)
        replaced by a single space and no leading or trailing whitespace.
    """
    single_spaced = re.sub(r'\s+', ' ', text)
    return single_spaced.strip()

In [28]:
# Normalize whitespace in every chunk before it is embedded.
# NOTE: this rewrites `page_content` in place, so `chunks` no longer holds
# the raw splitter output after this cell has run.
for doc in chunks:
    doc.page_content = clean_text(doc.page_content)

In [31]:
# chunks

In [2]:
from langchain_pinecone import PineconeVectorStore

# Embed every chunk with `embeddings` and upload the vectors to the Pinecone
# index named by `index_name`.
# NOTE(review): re-running this cell presumably uploads the same chunks
# again - confirm whether that creates duplicates before running it twice.
vector_embeddings = PineconeVectorStore.from_documents(
    documents=chunks,
    embedding=embeddings,
    index_name=index_name,
)

  from .autonotebook import tqdm as notebook_tqdm


KeyboardInterrupt: 

Now create your query, convert it into a vector, and search for it in the Pinecone database

In [12]:
from langchain_pinecone import PineconeVectorStore

index_name = "gym-chatbot"

# Connect to the existing index instead of uploading the chunks again; the
# same embedding model is supplied so it can be used to embed queries.
docSearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

docSearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x24a2f48ee10>

In [13]:
# Similarity-search retriever over the vector store; `k=3` requests three
# results per query.
# NOTE(review): `retreiver` is misspelled ("retriever") but later cells refer
# to it by this name, so it is left unchanged here.
retreiver = docSearch.as_retriever(search_type="similarity", search_kwargs={"k":3})


In [15]:
# Smoke-test the retriever with a sample question and display what comes back.
retreived_data = retreiver.invoke("What to eat to gain protein")
retreived_data

[Document(id='a7f378fd-9a3f-4a0a-90ba-56206d8f7b92', metadata={'author': 'Jim Stoppani', 'creationdate': '2019-03-21T00:47:55+00:00', 'creator': 'calibre 3.39.1 [https://calibre-ebook.com]', 'page': 1023.0, 'page_label': '1024', 'producer': 'calibre 3.39.1 [https://calibre-ebook.com]', 'source': '..\\..\\data\\gym-guide.pdf', 'title': "Jim Stoppani\\'s Encyclopedia of Muscle \\& Strength - PDFDrive.com", 'total_pages': 1173.0}, page_content='participants’ baseline intake or increase protein intake by at least 50 percent of that consumed by the lower-protein group or control group, then high protein intake does lead to significant gains in lean muscle mass and muscle strength. The bottom line is that eating a higher-protein diet is effective for gaining more muscle and increasing muscle strength. Shoot for 1 to 1.5 grams of protein per pound of body weight for optimal results. Goal 2: Get Ample Fat'),
 Document(id='8e2d8786-c36c-4c98-a069-9bfcbe9830e4', metadata={'author': 'Jim Stoppani

Now let's work on the LLM

In [31]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the environment before
# the chat model is created.
load_dotenv()

llm = ChatOpenAI(model="gpt-3.5-turbo")

# Named `question` rather than `input` so the built-in input() is not shadowed
# for the rest of the notebook session.
question = "Suggest me a diet plan"
context = retreiver.invoke(question)

# Adjacent string literals are concatenated with nothing in between, so each
# sentence ends with an explicit trailing space to keep the sentences apart
# in the final system prompt.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a trained gym trainer who can provide information. "
     "Use the chunks you receive to answer the questions asked. "
     "If something unrelated is asked from you just say something like I am a gym trainer. I don't know much about it. Sorry can't help. "
     "Your first priority is to answer from the chunks {context}."),
    ("user", "{input}")
])


# prompt -> chat model -> plain string
chain = prompt | llm | StrOutputParser()
response = chain.invoke({"context": context, "input": question})

print(response)

For a starting point, a sample diet plan for those weighing between 160 to 200 pounds can be followed. It's important to modify the macronutrients and calories based on your body weight if you weigh significantly less or more than this range. Additionally, you can refer to Chapter 28: Nutrition for Maximizing Fat Loss in the book "Jim Stoppani's Encyclopedia of Muscle & Strength" for more detailed information on sample meal plans and steps to follow for an effective diet plan.
