<a href="https://colab.research.google.com/github/OMarieH/FITPT/blob/main/capstone_fitpt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U langchain-openai
!pip install -qU langchain
!pip install pinecone-client

In [None]:
import os

# Colab's secrets store; values are configured in the notebook's "Secrets" panel.
from google.colab import userdata

# Copy each secret into the process environment under the same name so that
# later cells can read it through os.environ.
for secret_name in ("OPENAI_API_KEY", "PINECONE_API_KEY"):
    os.environ[secret_name] = userdata.get(secret_name)

In [None]:
import time
from pinecone import Pinecone, ServerlessSpec

# Pinecone client authenticated with the key exported to the environment earlier.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Serverless deployment target used when the index has to be created.
spec = ServerlessSpec(cloud="aws", region="us-east-1")

index_name = 'fit-pt-rag-test'

# Create the index only when it is missing (expected on the very first run).
existing_index_names = pc.list_indexes().names()
if index_name not in existing_index_names:
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric='euclidean',
        spec=spec,
    )
    # Poll once per second until the new index reports itself as ready.
    while True:
        if pc.describe_index(index_name).status['ready']:
            break
        time.sleep(1)

# Open a handle to the index, pause briefly, then display its statistics.
index = pc.Index(index_name)
time.sleep(1)
index.describe_index_stats()


{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 486}},
 'total_vector_count': 486}

In [None]:
from langchain_community.document_loaders import TextLoader, WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Download the source article and wrap it as LangChain documents.
article_url = "https://health.clevelandclinic.org/how-to-overcome-gym-anxiety-gymtimidation"
loader = WebBaseLoader(article_url)
docs = loader.load()

# Break the article into overlapping chunks (chunk_size=1000, chunk_overlap=500).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=500, chunk_size=1000)
documents = text_splitter.split_documents(docs)

# Show how many chunks were produced.
len(documents)

31

In [None]:
# NOTE: this import rebinds the name `Pinecone` to LangChain's vector-store
# wrapper; the index-setup cell imports the Pinecone client class under the
# same name, so each cell relies on its own import statement.
from langchain.vectorstores import Pinecone
from langchain_openai import OpenAIEmbeddings

# Embedding model used to vectorise the article chunks.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])

# Embed and upsert every chunk produced by the text splitter. The text of each
# chunk in `documents` is sent, not just the first chunk repeated.
# NOTE(review): re-running this cell appears to add the chunks again rather
# than replace them (no explicit ids are passed) — confirm before re-running.
chunk_texts = [chunk.page_content for chunk in documents]
docsearch = Pinecone.from_texts(chunk_texts, embeddings, index_name=index_name)

In [None]:
# Display the index statistics again to check the vector count after the upsert.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 486}},
 'total_vector_count': 486}

In [None]:
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA

# Chat model used for generation, configured with temperature 0.
llm = ChatOpenAI(
    temperature=0.0,
    model_name='gpt-3.5-turbo',
    openai_api_key=os.environ["OPENAI_API_KEY"],
)

# Expose the vector store as a retriever and fetch documents for the query
# string "question".
# NOTE: this rebinds `docs`, which previously held the pages loaded from the web.
retriever = docsearch.as_retriever()
docs = retriever.invoke("question")

In [None]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import ResponseSchema, StructuredOutputParser

# Fields the model is asked to return, as (name, description) pairs.
# The spelling "excercise" is kept as-is: it is the key of the parsed result.
schema_fields = (
    ("excercise", "The list of exercises for a workout plan"),
    ("journal", "The questions for the fitness journal"),
)

response_schemas = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in schema_fields
]

# Parser that turns the model's reply into a dict keyed by the schema names.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)


In [None]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Formatting rules produced by the structured output parser; they are bound
# into the prompt up front so callers only supply `context` and `location`.
format_instructions = output_parser.get_format_instructions()

# Prompt text with three placeholders: {context}, {location}, {format_instructions}.
workout_template = """You are a Personal Fitness Trainer. First Generate a detailed list of exercises that will be performed that day. Then use the context retrieved to create the workout and ask the client questions to add to a Fitness Journal. {context}

Your task is to generate a workout based on the location the client is using.
Location: {location}

 {format_instructions}
"""

prompt = ChatPromptTemplate.from_template(
    template=workout_template,
    partial_variables={"format_instructions": format_instructions},
)

# Pipeline: fill the prompt -> call the chat model -> parse the structured reply.
chain = prompt | llm | output_parser


In [None]:
# Run the chain for a client who is training at home.
# The retrieved documents are flattened to plain text first: formatting the
# list of Document objects directly into the prompt would insert their Python
# repr (metadata, escaped newlines) instead of the article text.
context_text = "\n\n".join(doc.page_content for doc in docs)
chain.invoke({"context": context_text, "location": "I'm working out at home."})

{'excercise': '1. Jumping Jacks - 3 sets of 20 reps\n2. Bodyweight Squats - 3 sets of 15 reps\n3. Push-ups - 3 sets of 12 reps\n4. Plank - 3 sets of 30 seconds\n5. Bicycle Crunches - 3 sets of 20 reps per side',
 'journal': '1. How did you feel during the workout today?\n2. Did you find any of the exercises challenging?\n3. What modifications did you make to any of the exercises?\n4. How would you rate your energy levels during the workout?\n5. Did you enjoy working out at home?'}

In [None]:
from langchain.document_loaders import CSVLoader

# Approach reference: https://www.neum.ai/post/llm-spreadsheets
# Load the gym exercise dataset from the Colab filesystem as LangChain documents.
gym_csv_path = '/content/megaG - megaGymDataset.csv.csv'
loader = CSVLoader(gym_csv_path)
data = loader.load()

# Embedding model intended for the CSV data.
embedding_models = OpenAIEmbeddings()

# TODO: not enabled yet — split `data` with a CharacterTextSplitter
# (chunk_size=1000, chunk_overlap=200) into `documents`, and inspect
# data[0].page_content to check what was loaded.
