<a href="https://colab.research.google.com/github/OMarieH/FITPT/blob/main/Capstone_FITPT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U langchain-openai
!pip install -qU langchain
!pip install pinecone-client

In [2]:
import os

# Colab's Secrets panel is exposed through the `userdata` module.
from google.colab import userdata

# Copy each stored secret into the process environment so that later cells
# can read the keys from os.environ.
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
os.environ["PINECONE_API_KEY"] = userdata.get("PINECONE_API_KEY")

In [3]:
import time
from pinecone import Pinecone
from pinecone import ServerlessSpec

# Pinecone client, authenticated with the key placed in the environment earlier.
pc = Pinecone(os.environ["PINECONE_API_KEY"])

# Serverless deployment target used if the index has to be created.
spec = ServerlessSpec(cloud="aws", region="us-east-1")

index_name = 'fit-pt-rag-test'

# Only create the index when it is missing (expected on the very first run).
if index_name not in pc.list_indexes().names():
    pc.create_index(index_name, dimension=1536, metric='euclidean', spec=spec)
    # Poll once per second until Pinecone reports the new index as ready.
    while True:
        if pc.describe_index(index_name).status['ready']:
            break
        time.sleep(1)

# Open a handle to the index, pause for a second, then display its statistics
# as the cell output.
index = pc.Index(index_name)
time.sleep(1)
index.describe_index_stats()


{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 487}},
 'total_vector_count': 487}

In [7]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Download the HelpGuide article and load it as LangChain documents.
loader = WebBaseLoader(
    "https://www.helpguide.org/articles/healthy-living/the-mental-health-benefits-of-exercise.htm"
)
docs = loader.load()

# Split the loaded page using chunk_size=1000 and chunk_overlap=200.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
documents = text_splitter.split_documents(docs)

# Number of chunks produced by the splitter (shown as the cell output).
len(documents)

1

In [8]:
# NOTE: this import rebinds the name `Pinecone` from the pinecone client class
# (imported in the index-setup cell) to LangChain's vector-store wrapper.
from langchain.vectorstores import  Pinecone
from langchain_openai import OpenAIEmbeddings

# Embedding model used to vectorise each chunk before it is written to the index.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])

# Embed and upload the text of *every* chunk produced by the splitter.
# The previous comprehension, `[documents[0].page_content for t in docs]`, never used
# its loop variable, so it uploaded only the first chunk (once per loaded page).
# NOTE(review): no ids are passed, so re-running this cell presumably appends the
# same chunks again instead of overwriting them -- confirm before re-running.
docsearch = Pinecone.from_texts(
    [chunk.page_content for chunk in documents],
    embeddings,
    index_name=index_name,
)

In [9]:
# Query the index statistics again, after the upload cell, to see the current vector count.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 489}},
 'total_vector_count': 489}

In [10]:
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA

# Expose the Pinecone-backed vector store through the retriever interface.
retriever = docsearch.as_retriever()

# Chat model used by the chain defined in a later cell.
llm = ChatOpenAI(
    temperature=0.0,
    model_name='gpt-3.5-turbo',
    openai_api_key=os.environ["OPENAI_API_KEY"],
)

# Retrieve the context documents that are later passed to the chain.
# NOTE(review): the query is the literal string "question", which looks like a
# placeholder -- confirm what the retrieval query should be.
# NOTE: this rebinds `docs`, which previously held the pages loaded from the web.
docs = retriever.invoke("question")

In [11]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import ResponseSchema, StructuredOutputParser

# One ResponseSchema per field the model must return. The names become the keys of
# the parsed output; "excercise" (sic) is kept exactly as spelled because it is the
# key that appears in the chain's result.
response_schemas = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in (
        ("excercise", "The list of exercises for a workout plan"),
        ("journal", "The questions for the fitness journal"),
    )
]

# Parser built from the schemas above; it also supplies the format instructions
# that get embedded in the prompt.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)


In [12]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Text produced by the structured output parser; it is injected into the prompt below
# through the {format_instructions} placeholder.
format_instructions = output_parser.get_format_instructions()

# Prompt template with two placeholders supplied at invoke time ({context}, {location});
# {format_instructions} is pre-filled via partial_variables.
prompt = ChatPromptTemplate.from_template(
    template="""You are a Personal Fitness Trainer. First Generate a detailed list of exercises that will be performed that day.
    Then use the context retrieved to create the workout and ask the client questions about the workout to add to a Fitness Journal. {context}

Your task is to generate a workout based on the location the client is using.
Location: {location}

 {format_instructions}
""", partial_variables={"format_instructions": format_instructions},)

# Compose the pipeline: the rendered prompt is piped to the chat model, and the model's
# reply is piped to the structured output parser.
chain = prompt | llm | output_parser


In [14]:
# Run the prompt -> LLM -> output-parser chain for a single request.
# "context" receives `docs` (the result of the earlier retriever.invoke call) and
# "location" fills the {location} placeholder of the prompt template.
chain.invoke({"context":docs,"location":"I'm working out at a park."})

{'excercise': '1. Jogging around the park for 10 minutes\n2. Bodyweight squats x 15 reps\n3. Push-ups x 12 reps\n4. Lunges x 12 reps each leg\n5. Bench dips x 15 reps\n6. Plank for 30 seconds\n7. Jumping jacks x 20 reps',
 'journal': '1. How did you feel about working out in a park compared to a gym?\n2. Did you enjoy the fresh air and scenery during your workout?\n3. Were there any challenges or obstacles you faced while exercising outdoors?\n4. How did the change in environment impact your motivation and energy levels during the workout?\n5. Did you notice any differences in your performance or mindset while doing outdoor exercises?'}

In [None]:
from langchain.document_loaders import CSVLoader
# Reference: https://www.neum.ai/post/llm-spreadsheets

# Embedding model created for the CSV data (not yet used in this cell).
embedding_models = OpenAIEmbeddings()

# Load the gym-exercise CSV that was uploaded to the Colab runtime.
# NOTE: this rebinds `loader`, which earlier referred to the web page loader.
loader = CSVLoader('/content/megaG - megaGymDataset.csv.csv')
data = loader.load()

# Disabled experiment: splitting the CSV rows into chunks and previewing the first row.
#test_splitter = CharacterTextSplitter( chunk_size=1000, chunk_overlap=200)
#documents = text_splitter.split_documents(data)
#print(data[0].page_content)
