In [None]:
# Libraries
import os
from dotenv import load_dotenv


In [None]:
# Pull the variables from the local .env file into the process environment,
# then echo the Pinecone environment name as a quick sanity check.
load_dotenv(dotenv_path='./.env')
os.getenv('PINECONE_ENV')

## LangChain first steps

In [None]:
# Older
# from langchain.llms import OpenAI
# llm = OpenAI(model_name='text-davinci-003', temperature=0.7, max_tokens=512)

# Newer
# Use the same provider package and instruction-following completion model as
# the "Prompt Templates" section, so the whole notebook talks to one client
# class and the plain-English prompts below are answered rather than continued.
from langchain_openai import OpenAI
llm = OpenAI(model_name='gpt-3.5-turbo-instruct', temperature=0.7, max_tokens=512)

# Printing the client shows its configured parameters.
print(llm)

In [None]:
# Send a single prompt to the completion model. `invoke` is the supported
# entry point; calling the LLM object directly is deprecated.
output = llm.invoke('explain how python functions work')

In [None]:
# Display the value currently bound to `output`.
print(output)

In [None]:
# Check how many tokens the LLM client counts for this prompt.
print(llm.get_num_tokens('explain how python functions work'))

In [None]:
# Submit two independent prompts in a single batched call.
output = llm.generate(
    [
        '... is the capital of France',
        'what is the formula of the area of the circle?',
    ]
)

In [None]:
# Show the `generations` attribute of the batched result.
print(output.generations)

In [None]:
# Get only the first one: index the first prompt's entry, then its first
# generation, and read the generated text.
output.generations[0][0].text

In [None]:
# Ask for several completions by submitting the same prompt three times.
output = llm.generate(
    ['write an original slogan for a sushi restaurant' for _ in range(3)]
)

In [None]:
# Each entry of `generations` corresponds to one submitted prompt; print the
# text of its first generation. `end=''` avoids adding an extra newline.
for o in output.generations:
    print(o[0].text, end='')

## ChatModels: GPT-3.5-Turbo and GPT-4

In [None]:
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

from langchain.chat_models import ChatOpenAI

In [None]:
# new way
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI

In [None]:
# Chat models take a list of role-tagged messages instead of a bare string.
chat = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.5, max_tokens=1024)
messages = [
    # The system message sets the persona and the answer language.
    SystemMessage(content='you are a specialist in machine learning that answers everything in English.'),
    HumanMessage(content='explain in one paragraph what is machine learning.')
]
# `invoke` is the supported entry point; calling the model object directly is deprecated.
output = chat.invoke(messages)

In [None]:
# Show the `content` attribute of the chat model's reply.
print(output.content)

## Prompt Templates

In [None]:
from langchain.prompts import PromptTemplate

In [None]:
# Reusable prompt with two placeholders: the subject ({virus}) and the
# language the answer should be written in ({language}).
template = """You are an experienced virologist.
Write a few sentences about the following {virus} in {language}."""

prompt = PromptTemplate(
    input_variables=['virus', 'language'],  # must match the placeholders above
    template=template
)
print(prompt)

In [None]:
# from langchain.llms import OpenAI
# llm = OpenAI(model_name='text-davinci-003', temperature=0.7)

from langchain_openai import OpenAI
llm = OpenAI(model_name='gpt-3.5-turbo-instruct', temperature=0.7)

# Fill in the template placeholders and send the resulting string to the model.
# `invoke` is the supported entry point; calling the LLM object directly is deprecated.
output = llm.invoke(prompt.format(virus='ebola', language='English'))
print(output)

## Simple Chains

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

llm = ChatOpenAI(model_name='gpt-4', temperature=0.5)
# {language} is the language the answer should be written in.
template = """You are an experienced virologist.
Write a few sentences about the following {virus} in {language}."""

prompt = PromptTemplate(
    input_variables=['virus', 'language'],
    template=template
)

# An LLMChain ties the prompt template to the model.
chain = LLMChain(llm=llm, prompt=prompt)

# The input keys must match the template's variables exactly.
output = chain.run({'virus': 'HIV', 'language': 'english'})


In [None]:
# Display the value returned by the chain run.
output

## Sequential Chains

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SimpleSequentialChain

# First link: a completion model writes a Python function for the requested concept.
llm1 = OpenAI(model_name='gpt-3.5-turbo-instruct', temperature=0.7, max_tokens=1024)

prompt1 = PromptTemplate(
    input_variables=['thing'],
    template="""You are an experience data scientist and Python programmer.
    Write a function that implements {thing} concept."""
)

chain1 = LLMChain(llm=llm1, prompt=prompt1)

#--- second chain ---#
# 'gpt-3.5-turbo' is a chat model, so it goes through the chat client imported
# at the top of this cell rather than the completion-style `OpenAI` class.
llm2 = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=1.2)

prompt2 = PromptTemplate(
    input_variables=['function'],
    # The placeholder name must match the declared input variable.
    template="""Given the function {function} Python, describe how it works with the most details possible."""
)

chain2 = LLMChain(llm=llm2, prompt=prompt2)

# The output of chain1 becomes the single input of chain2.
overall_chain = SimpleSequentialChain(chains=[chain1, chain2], verbose=True)
output = overall_chain.run("Linear Regression")

## LangChain Agents

In [None]:
# exponentiation example: the same calculation the agent is asked for below

5.1 ** 7.3

In [None]:
from langchain_experimental.agents.agent_toolkits import create_python_agent
from langchain_experimental.tools.python.tool import PythonAstREPLTool
from langchain.llms import OpenAI

In [None]:
# Build an agent around the LLM and a Python REPL tool, then ask it a
# numeric question in natural language.
# temperature=0 requests the least random sampling from the model.
llm = OpenAI(temperature=0)
agent_executor = create_python_agent(
    llm=llm,
    # NOTE(review): this tool presumably executes model-generated Python in the
    # current kernel — confirm before running it on untrusted prompts.
    tool=PythonAstREPLTool(),
    verbose=True
)
agent_executor.run('what is the answer to 5.1 to the power of 7.3')

## Splitting & Text Embedding

In [None]:
import os
# Project folder under the current user's home directory.
project_dir = os.path.join(
    os.path.expanduser("~"),
    *("OneDrive", "Project_Code", "Project-LinuxTips-LLM_2025"),
)
# Sample text file that is split into chunks further down.
file_path = os.path.join(project_dir, *("docs", "example_text.txt"))

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the whole document into memory. The encoding is explicit so the text is
# decoded the same way on every platform instead of using the locale default.
# NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open(file_path, encoding='utf-8') as f:
    clt = f.read()

# Split on characters into pieces of at most 100, sharing 20 characters
# between neighbouring pieces; length is measured with the built-in len().
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len
)

In [None]:
# Split the loaded text into document chunks using the splitter configured above.
chunks = text_splitter.create_documents([clt])

In [None]:
# Inspect the first chunk.
chunks[0]

In [None]:
# Inspect the third chunk.
chunks[2]

In [None]:
# Show only the text payload of the third chunk.
chunks[2].page_content

In [None]:
# Number of chunks produced by the splitter.
len(chunks)

In [None]:
def embedding_cost(texts, model='text-embedding-ada-002', usd_per_1k_tokens=0.0001):
    """Print the token count and the estimated embedding cost for some documents.

    Args:
        texts: Iterable of objects exposing a ``page_content`` string
            (for example the chunks produced by the text splitter).
        model: Model name handed to ``tiktoken.encoding_for_model`` to pick
            the tokenizer.
        usd_per_1k_tokens: Price in USD per 1,000 tokens used for the
            estimate. The default is a fixed rate; verify it against current
            pricing before relying on the figure.

    Returns:
        None. Both totals are printed.
    """
    import tiktoken  # imported here so the cell is self-contained

    enc = tiktoken.encoding_for_model(model)
    # Generator expression: no intermediate list is needed just to sum lengths.
    total_tokens = sum(len(enc.encode(page.page_content)) for page in texts)
    print(f'Total tokens: {total_tokens}')
    print(f'Cost of Embedding in USD: {total_tokens / 1000 * usd_per_1k_tokens:.6f}')

In [None]:
# Print the token total and estimated cost for all chunks.
embedding_cost(chunks)

### Creating embeddings

In [None]:
from langchain.embeddings import OpenAIEmbeddings
# Name the embedding model explicitly so it stays in step with the tokenizer
# used by embedding_cost() and with the 1536-dimension index created for the
# vector store, rather than depending on the library default.
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')

In [None]:
# Embed the text of the first chunk as a quick end-to-end check.
vector = embeddings.embed_query(chunks[0].page_content)

In [None]:
# vector

In [None]:
import os
import pinecone
from langchain.vectorstores import Pinecone

# Configure the Pinecone client from environment variables so no credential
# is written into the notebook itself.
pinecone.init(
    api_key=os.getenv('PINECONE_API_KEY'),
    environment=os.getenv('PINECONE_ENV'),
)

In [None]:
# WARNING: destructive — removes every index returned by list_indexes(),
# not only the one this notebook creates.
indexes = pinecone.list_indexes()
for existing_index in indexes:
    pinecone.delete_index(existing_index)
    # Confirmation is printed once per deleted index.
    print('Done!')

In [None]:
# Create the index only when it is not already listed, so re-running the
# cell does not attempt to create it twice.
index_name = 'linuxtips'
if index_name not in pinecone.list_indexes():
    pinecone.create_index(
        name=index_name,
        dimension=1536,
        metric='cosine',
    )
    print('Done!')

In [None]:
# Build the vector store from the chunks, using `embeddings` and the index
# named by `index_name`.
vector_store = Pinecone.from_documents(chunks, embeddings, index_name=index_name)