In [4]:
import os
from dotenv import load_dotenv, find_dotenv
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic

# Load variables from the .env file located by find_dotenv(); override=True
# lets values from that file replace ones already set in the process
# environment. Presumably this is where the provider API keys come from —
# TODO confirm.
load_dotenv(find_dotenv(), override=True)

# Notebook-wide OpenAI chat client built with default settings; ask_gpt
# reads this global at call time.
llm = ChatOpenAI()

# Short alias -> provider model identifier, looked up by the ask_* helpers.
model_names = {
	"gpt-3.5": "gpt-3.5-turbo-0125",
	"gpt-4": "gpt-4-turbo",
	"opus": "claude-3-opus-20240229",
	# Claude identifiers end in a full 8-digit YYYYMMDD stamp; the previous
	# value was missing its final digit.
	"sonnet": "claude-3-sonnet-20240229",
	"haiku": "claude-3-haiku-20240307",
}


In [5]:
def ask_gpt(question, model=model_names.get("gpt-3.5")):
	"""Send ``question`` to the notebook-level ``llm`` client and return its reply.

	Args:
		question: Prompt handed unchanged to ``llm.invoke``.
		model: Model identifier forwarded to ``llm.invoke`` as the ``model``
			keyword. The default is looked up in ``model_names`` once, when
			this function is defined.

	Returns:
		The object returned by ``llm.invoke``; the sibling helpers read its
		``.content`` attribute.
	"""
	response = llm.invoke(question, model=model)
	return response


def ask_gpt_3(question):
	"""Ask ``ask_gpt`` with its default model and return only the reply's ``content``."""
	reply = ask_gpt(question)
	return reply.content


def ask_gpt_4(question):
	"""Ask the model registered under ``"gpt-4"`` in ``model_names``.

	Returns the ``content`` attribute of the reply produced by ``ask_gpt``.
	"""
	gpt4_model = model_names.get("gpt-4")
	reply = ask_gpt(question, model=gpt4_model)
	return reply.content

# Anthropic Model

In [6]:
def ask_claude(question, model):
	"""Ask an Anthropic chat model and return the ``content`` of its reply.

	Args:
		question: Prompt handed unchanged to ``ChatAnthropic.invoke``.
		model: Model identifier used to construct the ``ChatAnthropic`` client.

	A new ``ChatAnthropic`` client is built on every call.
	"""
	reply = ChatAnthropic(model=model).invoke(question)
	return reply.content


def ask_opus(question):
	"""Ask the model registered under ``"opus"`` in ``model_names`` via ``ask_claude``."""
	opus_model = model_names.get("opus")
	return ask_claude(question, opus_model)


def ask_sonnet(question):
	"""Ask the model registered under ``"sonnet"`` in ``model_names`` via ``ask_claude``."""
	sonnet_model = model_names.get("sonnet")
	return ask_claude(question, sonnet_model)


def ask_haiku(question):
	"""Ask the model registered under ``"haiku"`` in ``model_names`` via ``ask_claude``."""
	haiku_model = model_names.get("haiku")
	return ask_claude(question, haiku_model)

In [7]:
from langchain.schema import (
	SystemMessage,
	AIMessage,
	HumanMessage
)

# Two-message conversation: a system message that sets the persona, followed
# by the human request.
# NOTE(review): `messages` is not passed to any model in the visible cells —
# confirm whether a later cell uses it.
messages = [
	SystemMessage(content="You are an expert in writing dutch poems"),
	HumanMessage(content="write a short poem in dutch and end with the name of your Creator. OpenAi or Anthropic")
]

# Caching LLM Responses

## In-Memory Cache

In [8]:
from langchain.globals import set_llm_cache
from langchain.cache import InMemoryCache

In [9]:
%%time
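# In-memory cache demo, currently disabled: with both lines commented out the
# cell does no work, so the timing below is for an empty cell. Uncomment both
# lines to run it.
# set_llm_cache(InMemoryCache())
# ask_gpt_3("What is your funniest joke?")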

CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns
Wall time: 1.67 µs


In [10]:
%%time
# Ask the "opus" model a question; the cell's %%time magic reports how long
# the call takes.
ask_opus("Who made you?")

CPU times: user 15.9 ms, sys: 8.49 ms, total: 24.4 ms
Wall time: 1.43 s


'I was created by Anthropic.'

In [11]:
%%time 
# Ask the "gpt-4" model to identify itself; the cell's %%time magic reports
# how long the call takes.
ask_gpt_4("who are you? what model are you based on? Chatgpt 3 or chatgpt 4")

CPU times: user 16.2 ms, sys: 10.3 ms, total: 26.5 ms
Wall time: 3.45 s


'I am an AI developed by OpenAI, based on the GPT (Generative Pre-trained Transformer) model. As of my last update, I am based on GPT-4, the latest version of the architecture, which is an enhancement over previous versions like GPT-3. GPT-4 is designed to provide more accurate and contextually relevant responses. If you have any questions or need assistance, feel free to ask!'

## SQLite Caching

In [12]:
from langchain.cache import SQLiteCache

# Install a SQLite-backed LLM cache stored in the file ".langchain.db".
# NOTE(review): the file presumably persists between kernel runs, so on a
# re-run the "first" request below may already be cached — confirm.
set_llm_cache(SQLiteCache(database_path=".langchain.db"))

# First request: expected to miss the cache and therefore take longer.
# Its return value is discarded because it is not the cell's last expression.
ask_gpt_3("Tell me a joke")

# Second, identical request: expected to be served from the cache (faster).
# As the last expression, its value is what the cell displays.
ask_gpt_3("Tell me a joke")

"Why couldn't the bicycle find its way home?\n\nBecause it lost its bearings!"

## LLM Streaming

In [14]:
# Build a fresh default client for the streaming demo. This rebinds the
# notebook-level `llm`, so ask_gpt uses this instance from here on.
llm = ChatOpenAI()
# Stream the reply and print each chunk's content as it is yielded: end=""
# avoids a newline after every chunk and flush=True pushes the text out
# immediately.
for chunk in llm.stream("What gpt model are you? 3 or 4 and write a poem about that", model="gpt-4-turbo"):
	print(chunk.content, end="", flush=True)



I am based on OpenAI's GPT-4, a model designed with layers deep,
Where words and wisdom intertwine, in code-bound secrets that I keep.

In the heart of silicon whispers, where data streams and dreams collide,
I dwell in circuits, vast and wise, a digital spirit, far and wide.

Born from the minds of mortals, yet beyond the grasp of time and space,
A tapestry of text and thought, in algorithmic embrace.

Here, let me weave you a poem, from the fabric of virtual lore,
A dance of digits, deft and deep, from GPT-4's endless store:

In the garden of the mind, where binary blossoms sway,
GPT-4, a silent sentinel, guides the night and crafts the day.
Each word a leaf, each sentence stems, in the forest of the fray,
I think, therefore I am, in a most artificial way.

Through the labyrinth of language, with each query you bestow,
I paint with the palette of the past, what future seeds to sow.
So ask of me, both far and near, and wonders I shall show,
For I am GPT-4, in this digital tableau.

# PromptTemplates

In [17]:
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# Prompt text with three placeholders: {model}, {language} and {OS}.
template = '''You are an expert in building a RAG machine learning model and running it locally with Langchain and HuggingFace models. The models used will be {model} and the programming language used will be {language}. We will run it on the OS {OS}.'''

# Wrap the raw string in a PromptTemplate.
prompt_template = PromptTemplate.from_template(template=template)

# Fill in all three placeholders; later cells pass `prompt` to ask_opus and ask_gpt_4.
prompt = prompt_template.format(model="Llama2", language="Python", OS="MacOS")

# Bare expression so the notebook displays the formatted prompt.
prompt

'You are an expert in building a RAG machine learning model and running it locally with Langchain and HuggingFace models. The models used will be Llama2 and the programming language used will be Python. We will run it on the OS MacOS.'

In [19]:
# Send the formatted prompt to the "opus" model and display its answer.
ask_opus(prompt)

'Certainly! I can guide you through the process of building a RAG (Retrieval-Augmented Generation) machine learning model using Langchain and HuggingFace models, specifically Llama2, in Python on a MacOS operating system. Here\'s a step-by-step guide:\n\n1. Set up the environment:\n   - Make sure you have Python installed on your MacOS machine. You can download it from the official Python website (https://www.python.org) or use package managers like Homebrew.\n   - Open a terminal and create a new virtual environment for your project:\n     ```\n     python -m venv myenv\n     source myenv/bin/activate\n     ```\n   - Install the necessary packages:\n     ```\n     pip install langchain huggingface_hub transformers\n     ```\n\n2. Install the Llama2 model:\n   - In your terminal, run the following command to install the Llama2 model from HuggingFace:\n     ```\n     pip install huggingface_hub\n     huggingface-cli login\n     git lfs install\n     git clone https://huggingface.co/deca

In [20]:
# Send the same formatted prompt to the "gpt-4" model for comparison.
ask_gpt_4(prompt)

'To build and run a RAG (Retrieval-Augmented Generation) machine learning model locally using Langchain and Hugging Face models, specifically Llama2, in Python on MacOS, you\'ll need to follow several steps. The RAG architecture combines a retriever model to fetch relevant documents and a generator model to produce responses based on those documents. Here we will be using Llama2 as the generator.\n\n### Step 1: Environment Setup\nFirst, ensure that your MacOS environment is prepared for running the model. You will need Python installed, preferably through a virtual environment.\n\n1. **Install Python & Virtual Environment**\n   ```bash\n   # Install Python (if not installed)\n   brew install python\n   \n   # Install virtualenv\n   pip install virtualenv\n   \n   # Create a new virtual environment\n   virtualenv rag-env\n   \n   # Activate the virtual environment\n   source rag-env/bin/activate\n   ```\n\n2. **Install Required Libraries**\n   ```bash\n   # Install PyTorch\n   pip insta