Skip to content

Commit

Permalink
feat: ✨ Intelligent caching keeps up with version gpt-1106 (#95)
Browse files Browse the repository at this point in the history
* ✨ Intelligent caching keeps up with version gpt-1106

* deps fix
  • Loading branch information
KenyonY authored Nov 19, 2023
1 parent 4d26d6d commit a6f9a2c
Show file tree
Hide file tree
Showing 22 changed files with 478 additions and 278 deletions.
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ CACHE_CHAT_COMPLETION=true
# `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB)
CACHE_BACKEND=LMDB

#LOG_CACHE_DB_INFO=false
#LOG_CACHE_DB_INFO=true

#BENCHMARK_MODE=true

Expand Down
28 changes: 3 additions & 25 deletions .env.chatgpt
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,20 @@ LOG_CHAT=true

CACHE_CHAT_COMPLETION=true

# `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB)
CACHE_BACKEND=LMDB
CACHE_BACKEND=LevelDB

#LOG_CACHE_DB_INFO=true

OPENAI_BASE_URL='https://api.openai.com/v1'
OPENAI_ROUTE_PREFIX='/v1'

CHAT_COMPLETION_ROUTE=/chat/completions
COMPLETION_ROUTE=/completions

#OPENAI_API_KEY=
#FORWARD_KEY=


EXTRA_BASE_URL='http://chatgpt:7999'
EXTRA_ROUTE_PREFIX='/'

REQ_RATE_LIMIT={"/v1/chat/completions":"100/2minutes", "/v1/completions":"60/minute;600/hour"}

#GLOBAL_RATE_LIMIT=100/minute

#`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) :ref: https://limits.readthedocs.io/en/latest/strategies.html
# `fixed-window`: most memory efficient strategy; `moving-window`: most effective for preventing bursts but has higher memory cost.
RATE_LIMIT_STRATEGY=moving-window

# Rate limit for returned tokens
TOKEN_RATE_LIMIT={"/v1/chat/completions":"60/second","/v1/completions":"60/second"}

# TCP connection timeout duration (in seconds)
TIMEOUT=6

ITER_CHUNK_TYPE=one-by-one
#ITER_CHUNK_TYPE=efficiency

#IP_BLACKLIST=
#OPENAI_API_KEY=
#FORWARD_KEY=

# Set timezone
TZ=Asia/Shanghai
Binary file removed .github/data/whisper.m4a
Binary file not shown.
7 changes: 5 additions & 2 deletions .github/workflows/gh-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,16 @@ jobs:
- name: Checkout code
uses: actions/checkout@v3

- name: Extract tag name
id: extract_tag
run: echo "TAG_NAME=$(echo ${GITHUB_REF#refs/tags/})" >> $GITHUB_ENV

- name: Create Release and Generate Notes
id: create_release
uses: softprops/action-gh-release@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
tag_name: ${{ github.ref }}
name: 💫 Release ${{ github.ref }}
name: 💫 Release ${{ env.TAG_NAME }}
draft: false
prerelease: false
generate_release_notes: true
15 changes: 0 additions & 15 deletions .github/workflows/issue-translator.yml

This file was deleted.

3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ ssl/
chat.yaml
chat_*.yaml

config.toml
config_parser.py

har/
Log/
Log-caloi-top/
Expand Down
14 changes: 6 additions & 8 deletions Examples/chat_completion.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from openai import OpenAI
from openai._types import Headers, Query
from rich import print
from sparrow import MeasureTime, yaml_load # pip install sparrow-python

Expand Down Expand Up @@ -27,7 +26,9 @@
user_content = """
用c实现目前已知最快平方根算法
"""
# user_content = 'hi'
user_content = 'hi'
model = "gpt-3.5-turbo"
# model="gpt-4"

mt = MeasureTime().start()

Expand All @@ -51,7 +52,7 @@
}
]
resp = client.chat.completions.create(
model="gpt-3.5-turbo",
model=model,
messages=[
{"role": "user", "content": "What's the weather like in Boston today?"}
],
Expand All @@ -63,8 +64,7 @@

else:
resp = client.chat.completions.create(
model="gpt-3.5-turbo",
# model="gpt-4",
model=model,
messages=[
{"role": "user", "content": user_content},
],
Expand All @@ -73,9 +73,7 @@
max_tokens=max_tokens,
timeout=30,
# extra_headers=(caching, caching)
# extra_query={"caching": False},
# extra_headers = {"caching": False},
extra_body={"caching": False},
extra_body={"caching": caching},
)

if stream:
Expand Down
76 changes: 76 additions & 0 deletions Examples/demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Demo of chat completions through a forwarding proxy with caching:
# exercises (1) a JSON-mode completion and (2) a function-calling (tools)
# completion, passing a `caching` flag via extra_body.
from openai import OpenAI
from openai._types import Headers, Query
from rich import print
from sparrow import MeasureTime, yaml_load  # pip install sparrow-python

# Load `api_key` / `api_base` from config.yaml (rel_path=True: resolved
# relative to this script rather than the CWD — per sparrow's yaml_load).
config = yaml_load("config.yaml", rel_path=True)
print(f"{config=}")

client = OpenAI(
    api_key=config['api_key'],
    base_url=config['api_base'],
)

# `caching` is sent in extra_body below — not a standard OpenAI API field;
# presumably consumed by the openai-forward proxy at `api_base` (TODO confirm).
caching = True
# caching = False
stream = True

# Toggles for the two demo scenarios below.
json_obj_case = True
function_case = True

if json_obj_case:
    # Request a JSON-object response (response_format) from the -1106 model,
    # with a system prompt instructing JSON output.
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        response_format={"type": "json_object"},
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant designed to output JSON.",
            },
            {"role": "user", "content": "Who won the world series in 2020?"},
        ],
        stream=stream,
        extra_body={"caching": caching},  # extra field forwarded to the proxy
    )
    if stream:
        # Streaming: iterate server-sent chunks as they arrive.
        for chunk in response:
            print(chunk)
    else:
        print(response.choices[0].message.content)

if function_case:
    # One tool definition in the OpenAI function-calling schema.
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        tools=tools,
        tool_choice="auto",  # let the model decide whether to invoke the tool
        stream=stream,
        extra_body={"caching": caching},
    )

    if stream:
        for chunk in completion:
            print(chunk)
    else:
        print(completion)
18 changes: 18 additions & 0 deletions Examples/tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Minimal text-to-speech example: synthesize one sentence with the `tts-1`
# model and save the resulting audio as speech.mp3.
from openai import OpenAI
from sparrow import relp, yaml_load

# config.yaml supplies `api_base` / `api_key` (e.g. a forwarding endpoint).
config = yaml_load("config.yaml")

client = OpenAI(
    base_url=config["api_base"],
    api_key=config["api_key"],
)

# relp(): presumably resolves the path relative to this file, not the CWD
# — confirm against sparrow's docs.
speech_file_path = relp("./speech.mp3")
response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",  # one of the preset OpenAI voices
    input="Today is a wonderful day to build something people love!",
)

# Write the returned audio bytes to speech.mp3.
response.stream_to_file(speech_file_path)
15 changes: 9 additions & 6 deletions Examples/whisper.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# Note: you need to be using OpenAI Python v0.27.0 for the code below to work
import openai
from openai import OpenAI
from sparrow import relp, yaml_load

config = yaml_load("config.yaml")
openai.api_base = config["api_base"]
openai.api_key = config["api_key"]
audio_file = open(relp("../.github/data/whisper.m4a"), "rb")
transcript = openai.Audio.transcribe("whisper-1", audio_file)

client = OpenAI(
base_url=config["api_base"],
api_key=config["api_key"],
)

audio_file = open("/path/to/audio.mp3", "rb")
transcript = client.audio.transcriptions.create(model="whisper-1", file=audio_file)
print(transcript)
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@ monitor:

start:
@docker run -d \
--restart=unless-stopped \
--name $(container) \
--env-file .env \
-p 8000:8000 \
-v $(shell pwd)/Log:/home/openai-forward/Log \
-v $(shell pwd)/CACHE_LMDB:/home/openai-forward/CACHE_LMDB \
-v $(shell pwd)/CACHE_LEVELDB:/home/openai-forward/CACHE_LEVELDB \
-v $(shell pwd)/openai_forward:/home/openai-forward/openai_forward \
$(image)
$(image) --port=8000 --workers=2
@make log

exec:
Expand Down
Loading

0 comments on commit a6f9a2c

Please sign in to comment.