Skip to content

Commit

Permalink
feat: ✨ Intelligent caching keeps up with version gpt-1106 (#95)
Browse files Browse the repository at this point in the history
* ✨ Intelligent caching keeps up with version gpt-1106

* deps fix
  • Loading branch information
KenyonY authored Nov 19, 2023
1 parent 4d26d6d commit a6f9a2c
Show file tree
Hide file tree
Showing 22 changed files with 478 additions and 278 deletions.
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ CACHE_CHAT_COMPLETION=true
# `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB)
CACHE_BACKEND=LMDB

#LOG_CACHE_DB_INFO=false
#LOG_CACHE_DB_INFO=true

#BENCHMARK_MODE=true

Expand Down
28 changes: 3 additions & 25 deletions .env.chatgpt
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,20 @@ LOG_CHAT=true

CACHE_CHAT_COMPLETION=true

# `CACHE_BACKEND`: Options (MEMORY, LMDB, LevelDB)
CACHE_BACKEND=LMDB
CACHE_BACKEND=LevelDB

#LOG_CACHE_DB_INFO=true

OPENAI_BASE_URL='https://api.openai.com/v1'
OPENAI_ROUTE_PREFIX='/v1'

CHAT_COMPLETION_ROUTE=/chat/completions
COMPLETION_ROUTE=/completions

#OPENAI_API_KEY=
#FORWARD_KEY=


EXTRA_BASE_URL='http://chatgpt:7999'
EXTRA_ROUTE_PREFIX='/'

REQ_RATE_LIMIT={"/v1/chat/completions":"100/2minutes", "/v1/completions":"60/minute;600/hour"}

#GLOBAL_RATE_LIMIT=100/minute

#`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) :ref: https://limits.readthedocs.io/en/latest/strategies.html
# `fixed-window`: most memory efficient strategy; `moving-window`: most effective for preventing bursts but has higher memory cost.
RATE_LIMIT_STRATEGY=moving-window

# Rate limit for returned tokens
TOKEN_RATE_LIMIT={"/v1/chat/completions":"60/second","/v1/completions":"60/second"}

# TCP connection timeout duration (in seconds)
TIMEOUT=6

ITER_CHUNK_TYPE=one-by-one
#ITER_CHUNK_TYPE=efficiency

#IP_BLACKLIST=
#OPENAI_API_KEY=
#FORWARD_KEY=

# Set timezone
TZ=Asia/Shanghai
Binary file removed .github/data/whisper.m4a
Binary file not shown.
7 changes: 5 additions & 2 deletions .github/workflows/gh-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,16 @@ jobs:
- name: Checkout code
uses: actions/checkout@v3

- name: Extract tag name
id: extract_tag
run: echo "TAG_NAME=$(echo ${GITHUB_REF#refs/tags/})" >> $GITHUB_ENV

- name: Create Release and Generate Notes
id: create_release
uses: softprops/action-gh-release@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
tag_name: ${{ github.ref }}
name: 💫 Release ${{ github.ref }}
name: 💫 Release ${{ env.TAG_NAME }}
draft: false
prerelease: false
generate_release_notes: true
15 changes: 0 additions & 15 deletions .github/workflows/issue-translator.yml

This file was deleted.

3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ ssl/
chat.yaml
chat_*.yaml

config.toml
config_parser.py

har/
Log/
Log-caloi-top/
Expand Down
14 changes: 6 additions & 8 deletions Examples/chat_completion.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from openai import OpenAI
from openai._types import Headers, Query
from rich import print
from sparrow import MeasureTime, yaml_load # pip install sparrow-python

Expand Down Expand Up @@ -27,7 +26,9 @@
user_content = """
用c实现目前已知最快平方根算法
"""
# user_content = 'hi'
user_content = 'hi'
model = "gpt-3.5-turbo"
# model="gpt-4"

mt = MeasureTime().start()

Expand All @@ -51,7 +52,7 @@
}
]
resp = client.chat.completions.create(
model="gpt-3.5-turbo",
model=model,
messages=[
{"role": "user", "content": "What's the weather like in Boston today?"}
],
Expand All @@ -63,8 +64,7 @@

else:
resp = client.chat.completions.create(
model="gpt-3.5-turbo",
# model="gpt-4",
model=model,
messages=[
{"role": "user", "content": user_content},
],
Expand All @@ -73,9 +73,7 @@
max_tokens=max_tokens,
timeout=30,
# extra_headers=(caching, caching)
# extra_query={"caching": False},
# extra_headers = {"caching": False},
extra_body={"caching": False},
extra_body={"caching": caching},
)

if stream:
Expand Down
76 changes: 76 additions & 0 deletions Examples/demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Demo of chat completions through a forwarding proxy with caching:
# exercises (1) a JSON-mode completion and (2) a function-calling (tools)
# completion, passing a `caching` flag via extra_body.
from openai import OpenAI
from openai._types import Headers, Query
from rich import print
from sparrow import MeasureTime, yaml_load  # pip install sparrow-python

# Load `api_key` / `api_base` from config.yaml (rel_path=True: resolved
# relative to this script rather than the CWD — per sparrow's yaml_load).
config = yaml_load("config.yaml", rel_path=True)
print(f"{config=}")

client = OpenAI(
    api_key=config['api_key'],
    base_url=config['api_base'],
)

# `caching` is sent in extra_body below — not a standard OpenAI API field;
# presumably consumed by the openai-forward proxy at `api_base` (TODO confirm).
caching = True
# caching = False
stream = True

# Toggles for the two demo scenarios below.
json_obj_case = True
function_case = True

if json_obj_case:
    # Request a JSON-object response (response_format) from the -1106 model,
    # with a system prompt instructing JSON output.
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        response_format={"type": "json_object"},
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant designed to output JSON.",
            },
            {"role": "user", "content": "Who won the world series in 2020?"},
        ],
        stream=stream,
        extra_body={"caching": caching},  # extra field forwarded to the proxy
    )
    if stream:
        # Streaming: iterate server-sent chunks as they arrive.
        for chunk in response:
            print(chunk)
    else:
        print(response.choices[0].message.content)

if function_case:
    # One tool definition in the OpenAI function-calling schema.
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        tools=tools,
        tool_choice="auto",  # let the model decide whether to invoke the tool
        stream=stream,
        extra_body={"caching": caching},
    )

    if stream:
        for chunk in completion:
            print(chunk)
    else:
        print(completion)
18 changes: 18 additions & 0 deletions Examples/tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Minimal text-to-speech example: synthesize one sentence with the `tts-1`
# model and save the resulting audio as speech.mp3.
from openai import OpenAI
from sparrow import relp, yaml_load

# config.yaml supplies `api_base` / `api_key` (e.g. a forwarding endpoint).
config = yaml_load("config.yaml")

client = OpenAI(
    base_url=config["api_base"],
    api_key=config["api_key"],
)

# relp(): presumably resolves the path relative to this file, not the CWD
# — confirm against sparrow's docs.
speech_file_path = relp("./speech.mp3")
response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",  # one of the preset OpenAI voices
    input="Today is a wonderful day to build something people love!",
)

# Write the returned audio bytes to speech.mp3.
response.stream_to_file(speech_file_path)
15 changes: 9 additions & 6 deletions Examples/whisper.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# Note: you need to be using OpenAI Python v0.27.0 for the code below to work
import openai
from openai import OpenAI
from sparrow import relp, yaml_load

config = yaml_load("config.yaml")
openai.api_base = config["api_base"]
openai.api_key = config["api_key"]
audio_file = open(relp("../.github/data/whisper.m4a"), "rb")
transcript = openai.Audio.transcribe("whisper-1", audio_file)

client = OpenAI(
base_url=config["api_base"],
api_key=config["api_key"],
)

audio_file = open("/path/to/audio.mp3", "rb")
transcript = client.audio.transcriptions.create(model="whisper-1", file=audio_file)
print(transcript)
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@ monitor:

start:
@docker run -d \
--restart=unless-stopped \
--name $(container) \
--env-file .env \
-p 8000:8000 \
-v $(shell pwd)/Log:/home/openai-forward/Log \
-v $(shell pwd)/CACHE_LMDB:/home/openai-forward/CACHE_LMDB \
-v $(shell pwd)/CACHE_LEVELDB:/home/openai-forward/CACHE_LEVELDB \
-v $(shell pwd)/openai_forward:/home/openai-forward/openai_forward \
$(image)
$(image) --port=8000 --workers=2
@make log

exec:
Expand Down
Loading

0 comments on commit a6f9a2c

Please sign in to comment.