9 changes: 1 addition & 8 deletions src/backend/main.py
@@ -95,14 +95,6 @@ async def stream_cluster_status():
)


@app.post("/v1/completions")
async def openai_v1_completions(raw_request: Request):
request_data = await raw_request.json()
request_id = uuid.uuid4()
received_ts = time.time()
return await request_handler.v1_completions(request_data, request_id, received_ts)


@app.post("/v1/chat/completions")
async def openai_v1_chat_completions(raw_request: Request):
request_data = await raw_request.json()
@@ -145,6 +137,7 @@ async def serve_index():
f"/ip4/0.0.0.0/udp/{args.udp_port}/quic-v1",
],
announce_maddrs=args.announce_maddrs,
http_port=args.port,
)

request_handler.set_scheduler_manage(scheduler_manage)
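With the legacy `/v1/completions` route removed, callers go through `/v1/chat/completions`. Below is a minimal sketch of hitting that route directly, assuming an OpenAI-style request body (the `model` and `messages` fields are assumptions here; only `stream` is referenced elsewhere in this PR) and that the backend serves HTTP on the same port passed as `http_port` (3001 is the default added in `scheduler_manage.py`):

```python
# Illustrative only: field names follow the OpenAI chat-completions convention;
# the exact schema accepted by request_handler.v1_chat_completions is not shown in this diff.
import httpx

payload = {
    "model": "any-served-model",  # assumption: model selection field
    "messages": [{"role": "user", "content": "Hello"}],
    "stream": False,  # the new RPC proxy checks this same flag
}

# 3001 matches the http_port default added to SchedulerManage in this PR.
with httpx.Client(timeout=60) as client:
    resp = client.post("http://localhost:3001/v1/chat/completions", json=payload)
    resp.raise_for_status()
    print(resp.json())
```

This is the same request body that the new `chat_completion` RPC handler forwards verbatim to the local endpoint.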
1 change: 1 addition & 0 deletions src/backend/server/constants.py
@@ -2,6 +2,7 @@
CLUSTER_STATUS_WAITING = "waiting"
CLUSTER_STATUS_AVAILABLE = "available"
CLUSTER_STATUS_REBALANCING = "rebalancing"
CLUSTER_STATUS_FAILED = "failed"

# Node status constants
NODE_STATUS_WAITING = "waiting"
49 changes: 48 additions & 1 deletion src/backend/server/rpc_connection_handler.py
@@ -1,13 +1,17 @@
import time

from lattica import ConnectionHandler, Lattica, rpc_method, rpc_stream
from lattica import ConnectionHandler, Lattica, rpc_method, rpc_stream, rpc_stream_iter

from parallax_utils.logging_config import get_logger
from scheduling.node import Node, NodeHardwareInfo
from scheduling.scheduler import Scheduler

logger = get_logger(__name__)

import json

import httpx


class RPCConnectionHandler(ConnectionHandler):
"""
@@ -19,10 +19,12 @@ def __init__(
self,
lattica: Lattica,
scheduler: Scheduler,
http_port: int,
):
# Initialize the base class
super().__init__(lattica)
self.scheduler = scheduler
self.http_port = http_port

@rpc_stream
def node_join(self, message):
@@ -79,6 +85,47 @@ def node_update(self, message):
logger.exception(f"node_update error: {e}")
return {}

@rpc_stream_iter
def chat_completion(
self,
request,
):
"""Handle chat completion request"""
logger.debug(f"Chat completion request: {request}, type: {type(request)}")
try:
with httpx.Client(timeout=10 * 60, proxy=None, trust_env=False) as client:
if request.get("stream", False):
with client.stream(
"POST",
f"http://localhost:{self.http_port}/v1/chat/completions",
json=request,
) as response:
for chunk in response.iter_bytes():
if chunk:
yield chunk
else:
response = client.post(
f"http://localhost:{self.http_port}/v1/chat/completions", json=request
).json()
yield json.dumps(response).encode()
except Exception as e:
logger.exception(f"Error in chat completion: {e}")
yield b"internal server error"

@rpc_stream_iter
def cluster_status(self):
try:
with httpx.Client(timeout=10 * 60, proxy=None, trust_env=False) as client:
with client.stream(
"GET", f"http://localhost:{self.http_port}/cluster/status"
) as response:
for chunk in response.iter_bytes():
if chunk:
yield chunk
except Exception as e:
logger.exception(f"Error in cluster status: {e}")
yield json.dumps({"error": "internal server error"}).encode()

def wait_layer_allocation(self, current_node_id, wait_seconds):
start_time = time.time()
while True:
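The two new `@rpc_stream_iter` handlers are thin proxies: they replay the caller's request against the node-local FastAPI server and yield the response back as raw byte chunks. Below is a sketch of how a consumer of that byte stream might decode it, assuming the streaming path emits OpenAI-style SSE `data:` lines (not shown in this diff) and the non-streaming path emits a single JSON body (as the handler's `json.dumps(...).encode()` does). How a peer actually obtains the iterator is Lattica-specific and not shown here.

```python
# Illustrative consumer for the byte chunks yielded by chat_completion above.
# `chunks` is just any iterable of bytes; obtaining it over Lattica is out of scope.
import json
from typing import Iterable, Iterator

def decode_chat_chunks(chunks: Iterable[bytes], stream: bool) -> Iterator[dict]:
    if not stream:
        # Non-streaming path: the handler yields one JSON-encoded body.
        yield json.loads(b"".join(chunks).decode())
        return
    # Streaming path: assume OpenAI-style SSE events terminated by a blank line.
    buffer = b""
    for chunk in chunks:
        buffer += chunk
        while b"\n\n" in buffer:
            event, buffer = buffer.split(b"\n\n", 1)
            line = event.decode().strip()
            if line.startswith("data:"):
                data = line[len("data:"):].strip()
                if data and data != "[DONE]":
                    yield json.loads(data)
```

Note that errors are surfaced in-band: `chat_completion` yields a plain `b"internal server error"` payload while `cluster_status` yields a JSON error object, so a consumer has to tolerate both shapes.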
4 changes: 3 additions & 1 deletion src/backend/server/scheduler_manage.py
@@ -31,14 +31,15 @@ def __init__(
dht_prefix: str = "gradient",
host_maddrs: List[str] = [],
announce_maddrs: List[str] = [],
http_port: int = 3001,
):
"""Initialize the manager with networking bootstrap parameters."""
self.initial_peers = initial_peers
self.relay_servers = relay_servers
self.dht_prefix = dht_prefix
self.host_maddrs = host_maddrs
self.announce_maddrs = announce_maddrs

self.http_port = http_port
self.model_name = None
self.init_nodes_num = None
self.scheduler = None
@@ -190,6 +191,7 @@ def _start_lattica(self):
self.connection_handler = RPCConnectionHandler(
lattica=self.lattica,
scheduler=self.scheduler,
http_port=self.http_port,
)
logger.debug("RPCConnectionHandler initialized")

13 changes: 13 additions & 0 deletions src/frontend/chat.html
@@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="./src/assets/gradient-icon.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>CHAT Parallax by Gradient</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/chat.tsx"></script>
</body>
</html>
320 changes: 320 additions & 0 deletions src/frontend/dist/assets/App-BqDXU43s.js

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions src/frontend/dist/assets/chat-C9ZhywtF.js

Some generated files are not rendered by default.
