In [16]:
import requests
import json
import os
from dotenv import load_dotenv

load_dotenv()
secrets = [s.strip() for s in os.getenv("phi_3").split(",")]

url = secrets[0]
key = secrets[1]

api_url = url + "/v1/chat/completions"

headers = {
    "Authorization": f"Bearer {key}",
    "Content-Type": "application/json",
}

In [19]:
import threading

def stream_response(headers, payload, thread_id):
    with requests.post(api_url, json=payload, headers=headers, stream=True) as response:
        response.raise_for_status()
        print(f"\n[Thread {thread_id}] Response:")
        for line in response.iter_lines():
            if line:
                decoded_line = line.decode("utf-8").strip()
                if decoded_line.startswith("data: "):
                    json_data = decoded_line[6:].strip()
                    if json_data and json_data != "[DONE]":
                        try:
                            parsed_data = json.loads(json_data)
                            if "choices" in parsed_data and parsed_data["choices"]:
                                chunk = parsed_data["choices"][0].get("delta", {}).get("content", "")
                                if chunk:
                                    print(chunk, end="", flush=True)
                        except json.JSONDecodeError:
                            continue
        print(f"\n[Thread {thread_id}] done.")

# Function to send requests in parallel
def parallel_requests(prompts):
    threads = []
    for i, prompt in enumerate(prompts):
        payload = {
            "frequency_penalty": 0,
            "max_tokens": 2048,
            "messages": [{"role": "user", "content": prompt}],
            "presence_penalty": 0,
            "stream": True,
            "temperature": 0.8,
            "top_p": 0.5
        }
        thread = threading.Thread(target=stream_response, args=(headers, payload, i + 1))
        threads.append(thread)
        thread.start()
    
    for thread in threads:
        thread.join()
    
    print("\nAll threads completed.")

# Test with multiple prompts
prompts = [
    "What is the meaning of life?",
    "Describe a futuristic city in the year 2100.",
    "Explain how quantum computing will change the world.",
    "Tell a short story about an AI gaining consciousness.",
    "Describe the feeling of standing on Mars for the first time."
]

parallel_requests(prompts)



[Thread 1] Response:
The meaning of
[Thread 2] Response:

[Thread 5] Response:
Standing life
[Thread 3] Response:
In
[Thread 4] Response:
QuantIn on Marsum the the for is year the computing heart is a  of first philosophical Silicon an210 question time Valley emerging is0 concerning an technology,, the experience that the nestled that significance futuristic lever within def of cityages theies existence the of labyrinth all or Neo principlesine earthly consciousness of-T circuits comparisons.. ofok quantum Answers Asyo a mechanics to you stands to super step this as processcomputer out question a named information of vary testament. A the greatly toether Unlike land dependinger, classical human on, innovation computers a cultural the and spark thin,, of resilience, which religious consciousness. use rust, Rising bits flick-colored andered from as atmosphere individual the to the envelop perspectives ashes basic lifes.

. of unit youSome its of A, people a information predecessorether 