In [1]:
import requests
import os
from dotenv import load_dotenv
import json

load_dotenv()
apiKey = os.getenv("HKBU_API_KEY")

if apiKey is None:
    raise ValueError("HKBU_API_KEY not found in environment variables")

In [2]:
basicUrl = "https://genai.hkbu.edu.hk/general/rest"

### OPENAI Models

In [3]:
openai_models = [
    {
        "model": "gpt-4-o",
        'api-version': '2024-10-21',
    },
    {
        "model": "gpt-4-o-mini",
        'api-version': '2024-10-21',
    },
    {
        "model": "o1-preview",
        'api-version': '2024-10-21',
    },
    {
        "model": "o1-mini",
        'api-version': '2024-10-21',
    },
    {
        "model": "text-embedding-3-large",
        'api-version': '2024-05-01-preview',
    },
    {
        "model": "text-embedding-3-small",
        'api-version': '2024-05-01-preview',
    },
]


In [None]:
def OpenAI(
        message: str,
        model_name: str = "gpt-4-o-mini",
        imageURL: str = None,
        temperature: float = 0,
        max_tokens: int = 100,
        tools: list = None,
        stream: bool = False
        # response_format: dict = None,
        ):
    # Find the model in the openai_models list
    model_info = next((model for model in openai_models if model["model"] == model_name), None)
    
    if not model_info:
        raise ValueError(f"Model {model_name} not found in openai_models list")
    
    api_version = model_info['api-version']
    
    conversation = [{"role": "user", "content": message}]
    
    if imageURL:
        conversation[0]["content"] = [
            {"type": "text", "text": message},
            {"type": "image_url", "image_url": {"url": imageURL, "detail": "low"}}
        ]
    
    url = basicUrl + "/deployments/" + model_name + "/chat/completions/?api-version=" + api_version
    headers = { 'Content-Type': 'application/json', 'api-key': apiKey }
    payload = { 
        'messages': conversation,
        'temperature': temperature,
        'max_tokens': max_tokens,
        'tools': tools,
        "stream": stream,
        # "response_format": response_format
    }
    
    response = requests.post(url, json=payload, headers=headers)

    if response.status_code == 200:
        data = response.json()
        return data
    else:
        return 'Error:', response

In [None]:
result = OpenAI(
    message="hello", 
    model_name="gpt-4-o-mini", 
    imageURL=None, 
    temperature=0.5, 
    max_tokens=20,
    tools=None,
    stream=True,
    response_format="text"
    )
print(json.dumps(result, indent=4))

{
    "id": "chatcmpl-B3KKTKNxY4FRy5GdCHAV7uTSAiNlV",
    "object": "chat.completion",
    "created": 1740132829,
    "model": "gpt-4o-mini-2024-07-18",
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "Hello! How can I assist you today?",
                "refusal": null
            },
            "logprobs": null,
            "finish_reason": "stop"
        }
    ],
    "usage": {
        "prompt_tokens": 8,
        "completion_tokens": 9,
        "total_tokens": 17,
        "prompt_tokens_details": {
            "cached_tokens": 0,
            "audio_tokens": 0
        },
        "completion_tokens_details": {
            "reasoning_tokens": 0,
            "audio_tokens": 0,
            "accepted_prediction_tokens": 0,
            "rejected_prediction_tokens": 0
        }
    },
    "system_fingerprint": "fp_b705f0c291"
}


### Claude Models

In [6]:
claude_models = [
    {
        "model": "claude-3-5-sonnet",
        "api-version": "20240620"
    },
    {
        "model": "claude-3-haiku",
        "api-version": "20240307"
    }
]

In [None]:
def Claude(
        message, 
        model_name="claude-3-5-sonnet", 
        imageURL=None, 
        temperature=0, 
        max_tokens=100
        ):
    # Find the model in the claude_models list
    model_info = next((model for model in claude_models if model["model"] == model_name), None)
    
    if not model_info:
        raise ValueError(f"Model {model_name} not found in claude_models list")
    
    api_version = model_info['api-version']
    
    conversation = [{"role": "user", "content": message}]
    
    if imageURL:
        conversation[0]["content"] = [
            {"type": "text", "text": message},
            {"type": "image_url", "image_url": {"url": imageURL, "detail": "low"}}
        ]
    
    url = basicUrl + "/deployments/" + model_name + "/messages/?api-version=" + api_version
    headers = { 'Content-Type': 'application/json', 'api-key': apiKey }
    payload = { 
        'messages': conversation,
        'temperature': temperature,
        'max_tokens': max_tokens
    }
    
    response = requests.post(url, json=payload, headers=headers)

    if response.status_code == 200:
        data = response.json()
        return data
    else:
        return 'Error:', response


In [16]:
result = Claude(message="hello", model_name="claude-3-haiku", imageURL=None, temperature=0.5, max_tokens=20)

if isinstance(result, tuple) and result[0] == 'Error:':
	print(f"Error: {result[1].status_code} - {result[1].text}")
else:
	print(json.dumps(result, indent=4))

Error: 429 - {"error":{"message":"Quota exceeded for aiplatform.googleapis.com/online_prediction_requests_per_base_model with base model: anthropic-claude-3-haiku. Please submit a quota increase request. https://cloud.google.com/vertex-ai/docs/generative-ai/quotas-genai.","param":null,"code":429}}


### Gemini Models

In [17]:
gemini_models = [
    {
        "model": "gemini-1.5-pro",
        "api-version": "002"
    },
    {
        "model": "gemini-1.5-flash",
        "api-version": "002"
    }
]


In [18]:
def Gemini(message, model_name="gemini-1.5-flash", temperature=0.7, maxOutputTokens=10, stream=False):
    # Find the model in the gemini_models list
    model_info = next((model for model in gemini_models if model["model"] == model_name), None)
    
    if not model_info:
        raise ValueError(f"Model {model_name} not found in gemini_models list")
    
    api_version = model_info['api-version']
    
    contents = [{"role": "user", "parts": [{"text": message}]}]
    
    url = f"{basicUrl}/deployments/{model_name}/generate_content?api-version={api_version}"
    headers = { 
        'Content-Type': 'application/json', 
        'accept': 'application/json',
        'api-key': apiKey 
    }
    payload = { 
        'contents': contents,
        'generationConfig': {
            'maxOutputTokens': maxOutputTokens,
            'temperature': temperature
        },
        'stream': stream
    }
    
    response = requests.post(url, json=payload, headers=headers)

    if response.status_code == 200:
        data = response.json()
        return data
    else:
        print('Error:', response)
        print('Payload:', json.dumps(payload, indent=2))
        print('Response Text:', response.text)
        print('URL:', url)
        print('Headers:', headers)
        return 'Error:', response

In [19]:
result = Gemini(message="hello", model_name="gemini-1.5-flash", temperature=0.5, maxOutputTokens=10, stream=False)
print(json.dumps(result, indent=4))

{
    "candidates": [
        {
            "content": {
                "role": "model",
                "parts": [
                    {
                        "text": "Hello there! How can I help you today?"
                    }
                ]
            },
            "finishReason": "MAX_TOKENS",
            "avgLogprobs": -0.0006957607809454202,
            "index": 0
        }
    ],
    "usageMetadata": {
        "promptTokenCount": 1,
        "candidatesTokenCount": 10,
        "totalTokenCount": 11,
        "promptTokensDetails": [
            {
                "modality": "TEXT",
                "tokenCount": 1
            }
        ],
        "candidatesTokensDetails": [
            {
                "modality": "TEXT",
                "tokenCount": 10
            }
        ]
    },
    "modelVersion": "gemini-1.5-flash-002",
    "createTime": "2025-02-21T09:21:16.058832Z",
    "responseId": "jEW4Z9DLA4PP-dIPubTK0A8"
}


### Llama Models

In [20]:
llama_models = [
    {
        "model": "llama3_1",
        "api-version": "20240723"
    }
]

In [21]:
def llama(message, model_name="llama3_1", imageURL=None, temperature=0, max_tokens=100, top_p=1.0, top_k=50, stop_sequences=None, stream=False, system=None):
    # Find the model in the claude_models list
    model_info = next((model for model in llama_models if model["model"] == model_name), None)
    
    if not model_info:
        raise ValueError(f"Model {model_name} not found in claude_models list")
    
    api_version = model_info['api-version']
    
    conversation = [{"role": "user", "content": message}]
    
    if imageURL:
        conversation[0]["content"] = [
            {"type": "text", "text": message},
            {"type": "image_url", "image_url": {"url": imageURL, "detail": "low"}}
        ]
    
    url = basicUrl + "/deployments/" + model_name + "/llama/completion/?api-version=" + api_version
    headers = { 'Content-Type': 'application/json', 'api-key': apiKey }
    payload = { 
        'messages': conversation,
        'temperature': temperature,
        'max_tokens': max_tokens,
        'top_p': top_p,
        'top_k': top_k,
        'stop_sequences': stop_sequences,
        'stream': stream,
        'system': system
    }
    
    response = requests.post(url, json=payload, headers=headers)

    if response.status_code == 200:
        data = response.json()
        return data
    else:
        return 'Error:', response

In [None]:
result = llama(
    message="hello", 
    model_name="llama3_1", 
    imageURL=None, 
    temperature=0.5, 
    max_tokens=10, 
    top_p=1.0, 
    top_k=50, 
    stop_sequences=None, 
    stream=False, 
    system=None
    )
print(json.dumps(result, indent=4))

{
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
                "content": "Hello! How are you today? Is there something I can help you with or would you like to chat?",
                "role": "assistant"
            }
        }
    ],
    "created": 1740129678,
    "id": "2025-02-21|01:21:18.256198-08|2.65.20.36|1964221016",
    "model": "meta/llama-3.1-405b-instruct-maas",
    "object": "chat.completion",
    "system_fingerprint": "",
    "usage": {
        "completion_tokens": 22,
        "prompt_tokens": 1,
        "total_tokens": 23
    }
}


### Combined 

In [23]:
models = [
    {"model": "gpt-4-o", "api-version": "2024-10-21", "type": "openai"},
    {"model": "gpt-4-o-mini", "api-version": "2024-10-21", "type": "openai"},
    {"model": "o1-preview", "api-version": "2024-10-21", "type": "openai"},
    {"model": "o1-mini", "api-version": "2024-10-21", "type": "openai"},
    {"model": "text-embedding-3-large", "api-version": "2024-05-01-preview", "type": "openai"},
    {"model": "text-embedding-3-small", "api-version": "2024-05-01-preview", "type": "openai"},
    {"model": "claude-3-5-sonnet", "api-version": "20240620", "type": "claude"},
    {"model": "claude-3-haiku", "api-version": "20240307", "type": "claude"},
    {"model": "gemini-1.5-pro", "api-version": "002", "type": "gemini"},
    {"model": "gemini-1.5-flash", "api-version": "002", "type": "gemini"},
    {"model": "llama3_1", "api-version": "20240723", "type": "llama"},
]

In [24]:
def query_model(message, model_name, kwargs):
    model_info = next((model for model in models if model["model"] == model_name), None)
    if not model_info:
        raise ValueError(f"Model {model_name} not found")

    api_version = model_info['api-version']
    model_type = model_info['type']

    headers = {'Content-Type': 'application/json', 'api-key': apiKey}

    if model_type == "openai":
        url = f"{basicUrl}/deployments/{model_name}/chat/completions/?api-version={api_version}"
        payload = {
            'messages': [{"role": "user", "content": message}],
            'temperature': kwargs.get('temperature', 0),
            'max_tokens': kwargs.get('max_tokens', 100)
        }
        if kwargs.get('imageURL'):
             payload['messages'][0]['content'] = [
                {"type": "text", "text": message},
                {"type": "image_url", "image_url": {"url": kwargs.get('imageURL'), "detail": "low"}}
            ]
    elif model_type == "claude":
        url = f"{basicUrl}/deployments/{model_name}/messages/?api-version={api_version}"
        payload = {
            'messages': [{"role": "user", "content": message}],
            'temperature': kwargs.get('temperature', 0),
            'max_tokens': kwargs.get('max_tokens', 100)
        }
        if kwargs.get('imageURL'):
             payload['messages'][0]['content'] = [
                {"type": "text", "text": message},
                {"type": "image_url", "image_url": {"url": kwargs.get('imageURL'), "detail": "low"}}
            ]
    elif model_type == "gemini":
        url = f"{basicUrl}/deployments/{model_name}/generate_content?api-version={api_version}"
        headers['accept'] = 'application/json'  # Gemini requires this header
        payload = {
            'contents': [{"role": "user", "parts": [{"text": message}]}],
            'generationConfig': {
                'maxOutputTokens': kwargs.get('maxOutputTokens', 10),
                'temperature': kwargs.get('temperature', 0.7)
            },
            'stream': kwargs.get('stream', False)
        }
    elif model_type == "llama":
        url = f"{basicUrl}/deployments/{model_name}/llama/completion/?api-version={api_version}"
        payload = {
            'messages': [{"role": "user", "content": message}],
            'temperature': kwargs.get('temperature', 0),
            'max_tokens': kwargs.get('max_tokens', 100),
            'top_p': kwargs.get('top_p', 1.0),
            'top_k': kwargs.get('top_k', 50),
            'stop_sequences': kwargs.get('stop_sequences', None),
            'stream': kwargs.get('stream', False),
            'system': kwargs.get('system', None)
        }
        if kwargs.get('imageURL'):
             payload['messages'][0]['content'] = [
                {"type": "text", "text": message},
                {"type": "image_url", "image_url": {"url": kwargs.get('imageURL'), "detail": "low"}}
            ]
    else:
        raise ValueError(f"Unknown model type: {model_type}")

    response = requests.post(url, json=payload, headers=headers)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code}")
        print(f"Response: {response.text}")
        return None


In [None]:
# Example usage
result = query_model(message="hello", model_name="gemini-1.5-flash", temperature=0.5, maxOutputTokens=6)
print(json.dumps(result, indent=4))

result = query_model(message="hello", model_name="gpt-4-o-mini", temperature=0.5, max_tokens=6)
print(json.dumps(result, indent=4))

result = query_model(message="hello", model_name="llama3_1", temperature=0.5, max_tokens=6)
print(json.dumps(result, indent=4))

result = query_model(message="hello", model_name="claude-3-haiku", temperature=0.5, max_tokens=6)
print(json.dumps(result, indent=4))

TypeError: query_model() got an unexpected keyword argument 'maxOutputTokens'