Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,10 @@ allow-direct-references = true
[tool.black]
line-length = 120
skip-string-normalization = true

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = "-v --tb=short"
30 changes: 30 additions & 0 deletions spectrumlab/config/base_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@

@dataclass
class Config:
# API credentials (key and base URL) from Boyue Richdata, used for testing closed MLLMs
BOYUE_API_KEY: str = os.getenv("BOYUE_API_KEY")
BOYUE_BASE_URL: str = os.getenv("BOYUE_BASE_URL")

# DeepSeek API Configuration
deepseek_api_key: str = os.getenv("DEEPSEEK_API_KEY")
deepseek_base_url: str = os.getenv("DEEPSEEK_BASE_URL")
Expand All @@ -25,3 +29,29 @@ class Config:
internvl_api_key: str = os.getenv("INTERNVL_API_KEY")
internvl_base_url: str = os.getenv("INTERNVL_BASE_URL")
internvl_model_name: str = os.getenv("INTERNVL_MODEL_NAME")

# Claude API Configuration
claude_api_key: str = BOYUE_API_KEY
claude_base_url: str = BOYUE_BASE_URL
claude_sonnet_3_5_model_name: str = os.getenv("CLAUDE_SONNET_3_5")
claude_opus_4_model_name: str = os.getenv("CLAUDE_OPUS_4")
claude_haiku_3_5_model_name: str = os.getenv("CLAUDE_HAIKU_3_5")
claude_sonnet_4_model_name: str = os.getenv("CLAUDE_SONNET_4")

# GPT-4.1, GPT-4-Vision
gpt4_1_api_key: str = BOYUE_API_KEY
gpt4_1_base_url: str = BOYUE_BASE_URL
gpt4_1_model_name: str = os.getenv("GPT4_1")
gpt4_vision_api_key: str = BOYUE_API_KEY
gpt4_vision_base_url: str = BOYUE_BASE_URL
gpt4_vision_model_name: str = os.getenv("GPT4_VISION")

# Grok-2-Vision
grok_2_vision_api_key: str = BOYUE_API_KEY
grok_2_vision_base_url: str = BOYUE_BASE_URL
grok_2_vision_model_name: str = os.getenv("GROK_2_VISION")

# Qwen-VL-Max
qwen_vl_api_key: str = BOYUE_API_KEY
qwen_vl_base_url: str = BOYUE_BASE_URL
qwen_vl_model_name: str = os.getenv("QWEN_VL")
7 changes: 6 additions & 1 deletion spectrumlab/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from .deepseek_api import DeepSeek
from .gpt4o_api import GPT4o
from .internvl_api import InternVL
from .claude_api import Claude_Sonnet_3_5, Claude_Opus_4, Claude_Haiku_3_5, Claude_Sonnet_4
from .gpt4_v_api import GPT4_1, GPT4_Vision
from .grok_api import Grok_2_Vision
from .qwen_vl_api import Qwen_VL_Max

# Names exported by a star-import of this package. Assigned exactly once:
# an earlier assignment would simply be overwritten by a later one.
__all__ = [
    "DeepSeek",
    "GPT4o",
    "InternVL",
    "Claude_Sonnet_3_5",
    "Claude_Opus_4",
    "Claude_Haiku_3_5",
    "Claude_Sonnet_4",
    "GPT4_1",
    "GPT4_Vision",
    "Grok_2_Vision",
    "Qwen_VL_Max",
]
288 changes: 288 additions & 0 deletions spectrumlab/models/claude_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
from typing import Optional, Union, Dict, Any
from .base_api import BaseAPIModel
from spectrumlab.config import Config
from openai import OpenAI


class Claude_Sonnet_3_5(BaseAPIModel):
    """Claude Sonnet 3.5 model accessed through the ``openai`` SDK client.

    The API key and base URL come from the constructor arguments or, when those
    are missing/empty, from ``Config.BOYUE_API_KEY`` / ``Config.BOYUE_BASE_URL``.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        model_name: Optional[str] = None,
        **kwargs,
    ):
        """Resolve settings, create the client and initialise the base class.

        Args:
            api_key: API key; falls back to ``Config.BOYUE_API_KEY``.
            base_url: Endpoint base URL; falls back to ``Config.BOYUE_BASE_URL``.
            model_name: Model identifier; falls back to
                ``Config.claude_sonnet_3_5_model_name``.
            **kwargs: Forwarded unchanged to ``BaseAPIModel.__init__``.

        Raises:
            ValueError: If no API key was passed and none is configured.
        """
        config = Config()

        # Explicit arguments win; None/empty values fall back to the config.
        self.api_key = api_key or config.BOYUE_API_KEY
        self.base_url = base_url or config.BOYUE_BASE_URL
        self.model_name = model_name or config.claude_sonnet_3_5_model_name

        # Fail early; the message names the variable that is actually read.
        if not self.api_key:
            raise ValueError(
                "Claude API key not found. Please set BOYUE_API_KEY in your .env file "
                "or provide api_key parameter."
            )

        self.client = OpenAI(
            api_key=self.api_key,
            base_url=self.base_url,
        )

        # Initialize parent class
        super().__init__(model_name=self.model_name, **kwargs)

    def generate(
        self, prompt: Union[str, Dict[str, Any]], max_out_len: int = 512
    ) -> str:
        """Generate a response for a text or multimodal prompt.

        Args:
            prompt: Either a plain string, or a dict. A dict containing an
                ``"images"`` key is sent as a multimodal message built from
                ``prompt["text"]`` followed by every entry of
                ``prompt["images"]``; any other dict contributes only
                ``prompt.get("text", "")``.
            max_out_len: Value passed as ``max_tokens`` to the completion call.

        Returns:
            The ``content`` of the first choice's message.

        Raises:
            KeyError: If a multimodal dict has ``"images"`` but no ``"text"``.
            RuntimeError: If the completion request or response access fails;
                the original exception is attached as ``__cause__``.
        """
        # Link: https://docs.anthropic.com/claude/reference/getting-started-with-the-api
        if isinstance(prompt, dict) and "images" in prompt:
            # Text part first, then the image entries exactly as supplied
            # (presumably already in the endpoint's content-part format —
            # confirm against the caller).
            content = [{"type": "text", "text": prompt["text"]}, *prompt["images"]]
        elif isinstance(prompt, str):
            content = prompt
        else:
            content = prompt.get("text", "")

        messages = [{"role": "user", "content": content}]

        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                max_tokens=max_out_len,
            )
            return response.choices[0].message.content
        except Exception as e:
            # Chain explicitly so the underlying error stays visible to callers.
            raise RuntimeError(f"Claude API call failed: {e}") from e


class Claude_Opus_4(BaseAPIModel):
    """Claude Opus 4 model accessed through the ``openai`` SDK client.

    The API key and base URL come from the constructor arguments or, when those
    are missing/empty, from ``Config.BOYUE_API_KEY`` / ``Config.BOYUE_BASE_URL``.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        model_name: Optional[str] = None,
        **kwargs,
    ):
        """Resolve settings, create the client and initialise the base class.

        Args:
            api_key: API key; falls back to ``Config.BOYUE_API_KEY``.
            base_url: Endpoint base URL; falls back to ``Config.BOYUE_BASE_URL``.
            model_name: Model identifier; falls back to
                ``Config.claude_opus_4_model_name``.
            **kwargs: Forwarded unchanged to ``BaseAPIModel.__init__``.

        Raises:
            ValueError: If no API key was passed and none is configured.
        """
        config = Config()

        # Explicit arguments win; None/empty values fall back to the config.
        self.api_key = api_key or config.BOYUE_API_KEY
        self.base_url = base_url or config.BOYUE_BASE_URL
        self.model_name = model_name or config.claude_opus_4_model_name

        # Fail early; the message names the variable that is actually read.
        if not self.api_key:
            raise ValueError(
                "Claude API key not found. Please set BOYUE_API_KEY in your .env file "
                "or provide api_key parameter."
            )

        self.client = OpenAI(
            api_key=self.api_key,
            base_url=self.base_url,
        )

        # Initialize parent class
        super().__init__(model_name=self.model_name, **kwargs)

    def generate(
        self, prompt: Union[str, Dict[str, Any]], max_out_len: int = 512
    ) -> str:
        """Generate a response for a text or multimodal prompt.

        Args:
            prompt: Either a plain string, or a dict. A dict containing an
                ``"images"`` key is sent as a multimodal message built from
                ``prompt["text"]`` followed by every entry of
                ``prompt["images"]``; any other dict contributes only
                ``prompt.get("text", "")``.
            max_out_len: Value passed as ``max_tokens`` to the completion call.

        Returns:
            The ``content`` of the first choice's message.

        Raises:
            KeyError: If a multimodal dict has ``"images"`` but no ``"text"``.
            RuntimeError: If the completion request or response access fails;
                the original exception is attached as ``__cause__``.
        """
        # Link: https://docs.anthropic.com/claude/reference/getting-started-with-the-api
        if isinstance(prompt, dict) and "images" in prompt:
            # Text part first, then the image entries exactly as supplied
            # (presumably already in the endpoint's content-part format —
            # confirm against the caller).
            content = [{"type": "text", "text": prompt["text"]}, *prompt["images"]]
        elif isinstance(prompt, str):
            content = prompt
        else:
            content = prompt.get("text", "")

        messages = [{"role": "user", "content": content}]

        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                max_tokens=max_out_len,
            )
            return response.choices[0].message.content
        except Exception as e:
            # Chain explicitly so the underlying error stays visible to callers.
            raise RuntimeError(f"Claude API call failed: {e}") from e


class Claude_Haiku_3_5(BaseAPIModel):
    """Claude Haiku 3.5 model accessed through the ``openai`` SDK client.

    The API key and base URL come from the constructor arguments or, when those
    are missing/empty, from ``Config.BOYUE_API_KEY`` / ``Config.BOYUE_BASE_URL``.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        model_name: Optional[str] = None,
        **kwargs,
    ):
        """Resolve settings, create the client and initialise the base class.

        Args:
            api_key: API key; falls back to ``Config.BOYUE_API_KEY``.
            base_url: Endpoint base URL; falls back to ``Config.BOYUE_BASE_URL``.
            model_name: Model identifier; falls back to
                ``Config.claude_haiku_3_5_model_name``.
            **kwargs: Forwarded unchanged to ``BaseAPIModel.__init__``.

        Raises:
            ValueError: If no API key was passed and none is configured.
        """
        config = Config()

        # Explicit arguments win; None/empty values fall back to the config.
        self.api_key = api_key or config.BOYUE_API_KEY
        self.base_url = base_url or config.BOYUE_BASE_URL
        self.model_name = model_name or config.claude_haiku_3_5_model_name

        # Fail early; the message names the variable that is actually read.
        if not self.api_key:
            raise ValueError(
                "Claude API key not found. Please set BOYUE_API_KEY in your .env file "
                "or provide api_key parameter."
            )

        self.client = OpenAI(
            api_key=self.api_key,
            base_url=self.base_url,
        )

        # Initialize parent class
        super().__init__(model_name=self.model_name, **kwargs)

    def generate(
        self, prompt: Union[str, Dict[str, Any]], max_out_len: int = 512
    ) -> str:
        """Generate a response for a text or multimodal prompt.

        Args:
            prompt: Either a plain string, or a dict. A dict containing an
                ``"images"`` key is sent as a multimodal message built from
                ``prompt["text"]`` followed by every entry of
                ``prompt["images"]``; any other dict contributes only
                ``prompt.get("text", "")``.
            max_out_len: Value passed as ``max_tokens`` to the completion call.

        Returns:
            The ``content`` of the first choice's message.

        Raises:
            KeyError: If a multimodal dict has ``"images"`` but no ``"text"``.
            RuntimeError: If the completion request or response access fails;
                the original exception is attached as ``__cause__``.
        """
        # Link: https://docs.anthropic.com/claude/reference/getting-started-with-the-api
        if isinstance(prompt, dict) and "images" in prompt:
            # Text part first, then the image entries exactly as supplied
            # (presumably already in the endpoint's content-part format —
            # confirm against the caller).
            content = [{"type": "text", "text": prompt["text"]}, *prompt["images"]]
        elif isinstance(prompt, str):
            content = prompt
        else:
            content = prompt.get("text", "")

        messages = [{"role": "user", "content": content}]

        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                max_tokens=max_out_len,
            )
            return response.choices[0].message.content
        except Exception as e:
            # Chain explicitly so the underlying error stays visible to callers.
            raise RuntimeError(f"Claude API call failed: {e}") from e

class Claude_Sonnet_4(BaseAPIModel):
    """Claude Sonnet 4 model accessed through the ``openai`` SDK client.

    The API key and base URL come from the constructor arguments or, when those
    are missing/empty, from ``Config.BOYUE_API_KEY`` / ``Config.BOYUE_BASE_URL``.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        model_name: Optional[str] = None,
        **kwargs,
    ):
        """Resolve settings, create the client and initialise the base class.

        Args:
            api_key: API key; falls back to ``Config.BOYUE_API_KEY``.
            base_url: Endpoint base URL; falls back to ``Config.BOYUE_BASE_URL``.
            model_name: Model identifier; falls back to
                ``Config.claude_sonnet_4_model_name``.
            **kwargs: Forwarded unchanged to ``BaseAPIModel.__init__``.

        Raises:
            ValueError: If no API key was passed and none is configured.
        """
        config = Config()

        # Explicit arguments win; None/empty values fall back to the config.
        self.api_key = api_key or config.BOYUE_API_KEY
        self.base_url = base_url or config.BOYUE_BASE_URL
        self.model_name = model_name or config.claude_sonnet_4_model_name

        # Fail early; the message names the variable that is actually read.
        if not self.api_key:
            raise ValueError(
                "Claude API key not found. Please set BOYUE_API_KEY in your .env file "
                "or provide api_key parameter."
            )

        self.client = OpenAI(
            api_key=self.api_key,
            base_url=self.base_url,
        )

        # Initialize parent class
        super().__init__(model_name=self.model_name, **kwargs)

    def generate(
        self, prompt: Union[str, Dict[str, Any]], max_out_len: int = 512
    ) -> str:
        """Generate a response for a text or multimodal prompt.

        Args:
            prompt: Either a plain string, or a dict. A dict containing an
                ``"images"`` key is sent as a multimodal message built from
                ``prompt["text"]`` followed by every entry of
                ``prompt["images"]``; any other dict contributes only
                ``prompt.get("text", "")``.
            max_out_len: Value passed as ``max_tokens`` to the completion call.

        Returns:
            The ``content`` of the first choice's message.

        Raises:
            KeyError: If a multimodal dict has ``"images"`` but no ``"text"``.
            RuntimeError: If the completion request or response access fails;
                the original exception is attached as ``__cause__``.
        """
        # Link: https://docs.anthropic.com/claude/reference/getting-started-with-the-api
        if isinstance(prompt, dict) and "images" in prompt:
            # Text part first, then the image entries exactly as supplied
            # (presumably already in the endpoint's content-part format —
            # confirm against the caller).
            content = [{"type": "text", "text": prompt["text"]}, *prompt["images"]]
        elif isinstance(prompt, str):
            content = prompt
        else:
            content = prompt.get("text", "")

        messages = [{"role": "user", "content": content}]

        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                max_tokens=max_out_len,
            )
            return response.choices[0].message.content
        except Exception as e:
            # Chain explicitly so the underlying error stays visible to callers.
            raise RuntimeError(f"Claude API call failed: {e}") from e

Loading