Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions skllm/llm/gpt/mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,20 @@ def _set_keys(self, key: Optional[str] = None, org: Optional[str] = None) -> Non
"""
Set the OpenAI key and organization.
"""
self.openai_key = key
self.openai_org = org

self.key = key
self.org = org


def _get_openai_key(self) -> str:
"""
Get the OpenAI key from the class or the config file.

Returns
-------
openai_key: str
key: str
"""
key = self.openai_key
key = self.key
if key is None:
key = _Config.get_openai_key()
if key is None:
Expand All @@ -90,14 +92,14 @@ def _get_openai_org(self) -> str:

Returns
-------
openai_org: str
org: str
"""
key = self.openai_org
if key is None:
key = _Config.get_openai_org()
if key is None:
org = self.org
if org is None:
org = _Config.get_openai_org()
if org is None:
raise RuntimeError("OpenAI organization was not found")
return key
return org


class GPTTextCompletionMixin(GPTMixin, BaseTextCompletionMixin):
Expand Down Expand Up @@ -262,4 +264,4 @@ def _tune(self, X, y):
self.openai_model = job.fine_tuned_model
self.model = self.openai_model # openai_model is probably not required anymore
delete_file(client, job.training_file)
print(f"Finished training.")
print(f"Finished training.")
16 changes: 12 additions & 4 deletions skllm/models/_base/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
BaseEstimator as _SklBaseEstimator,
ClassifierMixin as _SklClassifierMixin,
)
import warnings
import numpy as np
import pandas as pd
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import random
from collections import Counter
from skllm.llm.base import (
Expand Down Expand Up @@ -211,24 +213,30 @@ def fit(
self.classes_, self.probabilities_ = self._get_unique_targets(y)
return self

def predict(self, X: Union[np.ndarray, pd.Series, List[str]]):
def predict(self, X: Union[np.ndarray, pd.Series, List[str]], num_workers: int = 1):
"""
Predicts the class of each input.

Parameters
----------
X : Union[np.ndarray, pd.Series, List[str]]
The input data to predict the class of.

num_workers : int
number of workers to use for multithreaded prediction, default 1

Returns
-------
np.ndarray
The predicted classes as a numpy array.
"""
X = _to_numpy(X)
predictions = []
for i in tqdm(range(len(X))):
predictions.append(self._predict_single(X[i]))

if num_workers > 1:
warnings.warn("Passing num_workers to predict is temporary and will be removed in the future.")
with ThreadPoolExecutor(max_workers=num_workers) as executor:
predictions = list(tqdm(executor.map(self._predict_single, X), total=len(X)))

return np.array(predictions)

def _get_unique_targets(self, y: Any):
Expand Down
2 changes: 1 addition & 1 deletion skllm/models/gpt/classification/few_shot.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def __init__(
metric used for similarity search, by default "euclidean"
"""
if vectorizer is None:
vectorizer = GPTVectorizer(model="text-embedding-ada-002")
vectorizer = GPTVectorizer(model="text-embedding-ada-002", key=key, org=org)
super().__init__(
model=model,
default_label=default_label,
Expand Down