From 16f54687495ee69541c120533b5884443bdf0dc1 Mon Sep 17 00:00:00 2001
From: Ogundepo Odunayo <ogundepoodunayo@gmail.com>
Date: Thu, 1 Jun 2023 00:23:41 -0400
Subject: [PATCH 1/4] add docstrings for openai and preprocessing modules

---
 skllm/gpt4all_client.py               | 22 ++++++++++-
 skllm/models/gpt_zero_shot_clf.py     | 25 ++++++++++++
 skllm/openai/chatgpt.py               | 51 +++++++++++++++++++++++--
 skllm/openai/credentials.py           | 12 +++++-
 skllm/openai/embeddings.py            | 25 +++++++++++-
 skllm/openai/mixin.py                 | 21 +++++++++-
 skllm/preprocessing/gpt_summarizer.py | 30 ++++++++++++++-
 skllm/preprocessing/gpt_translator.py |  2 +-
 skllm/preprocessing/gpt_vectorizer.py | 55 ++++++++++++++++++++++++++-
 skllm/utils.py                        | 32 ++++++++++++++--
 10 files changed, 260 insertions(+), 15 deletions(-)

diff --git a/skllm/gpt4all_client.py b/skllm/gpt4all_client.py
index e8ef0ae..943dc94 100644
--- a/skllm/gpt4all_client.py
+++ b/skllm/gpt4all_client.py
@@ -1,3 +1,5 @@
+from typing import Dict
+
 try:
     from gpt4all import GPT4All
 except (ImportError, ModuleNotFoundError):
@@ -6,18 +8,34 @@
 _loaded_models = {}
 
 
-def get_chat_completion(messages, model="ggml-gpt4all-j-v1.3-groovy"):
+def get_chat_completion(messages: str, model: str="ggml-gpt4all-j-v1.3-groovy") -> Dict:
+    """
+    Get a chat completion from GPT4All which Format list of message dictionaries into a prompt and call model
+    generate on prompt
+
+    Parameters
+    ----------
+    messages : str
+        The messages to use as a prompt for the chat completion.
+    model : str
+        The model to use for the chat completion. Defaults to "ggml-gpt4all-j-v1.3-groovy".
+
+    Returns
+    -------
+    completion : Dict
+    """
     if GPT4All is None:
         raise ImportError(
             "gpt4all is not installed, try `pip install scikit-llm[gpt4all]`"
         )
     if model not in _loaded_models.keys():
         _loaded_models[model] = GPT4All(model)
+
     return _loaded_models[model].chat_completion(
         messages, verbose=False, streaming=False, temp=1e-10
     )
 
 
-def unload_models():
+def unload_models() -> None:
     global _loaded_models
     _loaded_models = {}
diff --git a/skllm/models/gpt_zero_shot_clf.py b/skllm/models/gpt_zero_shot_clf.py
index fe64cfb..05cd03e 100644
--- a/skllm/models/gpt_zero_shot_clf.py
+++ b/skllm/models/gpt_zero_shot_clf.py
@@ -19,6 +19,18 @@
 
 
 class _BaseZeroShotGPTClassifier(ABC, BaseEstimator, ClassifierMixin, _OAIMixin):
+    """
+    A base class for zero-shot classification using GPT-3.
+
+    Initialization Parameters
+    ----------
+    openai_key : str, optional
+        The OPEN AI key to use. Defaults to None.
+    openai_org : str, optional
+        The OPEN AI organization to use. Defaults to None.
+    openai_model : str, optional
+        The OPEN AI model to use. Defaults to "gpt-3.5-turbo".
+    """
     def __init__(
         self,
         openai_key: Optional[str] = None,
@@ -29,6 +41,19 @@ def __init__(
         self.openai_model = openai_model
 
     def _to_np(self, X):
+        """
+        Convert X to a numpy array.
+        
+        Parameters
+        ----------
+        X : Any
+            The input data to convert to a numpy array.
+        
+        Returns
+        -------
+        np.ndarray
+            The input data as a numpy array.
+        """
         return _to_numpy(X)
 
     def fit(
diff --git a/skllm/openai/chatgpt.py b/skllm/openai/chatgpt.py
index dc1d862..576ee3c 100644
--- a/skllm/openai/chatgpt.py
+++ b/skllm/openai/chatgpt.py
@@ -1,5 +1,6 @@
 import json
 from time import sleep
+from typing import Any
 
 import openai
 
@@ -7,13 +8,47 @@
 from skllm.utils import find_json_in_string
 
 
-def construct_message(role, content):
+def construct_message(role: str, content: str) -> dict:
+    """
+    Construct a message for the OpenAI API.
+    
+    Parameters
+    ----------
+    role : str
+        The role of the message. Must be one of "system", "user", or "assistant".
+    content : str
+        The content of the message.
+
+    Returns
+    -------
+    message : dict
+    """
     if role not in ("system", "user", "assistant"):
         raise ValueError("Invalid role")
     return {"role": role, "content": content}
 
 
-def get_chat_completion(messages, key, org, model="gpt-3.5-turbo", max_retries=3):
+def get_chat_completion(messages: str, key: str, org: str, model: str="gpt-3.5-turbo", max_retries: int=3):
+    """
+    Get a chat completion from the OpenAI API.
+    
+    Parameters
+    ----------
+    messages : str
+        input messages to use.
+    key : str
+        The OPEN AI key to use.
+    org : str
+        The OPEN AI organization to use.
+    model : str, optional
+        The OPEN AI model to use. Defaults to "gpt-3.5-turbo".
+    max_retries : int, optional
+        The maximum number of retries to use. Defaults to 3.
+    
+    Returns
+    -------
+    completion : dict
+    """
     set_credentials(key, org)
     error_msg = None
     error_type = None
@@ -33,7 +68,17 @@ def get_chat_completion(messages, key, org, model="gpt-3.5-turbo", max_retries=3
     )
 
 
-def extract_json_key(json_, key):
+def extract_json_key(json_: str, key: str) -> Any:
+    """
+    Extracts a key from a JSON string.
+    
+    Parameters
+    ----------
+    json_ : str
+        The JSON string to extract the key from.
+    key : str
+        The key to extract.
+    """
     try:
         json_ = json_.replace("\n", "")
         json_ = find_json_in_string(json_)
diff --git a/skllm/openai/credentials.py b/skllm/openai/credentials.py
index 6abdc42..959c20f 100644
--- a/skllm/openai/credentials.py
+++ b/skllm/openai/credentials.py
@@ -1,5 +1,15 @@
 import openai
 
-def set_credentials(key: str, org: str):
+def set_credentials(key: str, org: str) -> None:
+    """
+    Set the OpenAI key and organization.
+
+    Parameters
+    ----------
+    key : str
+        The OpenAI key to use.
+    org : str
+        The OpenAI organization to use.
+    """
     openai.api_key = key
     openai.organization = org
diff --git a/skllm/openai/embeddings.py b/skllm/openai/embeddings.py
index cd45b8a..3cf1fa7 100644
--- a/skllm/openai/embeddings.py
+++ b/skllm/openai/embeddings.py
@@ -3,8 +3,29 @@
 from skllm.openai.credentials import set_credentials
 
 def get_embedding(
-    text, key: str, org: str, model="text-embedding-ada-002", max_retries=3
-):
+    text: str, key: str, org: str, model: str="text-embedding-ada-002", max_retries: int=3
+):  
+    """
+    Encode a string and return the GPT embedding for a string.
+
+    Parameters
+    ----------
+    text : str
+        The string to encode.
+    key : str
+        The OPEN AI key to use.
+    org : str
+        The OPEN AI organization to use.
+    model : str, optional  
+        The OPEN AI model to use. Defaults to "text-embedding-ada-002".
+    max_retries : int, optional
+        The maximum number of retries to use. Defaults to 3.
+    
+    Returns
+    -------
+    emb : list
+        The GPT embedding for the string.
+    """
     set_credentials(key, org)
     text = text.replace("\n", " ")
     error_msg = None
diff --git a/skllm/openai/mixin.py b/skllm/openai/mixin.py
index c446207..b5dd5e9 100644
--- a/skllm/openai/mixin.py
+++ b/skllm/openai/mixin.py
@@ -2,12 +2,24 @@
 from skllm.config import SKLLMConfig as _Config
 
 class OpenAIMixin:
-
+    """
+    A mixin class that provides OpenAI key and organization to other classes.
+    """
     def _set_keys(self, key: Optional[str] = None, org: Optional[str] = None) -> None:
+        """
+        Set the OpenAI key and organization.
+        """
         self.openai_key = key
         self.openai_org = org
 
     def _get_openai_key(self) -> str:
+        """
+        Get the OpenAI key from the class or the config file.
+        
+        Returns
+        -------
+        openai_key: str
+        """
         key = self.openai_key
         if key is None:
             key = _Config.get_openai_key()
@@ -16,6 +28,13 @@ def _get_openai_key(self) -> str:
         return key
 
     def _get_openai_org(self) -> str:
+        """
+        Get the OpenAI organization from the class or the config file.
+
+        Returns
+        -------
+        openai_org: str
+        """
         key = self.openai_org
         if key is None:
             key = _Config.get_openai_org()
diff --git a/skllm/preprocessing/gpt_summarizer.py b/skllm/preprocessing/gpt_summarizer.py
index 8804275..2260a1a 100644
--- a/skllm/preprocessing/gpt_summarizer.py
+++ b/skllm/preprocessing/gpt_summarizer.py
@@ -5,6 +5,21 @@
 
 
 class GPTSummarizer(_BaseGPT):
+    """
+    A text summarizer.
+    
+    Parameters
+    ----------
+    openai_key : str, optional
+        The OPEN AI key to use. Defaults to None.
+    openai_org : str, optional
+        The OPEN AI organization to use. Defaults to None.
+    openai_model : str, optional
+        The OPEN AI model to use. Defaults to "gpt-3.5-turbo".
+    max_words : int, optional
+        The maximum number of words to use in the summary. Defaults to 15.
+    
+    """
     system_msg = "You are a text summarizer."
     default_output = "Summary is unavailable."
 
@@ -18,6 +33,19 @@ def __init__(
         self._set_keys(openai_key, openai_org)
         self.openai_model = openai_model
         self.max_words = max_words
+        
 
-    def _get_prompt(self, X) -> str:
+    def _get_prompt(self, X: str) -> str:
+        """
+        Generates the prompt for the given input.
+        
+        Parameters
+        ----------
+        X : str
+            sample to summarize
+        
+        Returns
+        -------
+        str
+        """
         return build_summary_prompt(X, self.max_words)
diff --git a/skllm/preprocessing/gpt_translator.py b/skllm/preprocessing/gpt_translator.py
index 35e4d68..368ef58 100644
--- a/skllm/preprocessing/gpt_translator.py
+++ b/skllm/preprocessing/gpt_translator.py
@@ -16,7 +16,7 @@ def __init__(
         openai_org: Optional[str] = None,
         openai_model: str = "gpt-3.5-turbo",
         output_language: str = "English",
-    ):
+    ) -> None:
         self._set_keys(openai_key, openai_org)
         self.openai_model = openai_model
         self.output_language = output_language
diff --git a/skllm/preprocessing/gpt_vectorizer.py b/skllm/preprocessing/gpt_vectorizer.py
index 3bf72a5..ff0d54b 100644
--- a/skllm/preprocessing/gpt_vectorizer.py
+++ b/skllm/preprocessing/gpt_vectorizer.py
@@ -13,6 +13,18 @@
 
 
 class GPTVectorizer(_BaseEstimator, _TransformerMixin, _OAIMixin):
+    """
+    A class that uses OPEN AI embedding model that converts text to GPT embeddings.
+
+    Parameters
+    ----------
+    openai_embedding_model : str
+        The OPEN AI embedding model to use. Defaults to "text-embedding-ada-002".
+    openai_key : str, optional
+        The OPEN AI key to use. Defaults to None.
+    openai_org : str, optional
+        The OPEN AI organization to use. Defaults to None.
+    """
     def __init__(
         self,
         openai_embedding_model: str = "text-embedding-ada-002",
@@ -22,10 +34,37 @@ def __init__(
         self.openai_embedding_model = openai_embedding_model
         self._set_keys(openai_key, openai_org)
 
-    def fit(self, X: Any = None, y: Any = None, **kwargs):
+    def fit(self, X: Any = None, y: Any = None, **kwargs) -> "GPTVectorizer":
+        """
+        Fit the GPTVectorizer to the data.
+        This is modelled to function as the sklearn fit method
+
+        Parameters
+        ----------
+        X : Any, optional
+        y : Any, optional
+        kwargs : dict, optional
+
+        Returns
+        -------
+        self : GPTVectorizer
+        """
         return self
 
     def transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]]) -> ndarray:
+        """
+        Transform a list of strings into a list of GPT embeddings.
+        This is modelled to function as the sklearn transform meethod
+
+        Parameters
+        ----------
+        X : np.ndarray, pd.Series, or list
+            The list of strings to transform into GPT embeddings.
+        
+        Returns
+        -------
+        embeddings : np.ndarray
+        """
         X = _to_numpy(X)
         embeddings = []
         for i in tqdm(range(len(X))):
@@ -36,4 +75,18 @@ def transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]]) -> nda
         return embeddings
 
     def fit_transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]], y=None, **fit_params) -> ndarray:
+        """
+        Fit and transform a list of strings into a list of GPT embeddings.
+        This is modelled to function as the sklearn fit_transform method
+
+        Parameters
+        ----------
+        X : np.ndarray, pd.Series, or list
+            The list of strings to transform into GPT embeddings.
+        y : Any, optional
+
+        Returns
+        -------
+        embeddings : np.ndarray
+        """
         return self.fit(X, y).transform(X)
diff --git a/skllm/utils.py b/skllm/utils.py
index 49b2c03..22a39a1 100644
--- a/skllm/utils.py
+++ b/skllm/utils.py
@@ -1,8 +1,21 @@
 import numpy as np
 import pandas as pd
 
+from typing import Any
 
-def to_numpy(X):
+def to_numpy(X: Any) -> np.ndarray:
+    """
+    Convert a pandas Series or list to a numpy array.
+
+    Parameters
+    ----------
+    X : pd.Series or list
+        The data to convert to a numpy array.
+    
+    Returns
+    -------
+    X : np.ndarray
+    """
     if isinstance(X, pd.Series):
         X = X.to_numpy().astype(object)
     elif isinstance(X, list):
@@ -12,11 +25,24 @@ def to_numpy(X):
     return X
 
 
-def find_json_in_string(string):
+def find_json_in_string(string: str) -> str:
+    """
+    Find the first JSON object in a string.
+    
+    Parameters
+    ----------
+    string : str
+        The string to search for a JSON object.
+    
+    Returns
+    -------
+    json_string : str
+    """
+
     start = string.find("{")
     end = string.rfind("}")
     if start != -1 and end != -1:
         json_string = string[start : end + 1]
     else:
-        json_string = {}
+        json_string = "{}"
     return json_string

From 5a12ef6aa20e7fdedd3359841899ed4cf433e849 Mon Sep 17 00:00:00 2001
From: Ogundepo Odunayo <ogundepoodunayo@gmail.com>
Date: Thu, 1 Jun 2023 21:24:28 -0400
Subject: [PATCH 2/4] Update class methods

---
 skllm/models/gpt_zero_shot_clf.py     | 94 +++++++++++++++++++++++++--
 skllm/openai/base_gpt.py              | 54 ++++++++++++++-
 skllm/openai/chatgpt.py               |  2 +-
 skllm/openai/credentials.py           |  2 +-
 skllm/openai/embeddings.py            |  2 +-
 skllm/openai/mixin.py                 |  2 +-
 skllm/preprocessing/gpt_summarizer.py |  4 +-
 skllm/preprocessing/gpt_vectorizer.py |  6 +-
 8 files changed, 153 insertions(+), 13 deletions(-)

diff --git a/skllm/models/gpt_zero_shot_clf.py b/skllm/models/gpt_zero_shot_clf.py
index 05cd03e..ed5e1c5 100644
--- a/skllm/models/gpt_zero_shot_clf.py
+++ b/skllm/models/gpt_zero_shot_clf.py
@@ -27,7 +27,7 @@ class _BaseZeroShotGPTClassifier(ABC, BaseEstimator, ClassifierMixin, _OAIMixin)
     openai_key : str, optional
         The OPEN AI key to use. Defaults to None.
     openai_org : str, optional
-        The OPEN AI organization to use. Defaults to None.
+        The OPEN AI organization ID to use. Defaults to None.
     openai_model : str, optional
         The OPEN AI model to use. Defaults to "gpt-3.5-turbo".
     """
@@ -66,6 +66,18 @@ def fit(
         return self
 
     def predict(self, X: Union[np.ndarray, pd.Series, List[str]]):
+        """
+        Predict the class of each input.
+        
+        Parameters
+        ----------
+        X : Union[np.ndarray, pd.Series, List[str]]
+            The input data to predict the class of.
+        
+        Returns
+        -------
+        List[str]
+        """
         X = self._to_np(X)
         predictions = []
         for i in tqdm(range(len(X))):
@@ -76,7 +88,7 @@ def predict(self, X: Union[np.ndarray, pd.Series, List[str]]):
     def _extract_labels(self, y: Any) -> List[str]:
         pass
 
-    def _get_unique_targets(self, y):
+    def _get_unique_targets(self, y:Any):
         labels = self._extract_labels(y)
 
         counts = Counter(labels)
@@ -102,6 +114,9 @@ def _get_chat_completion(self, x):
 
 
 class ZeroShotGPTClassifier(_BaseZeroShotGPTClassifier):
+    """
+    A zero-shot classifier using GPT-3.
+    """
     def __init__(
         self,
         openai_key: Optional[str] = None,
@@ -111,6 +126,17 @@ def __init__(
         super().__init__(openai_key, openai_org, openai_model)
 
     def _extract_labels(self, y: Any) -> List[str]:
+        """
+        Return the class labels as a list.
+
+        Parameters
+        ----------
+        y : Any
+        
+        Returns
+        -------
+        List[str]
+        """
         if isinstance(y, (pd.Series, np.ndarray)):
             labels = y.tolist()
         else:
@@ -120,7 +146,19 @@ def _extract_labels(self, y: Any) -> List[str]:
     def _get_prompt(self, x) -> str:
         return build_zero_shot_prompt_slc(x, repr(self.classes_))
 
-    def _predict_single(self, x):
+    def _predict_single(self, x: str) -> str:
+        """
+        Predict the class for a single input.
+
+        Parameters
+        ----------
+        x : str
+            The input to predict the class of.
+        
+        Returns
+        -------
+        str
+        """
         completion = self._get_chat_completion(x)
         try:
             if self.openai_model.startswith("gpt4all::"):
@@ -154,6 +192,20 @@ def fit(
 
 
 class MultiLabelZeroShotGPTClassifier(_BaseZeroShotGPTClassifier):
+    """
+    A zero-shot classifier using GPT-3 for multi-label classification.
+    
+    Initialization Parameters
+    ----------
+    openai_key : str, optional
+        The OPEN AI key to use. Defaults to None.
+    openai_org : str, optional
+        The OPEN AI organization ID to use. Defaults to None.
+    openai_model : str, optional
+        The OPEN AI model to use. Defaults to "gpt-3.5-turbo".
+    max_labels : int, optional
+        The maximum number of labels to predict. Defaults to 3.
+    """
     def __init__(
         self,
         openai_key: Optional[str] = None,
@@ -167,6 +219,17 @@ def __init__(
         self.max_labels = max_labels
 
     def _extract_labels(self, y) -> List[str]:
+        """
+        Extract the labels into a list.
+
+        Parameters
+        ----------
+        y : Any
+
+        Returns
+        -------
+        List[str]
+        """
         labels = []
         for l in y:
             for j in l:
@@ -177,7 +240,19 @@ def _extract_labels(self, y) -> List[str]:
     def _get_prompt(self, x) -> str:
         return build_zero_shot_prompt_mlc(x, repr(self.classes_), self.max_labels)
 
-    def _predict_single(self, x):
+    def _predict_single(self, x) -> List[str]:
+        """
+        Predict the class for a single input.
+
+        Parameters
+        ----------
+        x : str
+            The input to predict the class of.
+        
+        Returns
+        -------
+        str
+        """
         completion = self._get_chat_completion(x)
         try:
             labels = extract_json_key(completion.choices[0].message["content"], "label")
@@ -199,4 +274,15 @@ def fit(
         X: Optional[Union[np.ndarray, pd.Series, List[str]]],
         y: List[List[str]],
     ):
+        """
+        Calls the parent fit method.
+        
+        Parameters
+        ----------
+        X : Optional[Union[np.ndarray, pd.Series, List[str]]]
+            The input data.
+        y : List[List[str]]
+            The labels.
+        
+        """
         return super().fit(X, y)
diff --git a/skllm/openai/base_gpt.py b/skllm/openai/base_gpt.py
index 49c3570..65ee50b 100644
--- a/skllm/openai/base_gpt.py
+++ b/skllm/openai/base_gpt.py
@@ -20,6 +20,19 @@ class BaseZeroShotGPTTransformer(_BaseEstimator, _TransformerMixin, _OAIMixin):
     default_output = "Output is unavailable"
 
     def _get_chat_completion(self, X):
+        """
+        Get the chat completion for the given input using open ai API.
+
+        Parameters
+        ----------
+        X : str
+            Input string
+        
+        Returns
+        -------
+        str
+        
+        """
         prompt = self._get_prompt(X)
         msgs = []
         msgs.append(construct_message("system", self.system_msg))
@@ -33,10 +46,37 @@ def _get_chat_completion(self, X):
             print(f"Skipping a sample due to the following error: {str(e)}")
             return self.default_output
             
-    def fit(self, X: Any = None, y: Any = None, **kwargs: Any):
+    def fit(self, X: Any = None, y: Any = None, **kwargs: Any) -> "_BaseZeroShotGPTTransformer":
+        """
+        This method is modelled to function as the sklearn fit method
+        This method does not do anything and is only present to make the
+        class compatible with sklearn pipelines.
+
+        Parameters
+        ----------
+        X : Any, optional
+        y : Any, optional
+        kwargs : dict, optional
+
+        Returns
+        -------
+        self : BaseZeroShotGPTTransformer
+        """
+
         return self
 
     def transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]], **kwargs: Any) -> ndarray:
+        """
+        Convert a list of strings using the open ai API and a predefined prompt.
+
+        Parameters
+        ----------
+        X : Union[np.ndarray, pd.Series, List[str]]
+
+        Returns
+        -------
+        ndarray
+        """
         X = _to_numpy(X)
         transformed = []
         for i in tqdm(range(len(X))):
@@ -47,4 +87,16 @@ def transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]], **kwar
         return transformed
 
     def fit_transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]], y=None, **fit_params) -> ndarray:
+        """
+        Fit and transform a list of strings using the transform method.
+        This is modelled to function as the sklearn fit_transform method
+
+        Parameters
+        ----------
+        X : np.ndarray, pd.Series, or list
+
+        Returns
+        -------
+        ndarray       
+        """
         return self.fit(X, y).transform(X)
\ No newline at end of file
diff --git a/skllm/openai/chatgpt.py b/skllm/openai/chatgpt.py
index 576ee3c..99e5c55 100644
--- a/skllm/openai/chatgpt.py
+++ b/skllm/openai/chatgpt.py
@@ -39,7 +39,7 @@ def get_chat_completion(messages: str, key: str, org: str, model: str="gpt-3.5-t
     key : str
         The OPEN AI key to use.
     org : str
-        The OPEN AI organization to use.
+        The OPEN AI organization ID to use.
     model : str, optional
         The OPEN AI model to use. Defaults to "gpt-3.5-turbo".
     max_retries : int, optional
diff --git a/skllm/openai/credentials.py b/skllm/openai/credentials.py
index 959c20f..ac11815 100644
--- a/skllm/openai/credentials.py
+++ b/skllm/openai/credentials.py
@@ -9,7 +9,7 @@ def set_credentials(key: str, org: str) -> None:
     key : str
         The OpenAI key to use.
     org : str
-        The OpenAI organization to use.
+        The OpenAI organization ID to use.
     """
     openai.api_key = key
     openai.organization = org
diff --git a/skllm/openai/embeddings.py b/skllm/openai/embeddings.py
index 3cf1fa7..91c6f42 100644
--- a/skllm/openai/embeddings.py
+++ b/skllm/openai/embeddings.py
@@ -15,7 +15,7 @@ def get_embedding(
     key : str
         The OPEN AI key to use.
     org : str
-        The OPEN AI organization to use.
+        The OPEN AI organization ID to use.
     model : str, optional  
         The OPEN AI model to use. Defaults to "text-embedding-ada-002".
     max_retries : int, optional
diff --git a/skllm/openai/mixin.py b/skllm/openai/mixin.py
index b5dd5e9..0877327 100644
--- a/skllm/openai/mixin.py
+++ b/skllm/openai/mixin.py
@@ -29,7 +29,7 @@ def _get_openai_key(self) -> str:
 
     def _get_openai_org(self) -> str:
         """
-        Get the OpenAI organization from the class or the config file.
+        Get the OpenAI organization ID from the class or the config file.
 
         Returns
         -------
diff --git a/skllm/preprocessing/gpt_summarizer.py b/skllm/preprocessing/gpt_summarizer.py
index 2260a1a..e5b22c3 100644
--- a/skllm/preprocessing/gpt_summarizer.py
+++ b/skllm/preprocessing/gpt_summarizer.py
@@ -13,7 +13,7 @@ class GPTSummarizer(_BaseGPT):
     openai_key : str, optional
         The OPEN AI key to use. Defaults to None.
     openai_org : str, optional
-        The OPEN AI organization to use. Defaults to None.
+        The OPEN AI organization ID to use. Defaults to None.
     openai_model : str, optional
         The OPEN AI model to use. Defaults to "gpt-3.5-turbo".
     max_words : int, optional
@@ -33,7 +33,7 @@ def __init__(
         self._set_keys(openai_key, openai_org)
         self.openai_model = openai_model
         self.max_words = max_words
-        
+
 
     def _get_prompt(self, X: str) -> str:
         """
diff --git a/skllm/preprocessing/gpt_vectorizer.py b/skllm/preprocessing/gpt_vectorizer.py
index ff0d54b..5013bdc 100644
--- a/skllm/preprocessing/gpt_vectorizer.py
+++ b/skllm/preprocessing/gpt_vectorizer.py
@@ -23,7 +23,7 @@ class GPTVectorizer(_BaseEstimator, _TransformerMixin, _OAIMixin):
     openai_key : str, optional
         The OPEN AI key to use. Defaults to None.
     openai_org : str, optional
-        The OPEN AI organization to use. Defaults to None.
+        The OPEN AI organization ID to use. Defaults to None.
     """
     def __init__(
         self,
@@ -37,7 +37,9 @@ def __init__(
     def fit(self, X: Any = None, y: Any = None, **kwargs) -> "GPTVectorizer":
         """
         Fit the GPTVectorizer to the data.
-        This is modelled to function as the sklearn fit method
+        This is modelled to function as the sklearn fit method.
+        This method does not do anything and is only present to make the
+        GPTVectorizer compatible with sklearn pipelines.
 
         Parameters
         ----------

From 2e829317eae88918eaa3ad443f0b720d46475024 Mon Sep 17 00:00:00 2001
From: Ogundepo Odunayo <ogundepoodunayo@gmail.com>
Date: Sun, 4 Jun 2023 13:29:43 -0400
Subject: [PATCH 3/4] fix review issues

---
 skllm/gpt4all_client.py               |  4 ++--
 skllm/models/gpt_zero_shot_clf.py     | 14 +++++++++++++-
 skllm/preprocessing/gpt_vectorizer.py | 12 +++++++-----
 skllm/utils.py                        |  2 +-
 4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/skllm/gpt4all_client.py b/skllm/gpt4all_client.py
index 943dc94..ed699b3 100644
--- a/skllm/gpt4all_client.py
+++ b/skllm/gpt4all_client.py
@@ -8,14 +8,14 @@
 _loaded_models = {}
 
 
-def get_chat_completion(messages: str, model: str="ggml-gpt4all-j-v1.3-groovy") -> Dict:
+def get_chat_completion(messages: Dict, model: str="ggml-gpt4all-j-v1.3-groovy") -> Dict:
     """
     Get a chat completion from GPT4All which Format list of message dictionaries into a prompt and call model
     generate on prompt
 
     Parameters
     ----------
-    messages : str
+    messages : Dict
         The messages to use as a prompt for the chat completion.
     model : str
         The model to use for the chat completion. Defaults to "ggml-gpt4all-j-v1.3-groovy".
diff --git a/skllm/models/gpt_zero_shot_clf.py b/skllm/models/gpt_zero_shot_clf.py
index 264e5c5..c1e9c08 100644
--- a/skllm/models/gpt_zero_shot_clf.py
+++ b/skllm/models/gpt_zero_shot_clf.py
@@ -73,6 +73,18 @@ def fit(
         X: Optional[Union[np.ndarray, pd.Series, List[str]]],
         y: Union[np.ndarray, pd.Series, List[str], List[List[str]]],
     ):
+        """
+        Fits the model by storing the training data and extracting the unique targets.
+        
+        Parameters
+        ----------
+        X : Optional[Union[np.ndarray, pd.Series, List[str]]]
+            The input array data to fit the model to.
+
+        y : Union[np.ndarray, pd.Series, List[str], List[List[str]]]
+            The target array data to fit the model to.
+        
+        """
         X = self._to_np(X)
         self.classes_, self.probabilities_ = self._get_unique_targets(y)
         return self
@@ -307,7 +319,7 @@ def fit(
         Parameters
         ----------
         X : Optional[Union[np.ndarray, pd.Series, List[str]]]
-            The input data.
+            Input array data
         y : List[List[str]]
             The labels.
         
diff --git a/skllm/preprocessing/gpt_vectorizer.py b/skllm/preprocessing/gpt_vectorizer.py
index 14d89c6..0de909b 100644
--- a/skllm/preprocessing/gpt_vectorizer.py
+++ b/skllm/preprocessing/gpt_vectorizer.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from typing import Any, List, Optional, Union
 
 import numpy as np
@@ -34,7 +36,7 @@ def __init__(
         self.openai_embedding_model = openai_embedding_model
         self._set_keys(openai_key, openai_org)
 
-    def fit(self, X: Any = None, y: Any = None, **kwargs) -> "GPTVectorizer":
+    def fit(self, X: Any = None, y: Any = None, **kwargs) -> GPTVectorizer:
         """
         Fit the GPTVectorizer to the data.
         This is modelled to function as the sklearn fit method.
@@ -60,8 +62,8 @@ def transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]]) -> nda
 
         Parameters
         ----------
-        X : np.ndarray, pd.Series, or list
-            The list of strings to transform into GPT embeddings.
+        X : Optional[Union[np.ndarray, pd.Series, List[str]]]
+            The input array of strings to transform into GPT embeddings.
         
         Returns
         -------
@@ -83,8 +85,8 @@ def fit_transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]], y=
 
         Parameters
         ----------
-        X : np.ndarray, pd.Series, or list
-            The list of strings to transform into GPT embeddings.
+        X : Optional[Union[np.ndarray, pd.Series, List[str]]]
+            The input array of strings to transform into GPT embeddings.
         y : Any, optional
 
         Returns
diff --git a/skllm/utils.py b/skllm/utils.py
index 22a39a1..f17979b 100644
--- a/skllm/utils.py
+++ b/skllm/utils.py
@@ -27,7 +27,7 @@ def to_numpy(X: Any) -> np.ndarray:
 
 def find_json_in_string(string: str) -> str:
     """
-    Find the first JSON object in a string.
+    Find the JSON object in a string.
     
     Parameters
     ----------

From aa0ba8469bc029f5fedda9be775a7e45eed05d5c Mon Sep 17 00:00:00 2001
From: Ogundepo Odunayo <ogundepoodunayo@gmail.com>
Date: Sun, 4 Jun 2023 14:43:27 -0400
Subject: [PATCH 4/4] Fix more reviews

---
 skllm/completions.py                  |  5 ++++-
 skllm/gpt4all_client.py               |  3 +--
 skllm/models/gpt_few_shot_clf.py      |  3 +--
 skllm/models/gpt_zero_shot_clf.py     | 16 +++++++++++-----
 skllm/openai/base_gpt.py              | 14 +++++++-------
 skllm/openai/chatgpt.py               | 18 +++++++++++++-----
 skllm/openai/embeddings.py            |  4 ++--
 skllm/preprocessing/gpt_vectorizer.py | 10 ++++------
 skllm/utils.py                        |  6 +++---
 9 files changed, 46 insertions(+), 33 deletions(-)

diff --git a/skllm/completions.py b/skllm/completions.py
index 02e07df..dc64911 100644
--- a/skllm/completions.py
+++ b/skllm/completions.py
@@ -3,8 +3,11 @@
 
 
 def get_chat_completion(
-    messages, openai_key=None, openai_org=None, model="gpt-3.5-turbo", max_retries=3
+    messages: dict, openai_key: str=None, openai_org: str=None, model: str="gpt-3.5-turbo", max_retries: int=3
 ):
+    """
+    Gets a chat completion from GPT4All (models prefixed with "gpt4all::") or the OpenAI API.
+    """
     if model.startswith("gpt4all::"):
         return _g4a_get_chat_completion(messages, model[9:])
     else:
diff --git a/skllm/gpt4all_client.py b/skllm/gpt4all_client.py
index ed699b3..2331463 100644
--- a/skllm/gpt4all_client.py
+++ b/skllm/gpt4all_client.py
@@ -10,8 +10,7 @@
 
 def get_chat_completion(messages: Dict, model: str="ggml-gpt4all-j-v1.3-groovy") -> Dict:
     """
-    Get a chat completion from GPT4All which Format list of message dictionaries into a prompt and call model
-    generate on prompt
+    Gets a chat completion from GPT4All.
 
     Parameters
     ----------
diff --git a/skllm/models/gpt_few_shot_clf.py b/skllm/models/gpt_few_shot_clf.py
index b552f38..31a6747 100644
--- a/skllm/models/gpt_few_shot_clf.py
+++ b/skllm/models/gpt_few_shot_clf.py
@@ -25,8 +25,7 @@ def fit(
         X: Union[np.ndarray, pd.Series, List[str]],
         y: Union[np.ndarray, pd.Series, List[str]],
     ):
-        """Fits the model by storing the training data and extracting the
-        unique targets.
+        """Fits the model to the given data.
 
         Parameters
         ----------
diff --git a/skllm/models/gpt_zero_shot_clf.py b/skllm/models/gpt_zero_shot_clf.py
index c1e9c08..fd8a75c 100644
--- a/skllm/models/gpt_zero_shot_clf.py
+++ b/skllm/models/gpt_zero_shot_clf.py
@@ -49,7 +49,7 @@ def __init__(
 
     def _to_np(self, X):
         """
-        Convert X to a numpy array.
+        Converts X to a numpy array.
         
         Parameters
         ----------
@@ -74,7 +74,7 @@ def fit(
         y: Union[np.ndarray, pd.Series, List[str], List[List[str]]],
     ):
         """
-        Fits the model by storing the training data and extracting the unique targets.
+        Extracts the target for each datapoint in X.
         
         Parameters
         ----------
@@ -91,7 +91,7 @@ def fit(
 
     def predict(self, X: Union[np.ndarray, pd.Series, List[str]]):
         """
-        Predict the class of each input.
+        Predicts the class of each input.
         
         Parameters
         ----------
@@ -193,6 +193,9 @@ def _get_default_label(self):
             return self.default_label
 
     def _predict_single(self, x):
+        """
+        Predicts the labels for a single sample.
+        """
         completion = self._get_chat_completion(x)
         try:
             label = str(
@@ -256,7 +259,7 @@ def __init__(
 
     def _extract_labels(self, y) -> List[str]:
         """
-        Extract the labels into a list.
+        Extracts the labels into a list.
 
         Parameters
         ----------
@@ -290,6 +293,9 @@ def _get_default_label(self):
         return result
 
     def _predict_single(self, x):
+        """
+        Predicts the labels for a single sample.
+        """
         completion = self._get_chat_completion(x)
         try:
             labels = extract_json_key(completion["choices"][0]["message"]["content"], "label")
@@ -314,7 +320,7 @@ def fit(
         y: List[List[str]],
     ):
         """
-        Calls the parent fit method.
+        Calls the parent fit method on input data.
         
         Parameters
         ----------
diff --git a/skllm/openai/base_gpt.py b/skllm/openai/base_gpt.py
index 0499941..3fcc6c8 100644
--- a/skllm/openai/base_gpt.py
+++ b/skllm/openai/base_gpt.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from typing import Any, List, Optional, Union
 
 import numpy as np
@@ -19,7 +21,7 @@ class BaseZeroShotGPTTransformer(_BaseEstimator, _TransformerMixin, _OAIMixin):
 
     def _get_chat_completion(self, X):
         """
-        Get the chat completion for the given input using open ai API.
+        Gets the chat completion for the given input using the OpenAI API.
 
         Parameters
         ----------
@@ -44,11 +46,9 @@ def _get_chat_completion(self, X):
             print(f"Skipping a sample due to the following error: {str(e)}")
             return self.default_output
             
-    def fit(self, X: Any = None, y: Any = None, **kwargs: Any) -> "_BaseZeroShotGPTTransformer":
+    def fit(self, X: Any = None, y: Any = None, **kwargs: Any) -> BaseZeroShotGPTTransformer:
         """
-        This method is modelled to function as the sklearn fit method
-        This method does not do anything and is only present to make the
-        class compatible with sklearn pipelines.
+        Fits the model to the data.
 
         Parameters
         ----------
@@ -65,7 +65,7 @@ class compatible with sklearn pipelines.
 
     def transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]], **kwargs: Any) -> ndarray:
         """
-        Convert a list of strings using the open ai API and a predefined prompt.
+        Converts a list of strings using the OpenAI API and a predefined prompt.
 
         Parameters
         ----------
@@ -86,7 +86,7 @@ def transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]], **kwar
 
     def fit_transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]], y=None, **fit_params) -> ndarray:
         """
-        Fit and transform a list of strings using the transform method.
+        Fits and transforms a list of strings using the transform method.
         This is modelled to function as the sklearn fit_transform method
 
         Parameters
diff --git a/skllm/openai/chatgpt.py b/skllm/openai/chatgpt.py
index bef20ac..7ea93a7 100644
--- a/skllm/openai/chatgpt.py
+++ b/skllm/openai/chatgpt.py
@@ -10,7 +10,7 @@
 
 def construct_message(role: str, content: str) -> dict:
     """
-    Construct a message for the OpenAI API.
+    Constructs a message for the OpenAI API.
     
     Parameters
     ----------
@@ -28,13 +28,13 @@ def construct_message(role: str, content: str) -> dict:
     return {"role": role, "content": content}
 
 
-def get_chat_completion(messages: str, key: str, org: str, model: str="gpt-3.5-turbo", max_retries: int=3):
+def get_chat_completion(messages: dict, key: str, org: str, model: str="gpt-3.5-turbo", max_retries: int=3):
     """
-    Get a chat completion from the OpenAI API.
+    Gets a chat completion from the OpenAI API.
     
     Parameters
     ----------
-    messages : str
+    messages : dict
         input messages to use.
     key : str
         The OPEN AI key to use.
@@ -69,7 +69,15 @@ def get_chat_completion(messages: str, key: str, org: str, model: str="gpt-3.5-t
 
 
 
-def extract_json_key(json_, key):
+def extract_json_key(json_: str, key: str):
+    """
+    Extracts the value of a key from a JSON string.
+
+    json_ : str
+        The JSON string to extract the key from.
+    key : str
+        The key to extract.
+    """
     original_json = json_
     for i in range(2):
         try:
diff --git a/skllm/openai/embeddings.py b/skllm/openai/embeddings.py
index e150f00..6712bda 100644
--- a/skllm/openai/embeddings.py
+++ b/skllm/openai/embeddings.py
@@ -9,7 +9,7 @@ def get_embedding(
     text: str, key: str, org: str, model: str="text-embedding-ada-002", max_retries: int=3
 ):  
     """
-    Encode a string and return the GPT embedding for a string.
+    Encodes a string and returns its embedding.
 
     Parameters
     ----------
@@ -20,7 +20,7 @@ def get_embedding(
     org : str
         The OPEN AI organization ID to use.
     model : str, optional  
-        The OPEN AI model to use. Defaults to "text-embedding-ada-002".
+        The model to use. Defaults to "text-embedding-ada-002".
     max_retries : int, optional
         The maximum number of retries to use. Defaults to 3.
     
diff --git a/skllm/preprocessing/gpt_vectorizer.py b/skllm/preprocessing/gpt_vectorizer.py
index 0de909b..fe4537b 100644
--- a/skllm/preprocessing/gpt_vectorizer.py
+++ b/skllm/preprocessing/gpt_vectorizer.py
@@ -38,10 +38,8 @@ def __init__(
 
     def fit(self, X: Any = None, y: Any = None, **kwargs) -> GPTVectorizer:
         """
-        Fit the GPTVectorizer to the data.
+        Fits the GPTVectorizer to the data.
         This is modelled to function as the sklearn fit method.
-        This method does not do anything and is only present to make the
-        GPTVectorizer compatible with sklearn pipelines.
 
         Parameters
         ----------
@@ -57,8 +55,8 @@ def fit(self, X: Any = None, y: Any = None, **kwargs) -> GPTVectorizer:
 
     def transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]]) -> ndarray:
         """
-        Transform a list of strings into a list of GPT embeddings.
-        This is modelled to function as the sklearn transform meethod
+        Transforms a list of strings into a list of GPT embeddings.
+        This is modelled to function as the sklearn transform method
 
         Parameters
         ----------
@@ -80,7 +78,7 @@ def transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]]) -> nda
 
     def fit_transform(self, X: Optional[Union[np.ndarray, pd.Series, List[str]]], y=None, **fit_params) -> ndarray:
         """
-        Fit and transform a list of strings into a list of GPT embeddings.
+        Fits and transforms a list of strings into a list of GPT embeddings.
         This is modelled to function as the sklearn fit_transform method
 
         Parameters
diff --git a/skllm/utils.py b/skllm/utils.py
index f17979b..eca2289 100644
--- a/skllm/utils.py
+++ b/skllm/utils.py
@@ -5,11 +5,11 @@
 
 def to_numpy(X: Any) -> np.ndarray:
     """
-    Convert a pandas Series or list to a numpy array.
+    Converts a pandas Series or list to a numpy array.
 
     Parameters
     ----------
-    X : pd.Series or list
+    X : Any
         The data to convert to a numpy array.
     
     Returns
@@ -27,7 +27,7 @@ def to_numpy(X: Any) -> np.ndarray:
 
 def find_json_in_string(string: str) -> str:
     """
-    Find the JSON object in a string.
+    Finds the JSON object in a string.
     
     Parameters
     ----------