Small QoL update #301

Merged · 5 commits · Apr 26, 2024
2 changes: 1 addition & 1 deletion src/handlers/handlerUtils.ts
@@ -686,7 +686,7 @@ export async function tryProvidersInSequence(
/**
* Handles various types of responses based on the specified parameters
* and returns a mapped response
* @param {Response} response - The HTTP response recieved from LLM.
* @param {Response} response - The HTTP response received from LLM.
* @param {boolean} streamingMode - Indicates whether streaming mode is enabled.
* @param {string} proxyProvider - The provider string.
* @param {string | undefined} responseTransformer - The response transformer to determine type of call.
89 changes: 19 additions & 70 deletions src/providers/google-vertex-ai/chatComplete.ts
@@ -1,3 +1,6 @@
// Docs for REST API
// https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-multimodal-prompts#gemini-send-multimodal-samples-drest

import { GOOGLE_VERTEX_AI } from '../../globals';
import { ContentType, Message, Params } from '../../types/requestBody';
import {
@@ -9,31 +12,11 @@ import {
generateErrorResponse,
generateInvalidProviderResponseError,
} from '../utils';

const transformGenerationConfig = (params: Params) => {
const generationConfig: Record<string, any> = {};
if (params['temperature']) {
generationConfig['temperature'] = params['temperature'];
}
if (params['top_p']) {
generationConfig['topP'] = params['top_p'];
}
if (params['top_k']) {
generationConfig['topK'] = params['top_k'];
}
if (params['max_tokens']) {
generationConfig['maxOutputTokens'] = params['max_tokens'];
}
if (params['stop']) {
generationConfig['stopSequences'] = params['stop'];
}
return generationConfig;
};

// TODOS: this configuration does not enforce the maximum token limit for the input parameter. If you want to enforce this, you might need to add a custom validation function or a max property to the ParameterConfig interface, and then use it in the input configuration. However, this might be complex because the token count is not a simple length check, but depends on the specific tokenization method used by the model.

// Docs for REST API
// https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-multimodal-prompts#gemini-send-multimodal-samples-drest
import { transformGenerationConfig } from './transformGenerationConfig';
import type {
GoogleErrorResponse,
GoogleGenerateContentResponse,
} from './types';

export const GoogleChatCompleteConfig: ProviderConfig = {
// https://cloud.google.com/vertex-ai/generative-ai/docs/learn/model-versioning#gemini-model-versions
@@ -141,6 +124,10 @@ export const GoogleChatCompleteConfig: ProviderConfig = {
param: 'generationConfig',
transform: (params: Params) => transformGenerationConfig(params),
},
response_format: {
param: 'generationConfig',
transform: (params: Params) => transformGenerationConfig(params),
},
// https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/configure-safety-attributes
// Example payload to be included in the request that sets the safety settings:
// "safety_settings": [
@@ -171,51 +158,6 @@ export const GoogleChatCompleteConfig: ProviderConfig = {
},
};

export interface GoogleErrorResponse {
error: {
code: number;
message: string;
status: string;
details: Array<Record<string, any>>;
};
}

interface GoogleGenerateFunctionCall {
name: string;
args: Record<string, any>;
}

interface GoogleGenerateContentResponse {
candidates: {
content: {
parts: {
text?: string;
functionCall?: GoogleGenerateFunctionCall;
}[];
};
finishReason: string;
index: 0;
safetyRatings: {
category: string;
probability: string;
}[];
}[];
promptFeedback: {
safetyRatings: {
category: string;
probability: string;
probabilityScore: number;
severity: string;
severityScore: number;
}[];
};
usageMetadata: {
promptTokenCount: number;
candidatesTokenCount: number;
totalTokenCount: number;
};
}

export const GoogleChatCompleteResponseTransform: (
response:
| GoogleGenerateContentResponse
@@ -256,6 +198,13 @@ export const GoogleChatCompleteResponseTransform: (
);
}

if (
'candidates' in response &&
response.candidates[0].finishReason === 'PROHIBITED_CONTENT'
) {
return generateInvalidProviderResponseError(response, GOOGLE_VERTEX_AI);
}

if ('candidates' in response) {
const {
promptTokenCount = 0,
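
The behavioral addition in this file is the new guard above: when Vertex AI flags a candidate with finishReason === 'PROHIBITED_CONTENT', the transform now returns an error via generateInvalidProviderResponseError instead of mapping the blocked candidate as a normal completion. A minimal sketch of that path, using the error helper shown in src/providers/utils.ts further below; the payload is illustrative, not a real Vertex AI response:

import { GOOGLE_VERTEX_AI } from '../../globals';
import { generateInvalidProviderResponseError } from '../utils';
import type { GoogleGenerateContentResponse } from './types';

// Illustrative blocked payload: no usable parts, finishReason set by Vertex AI.
const blocked: GoogleGenerateContentResponse = {
  candidates: [
    {
      content: { parts: [] },
      finishReason: 'PROHIBITED_CONTENT',
      index: 0,
      safetyRatings: [],
    },
  ],
  promptFeedback: { safetyRatings: [] },
  usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0, totalTokenCount: 0 },
};

// Produces an ErrorResponse whose message reads
// `Invalid response received from ${GOOGLE_VERTEX_AI}: ...` with the payload serialized,
// per generateInvalidProviderResponseError.
const errorResponse = generateInvalidProviderResponseError(blocked, GOOGLE_VERTEX_AI);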
28 changes: 28 additions & 0 deletions src/providers/google-vertex-ai/transformGenerationConfig.ts
@@ -0,0 +1,28 @@
import { Params } from '../../types/requestBody';

/**
* @see https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini#request_body
*/
export function transformGenerationConfig(params: Params) {
const generationConfig: Record<string, any> = {};
if (params['temperature']) {
generationConfig['temperature'] = params['temperature'];
}
if (params['top_p']) {
generationConfig['topP'] = params['top_p'];
}
if (params['top_k']) {
generationConfig['topK'] = params['top_k'];
}
if (params['max_tokens']) {
generationConfig['maxOutputTokens'] = params['max_tokens'];
}
if (params['stop']) {
generationConfig['stopSequences'] = params['stop'];
}
if (params?.response_format?.type === 'json_object') {
generationConfig['responseMimeType'] = 'application/json';
}

return generationConfig;
}
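
For quick reference, a sketch (not part of this PR) of the mapping this helper performs for OpenAI-style parameters, including the new response_format handling. The parameter values are made up, and it assumes the remaining Params fields are optional:

import { Params } from '../../types/requestBody';
import { transformGenerationConfig } from './transformGenerationConfig';

// Hypothetical OpenAI-style parameters.
const params: Params = {
  temperature: 0.7,
  top_p: 0.95,
  max_tokens: 256,
  stop: ['END'],
  response_format: { type: 'json_object' },
};

// Expected output:
// {
//   temperature: 0.7,
//   topP: 0.95,
//   maxOutputTokens: 256,
//   stopSequences: ['END'],
//   responseMimeType: 'application/json'
// }
console.log(transformGenerationConfig(params));

One quirk carried over unchanged from chatComplete.ts: the truthiness checks skip explicit zero values, so temperature: 0 or top_p: 0 would not be forwarded.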
44 changes: 44 additions & 0 deletions src/providers/google-vertex-ai/types.ts
@@ -0,0 +1,44 @@
export interface GoogleErrorResponse {
error: {
code: number;
message: string;
status: string;
details: Array<Record<string, any>>;
};
}

export interface GoogleGenerateFunctionCall {
name: string;
args: Record<string, any>;
}

export interface GoogleGenerateContentResponse {
candidates: {
content: {
parts: {
text?: string;
functionCall?: GoogleGenerateFunctionCall;
}[];
};
finishReason: string;
index: 0;
safetyRatings: {
category: string;
probability: string;
}[];
}[];
promptFeedback: {
safetyRatings: {
category: string;
probability: string;
probabilityScore: number;
severity: string;
severityScore: number;
}[];
};
usageMetadata: {
promptTokenCount: number;
candidatesTokenCount: number;
totalTokenCount: number;
};
}
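
These interfaces were previously declared inline in chatComplete.ts and are now shared from a dedicated module. A small consumer sketch with a hypothetical payload, only to show the intended shape:

import type { GoogleGenerateContentResponse } from './types';

// Hypothetical Gemini payload conforming to GoogleGenerateContentResponse.
const response: GoogleGenerateContentResponse = {
  candidates: [
    {
      content: { parts: [{ text: 'Hello from Gemini.' }] },
      finishReason: 'STOP',
      index: 0,
      safetyRatings: [
        { category: 'HARM_CATEGORY_HARASSMENT', probability: 'NEGLIGIBLE' },
      ],
    },
  ],
  promptFeedback: { safetyRatings: [] },
  usageMetadata: {
    promptTokenCount: 5,
    candidatesTokenCount: 4,
    totalTokenCount: 9,
  },
};

// Join the text parts of the first candidate; functionCall parts are ignored here.
const text = response.candidates[0].content.parts
  .map((part) => part.text ?? '')
  .join('');
console.log(text, response.usageMetadata.totalTokenCount);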
2 changes: 1 addition & 1 deletion src/providers/utils.ts
@@ -6,7 +6,7 @@ export const generateInvalidProviderResponseError: (
) => ErrorResponse = (response, provider) => {
return {
error: {
message: `Invalid response recieved from ${provider}: ${JSON.stringify(
message: `Invalid response received from ${provider}: ${JSON.stringify(
response
)}`,
type: null,
3 changes: 3 additions & 0 deletions src/types/requestBody.ts
@@ -212,6 +212,9 @@ export interface Params {
examples?: Examples[];
top_k?: number;
tools?: Tool[];
response_format?: { type: 'json_object' | 'text' };
// Google Vertex AI specific
safety_settings?: any;
}

interface Examples {
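
With the new Params fields, a caller can ask for JSON output and pass provider-specific safety settings in the same request body. An illustrative body (left untyped on purpose; the model name and safety-setting values are placeholders based on the Vertex AI docs, not on this PR):

// Illustrative request body only.
const requestBody = {
  model: 'gemini-1.0-pro',
  messages: [
    { role: 'user', content: 'Reply with a JSON object containing a "joke" field.' },
  ],
  max_tokens: 200,
  // Mapped to generationConfig.responseMimeType = 'application/json' for Vertex AI.
  response_format: { type: 'json_object' },
  // Google Vertex AI specific; forwarded via the new safety_settings param.
  safety_settings: [
    { category: 'HARM_CATEGORY_DANGEROUS_CONTENT', threshold: 'BLOCK_ONLY_HIGH' },
  ],
};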