-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
gemini-audio-transcription.js
53 lines (46 loc) · 1.75 KB
/
gemini-audio-transcription.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// [START generativeaionvertexai_gemini_audio_transcription]
const {VertexAI} = require('@google-cloud/vertexai');
/**
 * Transcribes an interview audio clip (stored in Cloud Storage) with a
 * Gemini multimodal model on Vertex AI and prints the raw JSON response.
 *
 * TODO(developer): Update these variables before running the sample.
 *
 * @param {string} projectId - Google Cloud project ID that hosts Vertex AI.
 */
async function transcript_audio(projectId = 'PROJECT_ID') {
  // Client pinned to the us-central1 region.
  const client = new VertexAI({project: projectId, location: 'us-central1'});
  const model = client.getGenerativeModel({
    model: 'gemini-1.5-pro-preview-0409',
  });

  // The audio is referenced by Cloud Storage URI rather than inlined bytes.
  const audioPart = {
    file_data: {
      file_uri: 'gs://cloud-samples-data/generative-ai/audio/pixel.mp3',
      mime_type: 'audio/mpeg',
    },
  };
  const promptPart = {
    text: `
    Can you transcribe this interview, in the format of timecode, speaker, caption?
    Use speaker A, speaker B, etc. to identify speakers.`,
  };

  // Single-turn request: one user message carrying the audio plus the prompt.
  const result = await model.generateContent({
    contents: [{role: 'user', parts: [audioPart, promptPart]}],
  });
  const contentResponse = await result.response;
  console.log(JSON.stringify(contentResponse));
}
// [END generativeaionvertexai_gemini_audio_transcription]
// CLI entry point: forwards command-line arguments (the project ID) to the
// sample and reports failures without an unhandled rejection.
(async () => {
  try {
    await transcript_audio(...process.argv.slice(2));
  } catch (err) {
    console.error(err.message);
    process.exitCode = 1;
  }
})();