From 4511560c291b7f240ac0e17b4f0ad8c93c18b420 Mon Sep 17 00:00:00 2001 From: AssemblyAI Date: Mon, 15 Apr 2024 09:15:51 -0400 Subject: [PATCH 1/2] Project import generated by Copybara. GitOrigin-RevId: 3642a9bb14cf6fd46dd85091793224c446309b93 --- .github/workflows/api-reference.yml | 6 +- .github/workflows/publish.yml | 6 +- .github/workflows/test.yml | 10 +- .prettierignore | 1 + CHANGELOG.md | 16 ++ jest.config.js | 8 - package.json | 8 +- scripts/kitchensink.ts | 387 -------------------------- src/services/realtime/service.ts | 26 ++ src/services/transcripts/index.ts | 13 + src/types/asyncapi.generated.ts | 36 +++ src/types/openapi.generated.ts | 4 + src/types/realtime/index.ts | 27 ++ tests/integration/file.test.ts | 30 ++ tests/integration/lemur.test.ts | 78 ++++++ tests/integration/realtime.test.ts | 100 +++++++ tests/integration/transcript.test.ts | 141 ++++++++++ tests/{ => unit}/file.test.ts | 0 tests/{ => unit}/lemur.test.ts | 0 tests/{__mocks__ => unit/mocks}/ws.ts | 0 tests/{ => unit}/realtime.test.ts | 19 +- tests/{ => unit}/transcript.test.ts | 47 ++-- tests/{ => unit}/utils.test.ts | 4 +- tests/{ => unit}/utils.ts | 2 +- 24 files changed, 534 insertions(+), 435 deletions(-) delete mode 100644 jest.config.js delete mode 100644 scripts/kitchensink.ts create mode 100644 tests/integration/file.test.ts create mode 100644 tests/integration/lemur.test.ts create mode 100644 tests/integration/realtime.test.ts create mode 100644 tests/integration/transcript.test.ts rename tests/{ => unit}/file.test.ts (100%) rename tests/{ => unit}/lemur.test.ts (100%) rename tests/{__mocks__ => unit/mocks}/ws.ts (100%) rename tests/{ => unit}/realtime.test.ts (92%) rename tests/{ => unit}/transcript.test.ts (89%) rename tests/{ => unit}/utils.test.ts (95%) rename tests/{ => unit}/utils.ts (91%) diff --git a/.github/workflows/api-reference.yml b/.github/workflows/api-reference.yml index 46d08ee..74df8cb 100644 --- a/.github/workflows/api-reference.yml +++ 
b/.github/workflows/api-reference.yml @@ -25,11 +25,11 @@ jobs: runs-on: ubuntu-latest steps: - name: Setup Node.js 20 - uses: actions/setup-node@v4 + uses: actions/setup-node@v3 with: node-version: 20 - name: Checkout source code - uses: actions/checkout@v4 + uses: actions/checkout@v3 - uses: pnpm/action-setup@v3 with: version: 8.10.0 @@ -38,7 +38,7 @@ jobs: shell: bash run: | echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV - - uses: actions/cache@v4 + - uses: actions/cache@v3 name: Setup pnpm cache with: path: ${{ env.STORE_PATH }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 2587321..bfd5eb4 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -10,11 +10,11 @@ jobs: runs-on: ubuntu-latest steps: - name: Setup Node.js 20 - uses: actions/setup-node@v4 + uses: actions/setup-node@v3 with: node-version: 20 - name: Checkout source code - uses: actions/checkout@v4 + uses: actions/checkout@v3 - name: Install pnpm uses: pnpm/action-setup@v3 with: @@ -24,7 +24,7 @@ jobs: shell: bash run: | echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV - - uses: actions/cache@v4 + - uses: actions/cache@v3 name: Setup pnpm cache with: path: ${{ env.STORE_PATH }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8a7739b..f01aedf 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,11 +21,11 @@ jobs: # - windows-latest steps: - name: Setup Node.js ${{ matrix['node-version'] }} - uses: actions/setup-node@v4 + uses: actions/setup-node@v3 with: node-version: ${{ matrix['node-version'] }} - name: Checkout source code - uses: actions/checkout@v4 + uses: actions/checkout@v3 - name: Install pnpm uses: pnpm/action-setup@v3 with: @@ -35,7 +35,7 @@ jobs: shell: bash run: | echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV - - uses: actions/cache@v4 + - uses: actions/cache@v3 name: Setup pnpm cache with: path: ${{ env.STORE_PATH }} @@ -48,5 +48,5 @@ 
jobs: run: pnpm build - name: Lint code run: pnpm lint - - name: Run tests - run: pnpm test + - name: Run unit tests + run: pnpm test:unit diff --git a/.prettierignore b/.prettierignore index 868112d..c09dfa0 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,5 @@ .vscode +.gitignore .npmignore coverage dist diff --git a/CHANGELOG.md b/CHANGELOG.md index 136e419..e4ffbff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,21 @@ # Changelog +## [4.4.0] - 2024-04-12 + +### Added + +- Add `disablePartialTranscripts` parameter to `CreateRealtimeTranscriberParams` +- Add `enableExtraSessionInformation` parameter to `CreateRealtimeTranscriberParams` +- Add `session_information` event to `RealtimeTranscriber.on()` + +### Updated + +- ⚠️ Deprecate `conformer-2` literal for `TranscriptParams.speech_model` property + +### Fixed + +- Add missing `status` property to `AutoHighlightsResult` + ## [4.3.4] - 2024-04-02 ### Added diff --git a/jest.config.js b/jest.config.js deleted file mode 100644 index 677dcc4..0000000 --- a/jest.config.js +++ /dev/null @@ -1,8 +0,0 @@ -module.exports = { - preset: "ts-jest", - testEnvironment: "node", - collectCoverage: true, - modulePathIgnorePatterns: ["/dist"], -}; - -process.env.TESTDATA_DIR = "tests/static"; diff --git a/package.json b/package.json index 0efec6e..f43501a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "assemblyai", - "version": "4.3.4", + "version": "4.4.0", "description": "The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.", "engines": { "node": ">=18" @@ -69,9 +69,11 @@ "build": "pnpm clean && pnpm rollup -c", "clean": "rimraf dist/* && rimraf temp/* && rimraf temp-docs/*", "lint": "eslint -c .eslintrc.json '{src,tests}/**/*.{js,ts}' && publint && tsc --noEmit -p tsconfig.json", - "test": "jest --config jest.config.js", + "test": "pnpm run 
test:unit && pnpm run test:integration", + "test:unit": "jest --config jest.unit.config.js", + "test:integration": "jest --config jest.integration.config.js --testTimeout 360000", "format": "prettier '**/*' --write", - "generate:types": "tsx ./scripts/generate-types.ts && pnpm format", + "generate:types": "tsx ./scripts/generate-types.ts && prettier 'src/types/*.generated.ts' --write", "generate:reference": "typedoc", "copybara:dry-run": "./copybara.sh dry_run --init-history", "copybara:pr": "./copybara.sh sync_out --init-history" diff --git a/scripts/kitchensink.ts b/scripts/kitchensink.ts deleted file mode 100644 index 663c0d7..0000000 --- a/scripts/kitchensink.ts +++ /dev/null @@ -1,387 +0,0 @@ -import { createReadStream } from "fs"; -import "dotenv/config"; -import { - AssemblyAI, - Transcript, - FinalTranscript, - LemurBaseResponse, - PartialTranscript, - RealtimeTranscript, - CreateRealtimeTranscriberParams, - TranscribeParams, -} from "../src"; - -const client = new AssemblyAI({ - apiKey: process.env.ASSEMBLYAI_API_KEY!, -}); - -(async function transcribeUsingRealtime() { - const useToken = false; - const serviceParams: CreateRealtimeTranscriberParams = { - sampleRate: 16_000, - wordBoost: ["gore", "climate"], - token: useToken - ? 
await client.realtime.createTemporaryToken({ - expires_in: 480, - }) - : undefined, - encoding: "pcm_s16le", - endUtteranceSilenceThreshold: 500, - }; - const rt = client.realtime.transcriber(serviceParams); - - rt.on("open", ({ sessionId, expiresAt }) => { - console.log("Session ID:", sessionId, "Expires At:", expiresAt); - }); - rt.on("close", (code: number, reason: string) => - console.log("Closed", code, reason), - ); - rt.on("transcript", (transcript: RealtimeTranscript) => - console.log("Transcript:", transcript), - ); - rt.on("transcript.partial", (transcript: PartialTranscript) => - console.log("Transcript:", transcript), - ); - rt.on("transcript.final", (transcript: FinalTranscript) => - console.log("Transcript:", transcript), - ); - rt.on("error", (error: Error) => console.error("Error", error)); - - try { - await rt.connect(); - - const chunkSize = 8 * 1024; - const audio = createReadStream("./tests/static/gore.wav", { - highWaterMark: chunkSize, - }); - for await (const chunk of audio) { - if (chunk.length < chunkSize) continue; - rt.sendAudio(chunk); - await new Promise((resolve) => setTimeout(resolve, 300)); - } - console.log("File end"); - - await rt.close(); - } catch (error) { - console.error(error); - } -})(); - -const audioUrl = "https://storage.googleapis.com/aai-docs-samples/espn.m4a"; -const transcribeParams: TranscribeParams = { - audio: audioUrl, - boost_param: "high", - word_boost: ["Chicago", "draft"], - disfluencies: true, - dual_channel: true, - format_text: false, - language_code: "en", - punctuate: false, - speech_threshold: 0.5, -}; - -(async function uploadFileFromPath() { - const uploadUrl = await client.files.upload("./tests/static/gore.wav"); - console.log("Upload URL:", uploadUrl); -})(); - -(async function transcribeFromPath() { - const transcript = await client.transcripts.transcribe({ - audio: "./tests/static/gore.wav", - }); - console.log(transcript); - return transcript; -})().then((transcript) => 
deleteTranscript(transcript)); - -(async function transcribeFromStream() { - const transcript = await client.transcripts.transcribe({ - audio: createReadStream("./tests/static/gore.wav"), - }); - console.log(transcript); - return transcript; -})().then((transcript) => deleteTranscript(transcript)); - -(async function createStandardTranscript() { - const transcript = await client.transcripts.transcribe(transcribeParams); - console.log(transcript); - return transcript; -})().then(async (transcript) => { - await exportAsSubtitles(transcript); - await getParagraphs(transcript); - await getSentences(transcript); - await searchTranscript(transcript); - await deleteTranscript(transcript); -}); - -(async function runLemurModels() { - const transcript = await client.transcripts.transcribe(transcribeParams); - await lemurSummary(transcript).then(purgeLemurRequestData); - await lemurQuestionAnswer(transcript).then(purgeLemurRequestData); - await lemurActionPoints(transcript).then(purgeLemurRequestData); - await lemurCustomTask(transcript).then(purgeLemurRequestData); - await deleteTranscript(transcript); -})(); - -(async function createTranscriptWithBadUrl() { - const transcript = await client.transcripts.transcribe({ - audio: "https://storage.googleapis.com/api-docs-samples/oops.m4a", - }); - console.log(transcript); - return transcript; -})().then(async (transcript) => { - try { - await getParagraphs(transcript); - console.error("Error expected but not thrown."); - } catch (error) { - console.log("Error expected:", error); - await deleteTranscript(transcript); - } -}); - -(async function createTranscriptWithNullUrl() { - try { - await client.transcripts.submit({ - audio: null as unknown as string, - }); - console.error("Error expected but not thrown."); - } catch (error) { - console.log("Error expected:", error); - } -})(); - -(async function createTranscriptWithWordBoost() { - const transcript = await client.transcripts.transcribe({ - ...transcribeParams, - boost_param: 
"high", - word_boost: ["knee", "hip"], - }); - console.log(transcript); - return transcript; -})().then(deleteTranscript); - -(async function createTranscriptWithSummarization() { - const transcript = await client.transcripts.transcribe({ - ...transcribeParams, - summarization: true, - summary_model: "conversational", - summary_type: "bullets_verbose", - punctuate: true, - format_text: true, - }); - console.log(transcript); - return transcript; -})().then(deleteTranscript); - -(async function createTranscriptWithContentSafety() { - const transcript = await client.transcripts.transcribe({ - ...transcribeParams, - content_safety: true, - }); - console.log(transcript); - return transcript; -})().then(deleteTranscript); - -(async function createTranscriptWithCustomSpelling() { - const transcript = await client.transcripts.transcribe({ - ...transcribeParams, - custom_spelling: [ - { from: ["quarterback", "QB"], to: "nickelback" }, - { from: ["bear"], to: "cub" }, - ], - }); - console.log(transcript); - return transcript; -})().then(deleteTranscript); - -(async function createTranscriptWithEntityDetection() { - const transcript = await client.transcripts.transcribe({ - ...transcribeParams, - entity_detection: true, - }); - console.log(transcript); - return transcript; -})().then(deleteTranscript); - -(async function createTranscriptWithFilterProfanity() { - const transcript = await client.transcripts.transcribe({ - ...transcribeParams, - filter_profanity: true, - }); - console.log(transcript); - return transcript; -})().then(deleteTranscript); - -(async function createTranscriptWithTopicDetection() { - const transcript = await client.transcripts.transcribe({ - ...transcribeParams, - iab_categories: true, - }); - console.log(transcript); - return transcript; -})().then(deleteTranscript); - -(async function createTranscriptWithLanguageDetection() { - const transcript = await client.transcripts.transcribe({ - ...transcribeParams, - language_code: undefined, - 
language_detection: true, - }); - console.log(transcript); - return transcript; -})().then(deleteTranscript); - -(async function createTranscriptWithPiiRedaction() { - const transcript = await client.transcripts.transcribe({ - ...transcribeParams, - format_text: true, - redact_pii: true, - redact_pii_audio: true, - redact_pii_audio_quality: "wav", - redact_pii_policies: ["injury", "medical_condition", "medical_process"], - redact_pii_sub: "hash", - }); - console.log(transcript); - return transcript; -})().then(deleteTranscript); - -(async function createTranscriptWithSentimentAnalysis() { - const transcript = await client.transcripts.transcribe({ - ...transcribeParams, - punctuate: true, - sentiment_analysis: true, - }); - console.log(transcript); - return transcript; -})().then(deleteTranscript); - -(async function createTranscriptWithSpeakerLabels() { - const transcript = await client.transcripts.transcribe({ - ...transcribeParams, - dual_channel: false, - punctuate: true, - speaker_labels: true, - speakers_expected: 2, - }); - console.log(transcript); - return transcript; -})().then(deleteTranscript); - -(async function createTranscriptWithWebhook() { - const transcript = await client.transcripts.transcribe({ - ...transcribeParams, - webhook_auth_header_name: "x-foo", - webhook_auth_header_value: "bar", - webhook_url: "https://www.assemblyai.com/404", - }); - console.log(transcript); - return transcript; -})().then(deleteTranscript); - -// paginate using prev_url because transcripts are returned in descending order of creation -(async function listTranscripts() { - let previousPageUrl: string | undefined | null; - do { - const page = await client.transcripts.list( - previousPageUrl as string | undefined, - ); - console.log(page); - previousPageUrl = page.page_details.prev_url; - } while (previousPageUrl); -})(); - -async function searchTranscript(transcript: Transcript) { - const result = await client.transcripts.wordSearch(transcript.id, [ - "draft", - 
"football", - ]); - console.log(result); -} - -async function exportAsSubtitles(transcript: Transcript) { - const srt = await client.transcripts.subtitles(transcript.id, "srt"); - const vtt = await client.transcripts.subtitles(transcript.id, "vtt"); - console.log("SRT subtitles", srt); - console.log("VTT subtitles", vtt); -} - -async function getParagraphs(transcript: Transcript) { - const paragraphs = await client.transcripts.paragraphs(transcript.id); - console.dir(paragraphs, { depth: null }); -} - -async function getSentences(transcript: Transcript) { - const sentences = await client.transcripts.sentences(transcript.id); - console.dir(sentences, { depth: null }); -} - -async function deleteTranscript(transcript: Transcript) { - await client.transcripts.delete(transcript.id); -} - -const lemurContext = - "This is a podcast on the ESPN channel talking about NFL draft picks."; - -async function lemurSummary(transcript: Transcript) { - const response = await client.lemur.summary({ - transcript_ids: [transcript.id], - context: lemurContext, - final_model: "basic", - max_output_size: 3000, - answer_format: "bullet points", - }); - console.log(response.response); - return response; -} - -async function lemurQuestionAnswer(transcript: Transcript) { - const response = await client.lemur.questionAnswer({ - transcript_ids: [transcript.id], - questions: [ - { - question: "Which players were mentioned?", - context: lemurContext, - answer_format: " ", - }, - { - question: "Were they excited", - context: lemurContext, - answer_options: ["yes", "no"], - }, - ], - context: lemurContext, - final_model: "basic", - max_output_size: 3000, - }); - console.log(response.response); - return response; -} - -async function lemurActionPoints(transcript: Transcript) { - const response = await client.lemur.actionItems({ - transcript_ids: [transcript.id], - context: lemurContext, - final_model: "basic", - max_output_size: 3000, - }); - console.log(response.response); - return response; -} - 
-async function lemurCustomTask(transcript: Transcript) { - const response = await client.lemur.task({ - transcript_ids: [transcript.id], - prompt: "List all the teams and their players that are mentioned.", - context: lemurContext, - final_model: "basic", - max_output_size: 3000, - }); - console.log(response.response); - return response; -} - -async function purgeLemurRequestData(lemurResponse: LemurBaseResponse) { - const response = await client.lemur.purgeRequestData( - lemurResponse.request_id, - ); - console.log(response); -} diff --git a/src/services/realtime/service.ts b/src/services/realtime/service.ts index de59b09..79d05e2 100644 --- a/src/services/realtime/service.ts +++ b/src/services/realtime/service.ts @@ -12,6 +12,7 @@ import { SessionBeginsEventData, AudioEncoding, AudioData, + SessionInformation, } from "../.."; import { RealtimeError, @@ -49,6 +50,9 @@ export class RealtimeTranscriber { private apiKey?: string; private token?: string; private endUtteranceSilenceThreshold?: number; + private enableExtraSessionInformation?: boolean; + private disablePartialTranscripts?: boolean; + private socket?: WebSocket; private listeners: RealtimeListeners = {}; private sessionTerminatedResolve?: () => void; @@ -59,6 +63,8 @@ export class RealtimeTranscriber { this.wordBoost = params.wordBoost; this.encoding = params.encoding; this.endUtteranceSilenceThreshold = params.endUtteranceSilenceThreshold; + this.enableExtraSessionInformation = params.enableExtraSessionInformation; + this.disablePartialTranscripts = params.disablePartialTranscripts; if ("token" in params && params.token) this.token = params.token; if ("apiKey" in params && params.apiKey) this.apiKey = params.apiKey; @@ -85,6 +91,18 @@ export class RealtimeTranscriber { if (this.encoding) { searchParams.set("encoding", this.encoding); } + if (this.enableExtraSessionInformation) { + searchParams.set( + "enable_extra_session_information", + this.enableExtraSessionInformation.toString(), + ); + } + if 
(this.disablePartialTranscripts) { + searchParams.set( + "disable_partial_transcripts", + this.disablePartialTranscripts.toString(), + ); + } url.search = searchParams.toString(); return url; @@ -103,6 +121,10 @@ export class RealtimeTranscriber { event: "transcript.final", listener: (transcript: FinalTranscript) => void, ): void; + on( + event: "session_information", + listener: (info: SessionInformation) => void, + ): void; on(event: "error", listener: (error: Error) => void): void; on(event: "close", listener: (code: number, reason: string) => void): void; // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -183,6 +205,10 @@ export class RealtimeTranscriber { this.listeners["transcript.final"]?.(message); break; } + case "SessionInformation": { + this.listeners.session_information?.(message); + break; + } case "SessionTerminated": { this.sessionTerminatedResolve?.(); break; diff --git a/src/services/transcripts/index.ts b/src/services/transcripts/index.ts index fc4f823..79a8139 100644 --- a/src/services/transcripts/index.ts +++ b/src/services/transcripts/index.ts @@ -15,6 +15,7 @@ import { TranscribeParams, TranscribeOptions, SubmitParams, + SpeechModel, } from "../.."; import { FileService } from "../files"; import { getPath } from "../../utils/path"; @@ -37,6 +38,7 @@ export class TranscriptService extends BaseService { params: TranscribeParams, options?: TranscribeOptions, ): Promise { + deprecateConformer2(params); const transcript = await this.submit(params); return await this.waitUntilReady(transcript.id, options); } @@ -47,6 +49,7 @@ export class TranscriptService extends BaseService { * @returns A promise that resolves to the queued transcript. 
*/ async submit(params: SubmitParams): Promise { + deprecateConformer2(params); let audioUrl; let transcriptParams: TranscriptParams | undefined = undefined; if ("audio" in params) { @@ -87,6 +90,7 @@ export class TranscriptService extends BaseService { params: TranscriptParams, options?: CreateTranscriptOptions, ): Promise { + deprecateConformer2(params); const path = getPath(params.audio_url); if (path !== null) { const uploadUrl = await this.files.upload(path); @@ -246,3 +250,12 @@ export class TranscriptService extends BaseService { ); } } + +function deprecateConformer2(params: { speech_model?: SpeechModel | null }) { + if (!params) return; + if (params.speech_model === "conformer-2") { + console.warn( + "The speech_model conformer-2 option is deprecated and will stop working in the near future. Use best or nano instead.", + ); + } +} diff --git a/src/types/asyncapi.generated.ts b/src/types/asyncapi.generated.ts index 3df1c38..da8197a 100644 --- a/src/types/asyncapi.generated.ts +++ b/src/types/asyncapi.generated.ts @@ -37,6 +37,9 @@ export type ConfigureEndUtteranceSilenceThreshold = { end_utterance_silence_threshold: number; }; +/** + * Transcript text at the end of an utterance with punctuation and casing. + */ export type FinalTranscript = RealtimeBaseTranscript & { /** * Describes the type of message @@ -66,8 +69,12 @@ export type MessageType = | "SessionBegins" | "PartialTranscript" | "FinalTranscript" + | "SessionInformation" | "SessionTerminated"; +/** + * As you send audio data to the API, the API immediately starts responding with Partial Transcript results. 
+ */ export type PartialTranscript = RealtimeBaseTranscript & { /** * Describes the type of message @@ -110,6 +117,9 @@ export type RealtimeBaseTranscript = { words: Word[]; }; +/** + * Error message + */ export type RealtimeError = { error: string; }; @@ -118,6 +128,7 @@ export type RealtimeMessage = | SessionBegins | PartialTranscript | FinalTranscript + | SessionInformation | SessionTerminated | RealtimeError; @@ -125,6 +136,9 @@ export type RealtimeTranscript = PartialTranscript | FinalTranscript; export type RealtimeTranscriptType = "PartialTranscript" | "FinalTranscript"; +/** + * Session start + */ export type SessionBegins = RealtimeBaseMessage & { /** * Timestamp when this session will expire @@ -140,6 +154,25 @@ export type SessionBegins = RealtimeBaseMessage & { session_id: string; }; +/** + * Information about the session + * Information about the session that is concluding. + * This message is sent at the end of the session, before the SessionTerminated message. + */ +export type SessionInformation = RealtimeBaseMessage & { + /** + * The total duration of the audio in seconds + */ + audio_duration_seconds: number; + /** + * Describes the type of the message + */ + message_type: "SessionInformation"; +}; + +/** + * Session terminated + */ export type SessionTerminated = RealtimeBaseMessage & { /** * Describes the type of the message @@ -147,6 +180,9 @@ export type SessionTerminated = RealtimeBaseMessage & { message_type: "SessionTerminated"; }; +/** + * Terminate session + */ export type TerminateSession = { /** * Set to true to end your streaming session forever diff --git a/src/types/openapi.generated.ts b/src/types/openapi.generated.ts index 27073de..150d15e 100644 --- a/src/types/openapi.generated.ts +++ b/src/types/openapi.generated.ts @@ -271,6 +271,10 @@ export type AutoHighlightsResult = { * A temporally-sequential array of Key Phrases */ results: AutoHighlightResult[]; + /** + * The status of the Key Phrases model. 
Either success, or unavailable in the rare case that the model failed. + */ + status: AudioIntelligenceModelStatus; }; /** diff --git a/src/types/realtime/index.ts b/src/types/realtime/index.ts index b5a8c79..5999aee 100644 --- a/src/types/realtime/index.ts +++ b/src/types/realtime/index.ts @@ -4,6 +4,7 @@ import { PartialTranscript, RealtimeTranscript, RealtimeTranscriptType, + SessionInformation, } from "../asyncapi.generated"; type CreateRealtimeTranscriberParams = { @@ -27,6 +28,18 @@ type CreateRealtimeTranscriberParams = { * The duration of the end utterance silence threshold in milliseconds */ endUtteranceSilenceThreshold?: number; + /** + * Disable partial transcripts. + * Set to `true` to not receive partial transcripts. Defaults to `false`. + * @defaultValue false + */ + disablePartialTranscripts?: boolean; + /** + * Enable extra session information. + * Set to `true` to receive the `session_information` message before the session ends. Defaults to `false`. + * @defaultValue false + */ + enableExtraSessionInformation?: boolean; } & ( | { /** @@ -69,6 +82,18 @@ type RealtimeTranscriberParams = { * The duration of the end utterance silence threshold in milliseconds */ endUtteranceSilenceThreshold?: number; + /** + * Disable partial transcripts. + * Set to `true` to not receive partial transcripts. Defaults to `false`. + * @defaultValue false + */ + disablePartialTranscripts?: boolean; + /** + * Enable extra session information. + * Set to `true` to receive the `session_information` message before the session ends. Defaults to `false`. 
+ * @defaultValue false + */ + enableExtraSessionInformation?: boolean; } & ( | { /** @@ -96,6 +121,7 @@ type RealtimeEvents = | "transcript" | "transcript.partial" | "transcript.final" + | "session_information" | "error"; type SessionBeginsEventData = { @@ -109,6 +135,7 @@ type RealtimeListeners = { transcript?: (transcript: RealtimeTranscript) => void; "transcript.partial"?: (transcript: PartialTranscript) => void; "transcript.final"?: (transcript: FinalTranscript) => void; + session_information?: (info: SessionInformation) => void; error?: (error: Error) => void; }; diff --git a/tests/integration/file.test.ts b/tests/integration/file.test.ts new file mode 100644 index 0000000..3f0471c --- /dev/null +++ b/tests/integration/file.test.ts @@ -0,0 +1,30 @@ +import { createReadStream } from "fs"; +import { readFile } from "fs/promises"; +import path from "path"; +import "dotenv/config"; +import { AssemblyAI } from "../../src"; + +const testDir = process.env["TESTDATA_DIR"] ?? "."; + +const client = new AssemblyAI({ + apiKey: process.env.ASSEMBLYAI_API_KEY!, +}); + +describe("files", () => { + it("should upload a file from path", async () => { + const uploadUrl = await client.files.upload(path.join(testDir, "gore.wav")); + expect(uploadUrl).toBeTruthy(); + }); + + it("should upload a file from stream", async () => { + const stream = createReadStream(path.join(testDir, "gore.wav")); + const uploadUrl = await client.files.upload(stream); + expect(uploadUrl).toBeTruthy(); + }); + + it("should upload a file from buffer", async () => { + const data = await readFile(path.join(testDir, "gore.wav")); + const uploadUrl = await client.files.upload(data); + expect(uploadUrl).toBeTruthy(); + }); +}); diff --git a/tests/integration/lemur.test.ts b/tests/integration/lemur.test.ts new file mode 100644 index 0000000..b3d12f9 --- /dev/null +++ b/tests/integration/lemur.test.ts @@ -0,0 +1,78 @@ +import "dotenv/config"; +import { AssemblyAI } from "../../src"; + +const client = new 
AssemblyAI({ + apiKey: process.env.ASSEMBLYAI_API_KEY!, +}); +const knownTranscriptIds = process.env.TEST_TRANSCRIPT_IDS?.split(","); + +describe("lemur", () => { + it("should generate a summary", async () => { + const { response } = await client.lemur.summary({ + final_model: "basic", + transcript_ids: knownTranscriptIds, + answer_format: "one sentence", + }); + + expect(response).toBeTruthy(); + }); + + it("should generate an answer", async () => { + const { response } = await client.lemur.questionAnswer({ + final_model: "basic", + transcript_ids: knownTranscriptIds, + questions: [ + { + question: "What are they discussing?", + answer_format: "text", + }, + ], + }); + + expect(response).toBeTruthy(); + expect(response).toHaveLength(1); + }); + + it("should generate action items", async () => { + const { response } = await client.lemur.actionItems({ + final_model: "basic", + transcript_ids: knownTranscriptIds, + }); + + expect(response).toBeTruthy(); + }); + + it("should generate a task", async () => { + const { response } = await client.lemur.task({ + final_model: "basic", + transcript_ids: knownTranscriptIds, + prompt: "Write a haiku about this conversation.", + }); + + expect(response).toBeTruthy(); + }); + + it("should fail to generate a summary", async () => { + const promise = client.lemur.summary({ + final_model: "basic", + transcript_ids: ["bad-id"], + answer_format: "one sentence", + }); + + await expect(promise).rejects.toThrowError( + "each transcript source id must be valid", + ); + }); + + it("should purge request data", async () => { + const { request_id } = await client.lemur.summary({ + final_model: "basic", + transcript_ids: knownTranscriptIds, + answer_format: "one sentence", + }); + + const deletionRequest = await client.lemur.purgeRequestData(request_id); + expect(deletionRequest.deleted).toBeTruthy(); + expect(deletionRequest.request_id_to_purge).toBe(request_id); + }); +}); diff --git a/tests/integration/realtime.test.ts 
b/tests/integration/realtime.test.ts new file mode 100644 index 0000000..efb36d3 --- /dev/null +++ b/tests/integration/realtime.test.ts @@ -0,0 +1,100 @@ +import { createReadStream } from "fs"; +import path from "path"; +import "dotenv/config"; +import { + AssemblyAI, + CreateRealtimeTranscriberParams, + FinalTranscript, + PartialTranscript, +} from "../../src"; + +const testDir = process.env["TESTDATA_DIR"] ?? "."; +const client = new AssemblyAI({ + apiKey: process.env.ASSEMBLYAI_API_KEY!, +}); + +describe("realtime", () => { + it("creates service with API key", (done) => transcribe(false, done), 20_000); + + it("creates service with token", (done) => transcribe(true, done), 20_000); + + it("can create a token", async () => { + const token = await client.realtime.createTemporaryToken({ + expires_in: 480, + }); + expect(token).toBeTruthy(); + }); +}); + +function transcribe(useToken: boolean, done: jest.DoneCallback) { + const partialTranscripts: PartialTranscript[] = []; + const finalTranscripts: FinalTranscript[] = []; + + createRealtimeTranscriber(useToken) + .then(async (realtimeTranscriber) => { + realtimeTranscriber.on("open", ({ sessionId, expiresAt }) => { + console.log("Session ID:", sessionId, "Expires At:", expiresAt); + }); + realtimeTranscriber.on("close", (code: number, reason: string) => { + console.log("Closed", code, reason); + }); + realtimeTranscriber.on( + "transcript.partial", + (transcript: PartialTranscript) => { + console.log("Transcript:", transcript); + partialTranscripts.push(transcript); + }, + ); + realtimeTranscriber.on( + "transcript.final", + (transcript: FinalTranscript) => { + console.log("Transcript:", transcript); + finalTranscripts.push(transcript); + }, + ); + realtimeTranscriber.on("error", (error: Error) => { + console.error(error); + done(new Error(error.toString())); + }); + + await realtimeTranscriber.connect(); + + const chunkSize = 16 * 1024; + const audio = createReadStream(path.join(testDir, "gore-short.wav"), { + 
highWaterMark: chunkSize, + }); + let stop = false; + setTimeout(() => realtimeTranscriber.forceEndUtterance(), 5_000); + setTimeout(() => (stop = true), 10_000); + for await (const chunk of audio) { + if (stop) break; + if (chunk.length < chunkSize) continue; + realtimeTranscriber.sendAudio(chunk); + await new Promise((resolve) => setTimeout(resolve, 300)); + } + console.log("File end"); + + await realtimeTranscriber.close(); + + expect(partialTranscripts.length).toBeGreaterThan(0); + expect(finalTranscripts.length).toBeGreaterThan(0); + + done(); + }) + .catch(done); +} + +async function createRealtimeTranscriber(useToken: boolean) { + const serviceParams: CreateRealtimeTranscriberParams = { + sampleRate: 16_000, + wordBoost: ["gore", "climate"], + token: useToken + ? await client.realtime.createTemporaryToken({ + expires_in: 480, + }) + : undefined, + encoding: "pcm_s16le", + endUtteranceSilenceThreshold: 500, + }; + return client.realtime.transcriber(serviceParams); +} diff --git a/tests/integration/transcript.test.ts b/tests/integration/transcript.test.ts new file mode 100644 index 0000000..065c0ac --- /dev/null +++ b/tests/integration/transcript.test.ts @@ -0,0 +1,141 @@ +import path from "path"; +import "dotenv/config"; +import { AssemblyAI } from "../../src"; + +const testDir = process.env["TESTDATA_DIR"] ??
"."; +const remoteAudioUrl = + "https://storage.googleapis.com/aai-web-samples/espn-bears.m4a"; +const badRemoteAudioURL = + "https://storage.googleapis.com/aai-web-samples/does-not-exist.m4a"; +const knownTranscriptId = process.env.TEST_TRANSCRIPT_ID!; + +const client = new AssemblyAI({ + apiKey: process.env.ASSEMBLYAI_API_KEY!, +}); + +describe("transcript", () => { + it("submit create the transcript object with a remote url", async () => { + const transcript = await client.transcripts.submit({ + audio: remoteAudioUrl, + }); + + console.log(transcript); + expect(transcript.status).toBeTruthy(); + expect(transcript.status).not.toBe("error"); + expect(transcript.status).not.toBe("completed"); + }); + + it("submit should create the transcript object with a local file", async () => { + const transcript = await client.transcripts.submit({ + audio: path.join(testDir, "gore.wav"), + }); + + expect(["processing", "queued"]).toContain(transcript.status); + }); + + it("should get the transcript object", async () => { + const transcript = await client.transcripts.get(knownTranscriptId); + + expect(transcript.id).toBeTruthy(); + expect(transcript.text).toBeTruthy(); + }); + + it("transcribe should poll the transcript object", async () => { + const transcript = await client.transcripts.transcribe({ + audio: remoteAudioUrl, + }); + + expect(transcript.status).toBe("completed"); + }); + + it("should wait on the transcript until ready", async () => { + let transcript = await client.transcripts.submit({ + audio: remoteAudioUrl, + }); + transcript = await client.transcripts.waitUntilReady(transcript.id); + + expect(transcript.status).toBe("completed"); + }); + + it("should retrieve a page of transcripts", async () => { + const page = await client.transcripts.list(); + expect(Array.isArray(page.transcripts)).toBeTruthy(); + expect(page.page_details).not.toBeNull(); + }); + + it("should delete the transcript object", async () => { + let transcript = await
client.transcripts.transcribe({ + audio: remoteAudioUrl, + }); + transcript = await client.transcripts.delete(transcript.id); + expect(transcript.audio_url).toBe("http://deleted_by_user"); + }); + + it("transcribe should fail to create the transcript object", async () => { + const transcript = await client.transcripts.transcribe({ + audio: badRemoteAudioURL, + }); + expect(transcript.status).toBe("error"); + expect(transcript.error).toBeTruthy(); + }); + + it("should get paragraphs", async () => { + const paragraphsResponse = + await client.transcripts.paragraphs(knownTranscriptId); + expect(Array.isArray(paragraphsResponse.paragraphs)).toBeTruthy(); + expect(paragraphsResponse.paragraphs.length).toBeGreaterThan(0); + }); + + it("should get sentences", async () => { + const sentencesResponse = + await client.transcripts.sentences(knownTranscriptId); + expect(Array.isArray(sentencesResponse.sentences)).toBeTruthy(); + expect(sentencesResponse.sentences.length).toBeGreaterThan(0); + }); + + it("should get srt subtitles", async () => { + const subtitle = await client.transcripts.subtitles( + knownTranscriptId, + "srt", + 32, + ); + expect(subtitle).toBeTruthy(); + }); + + it("should get vtt subtitles", async () => { + const subtitle = await client.transcripts.subtitles( + knownTranscriptId, + "vtt", + 32, + ); + expect(subtitle).toBeTruthy(); + }); + + it("should get redactions", async () => { + const transcript = await client.transcripts.transcribe({ + audio: remoteAudioUrl, + redact_pii: true, + redact_pii_audio: true, + redact_pii_audio_quality: "wav", + redact_pii_policies: ["medical_condition"], + redact_pii_sub: "hash", + }); + const redactedAudioResponse = await client.transcripts.redactions( + transcript.id, + ); + expect(redactedAudioResponse.status).toBe("redacted_audio_ready"); + expect(redactedAudioResponse.redacted_audio_url).toBeTruthy(); + }); + + it("should word search", async () => { + const searchResponse = await client.transcripts.wordSearch( + 
knownTranscriptId, + ["Giants"], + ); + console.log(searchResponse); + expect(searchResponse.id).toBe(knownTranscriptId); + expect(searchResponse.total_count).toBeGreaterThan(0); + expect(Array.isArray(searchResponse.matches)).toBeTruthy(); + expect(searchResponse.matches.length).toBeGreaterThan(0); + }); +}); diff --git a/tests/file.test.ts b/tests/unit/file.test.ts similarity index 100% rename from tests/file.test.ts rename to tests/unit/file.test.ts diff --git a/tests/lemur.test.ts b/tests/unit/lemur.test.ts similarity index 100% rename from tests/lemur.test.ts rename to tests/unit/lemur.test.ts diff --git a/tests/__mocks__/ws.ts b/tests/unit/mocks/ws.ts similarity index 100% rename from tests/__mocks__/ws.ts rename to tests/unit/mocks/ws.ts diff --git a/tests/realtime.test.ts b/tests/unit/realtime.test.ts similarity index 92% rename from tests/realtime.test.ts rename to tests/unit/realtime.test.ts index 6bdd270..9bb28a0 100644 --- a/tests/realtime.test.ts +++ b/tests/unit/realtime.test.ts @@ -1,12 +1,13 @@ import { TransformStream } from "stream/web"; +jest.mock("ws", () => require("./mocks/ws")); import WS from "jest-websocket-mock"; import fetchMock from "jest-fetch-mock"; -import { AssemblyAI, RealtimeTranscriber } from "../src"; +import { AssemblyAI, RealtimeTranscriber } from "../../src"; import { RealtimeError, RealtimeErrorType, RealtimeErrorMessages, -} from "../src/utils/errors/realtime"; +} from "../../src/utils/errors/realtime"; import { createClient, defaultApiKey, requestMatches } from "./utils"; @@ -20,6 +21,10 @@ const sessionBeginsMessage = { session_id: "123", expires_at: "2023-09-14T03:37:11.516967", }; +const sessionInformationMessage = { + message_type: "SessionInformation", + audio_duration_seconds: 232.192, +}; const sessionTerminatedMessage = { message_type: "SessionTerminated", }; @@ -277,6 +282,16 @@ describe("realtime", () => { ); }); + it("can receive session information", async () => { + const onSessionInformation = jest.fn(); + 
rt.on("session_information", onSessionInformation); + server.send(JSON.stringify(sessionInformationMessage)); + expect(onSessionInformation).toHaveBeenCalledWith({ + message_type: sessionInformationMessage.message_type, + audio_duration_seconds: sessionInformationMessage.audio_duration_seconds, + }); + }); + it("can create a token", async () => { fetchMock.doMock(); fetchMock.doMockOnceIf( diff --git a/tests/transcript.test.ts b/tests/unit/transcript.test.ts similarity index 89% rename from tests/transcript.test.ts rename to tests/unit/transcript.test.ts index f48a17a..9a5b105 100644 --- a/tests/transcript.test.ts +++ b/tests/unit/transcript.test.ts @@ -116,9 +116,9 @@ describe("transcript", () => { requestMatches({ url: `/v2/transcript/${transcriptId}`, method: "GET" }), JSON.stringify({ id: transcriptId }), ); - const fetched = await assembly.transcripts.get(transcriptId); + const transcript = await assembly.transcripts.get(transcriptId); - expect(fetched.id).toBeTruthy(); + expect(transcript.id).toBeTruthy(); }); it("transcribe should poll the transcript object", async () => { @@ -240,7 +240,7 @@ describe("transcript", () => { }), JSON.stringify({ id: transcriptId }), ); - const deleted = await assembly.transcripts.delete(transcriptId); + const transcript = await assembly.transcripts.delete(transcriptId); expect(fetch).toHaveBeenLastCalledWith( `${defaultBaseUrl}/v2/transcript/${transcriptId}`, @@ -252,17 +252,17 @@ describe("transcript", () => { method: "DELETE", }, ); - expect(deleted.id).toBe(transcriptId); + expect(transcript.id).toBe(transcriptId); }); it("submit should fail to create the transcript object", async () => { const errorResponse = { status: "error" }; fetchMock.mockResponseOnce(JSON.stringify(errorResponse)); - const created = await assembly.transcripts.submit({ + const transcript = await assembly.transcripts.submit({ audio: badRemoteAudioURL, }); - expect(created).toStrictEqual(errorResponse); + expect(transcript).toStrictEqual(errorResponse); 
expect(fetch).toHaveBeenLastCalledWith(`${defaultBaseUrl}/v2/transcript`, { body: JSON.stringify({ audio_url: badRemoteAudioURL }), headers: { @@ -276,7 +276,7 @@ describe("transcript", () => { it("create should fail to create the transcript object", async () => { const errorResponse = { status: "error" }; fetchMock.mockResponseOnce(JSON.stringify(errorResponse)); - const created = await assembly.transcripts.create( + const transcript = await assembly.transcripts.create( { audio_url: badRemoteAudioURL, }, @@ -285,7 +285,7 @@ describe("transcript", () => { }, ); - expect(created).toStrictEqual(errorResponse); + expect(transcript).toStrictEqual(errorResponse); expect(fetch).toHaveBeenLastCalledWith(`${defaultBaseUrl}/v2/transcript`, { body: JSON.stringify({ audio_url: badRemoteAudioURL }), headers: { @@ -336,10 +336,11 @@ describe("transcript", () => { }), JSON.stringify({ transcriptId, paragraphs: ["paragraph 1"] }), ); - const segment = await assembly.transcripts.paragraphs(transcriptId); + const paragraphsResponse = + await assembly.transcripts.paragraphs(transcriptId); - expect(segment.paragraphs).toBeInstanceOf(Array); - expect(segment.paragraphs.length).toBeGreaterThan(0); + expect(paragraphsResponse.paragraphs).toBeInstanceOf(Array); + expect(paragraphsResponse.paragraphs.length).toBeGreaterThan(0); }); it("should get sentences", async () => { @@ -350,10 +351,11 @@ describe("transcript", () => { }), JSON.stringify({ transcriptId, sentences: ["sentence 1"] }), ); - const segment = await assembly.transcripts.sentences(transcriptId); + const sentencesResponse = + await assembly.transcripts.sentences(transcriptId); - expect(segment.sentences).toBeInstanceOf(Array); - expect(segment.sentences.length).toBeGreaterThan(0); + expect(sentencesResponse.sentences).toBeInstanceOf(Array); + expect(sentencesResponse.sentences.length).toBeGreaterThan(0); }); it("should get srt subtitles", async () => { @@ -401,9 +403,10 @@ describe("transcript", () => { redacted_audio_url: 
"https://some-url.com", }), ); - const res = await assembly.transcripts.redactions(transcriptId); - expect(res.status).toBe("redacted_audio_ready"); - expect(res.redacted_audio_url).toBeTruthy(); + const redactedAudioResponse = + await assembly.transcripts.redactions(transcriptId); + expect(redactedAudioResponse.status).toBe("redacted_audio_ready"); + expect(redactedAudioResponse.redacted_audio_url).toBeTruthy(); }); it("should word search", async () => { @@ -414,10 +417,12 @@ describe("transcript", () => { matches: [{}], }), ); - const res = await assembly.transcripts.wordSearch(transcriptId, ["bears"]); + const searchResponse = await assembly.transcripts.wordSearch(transcriptId, [ + "bears", + ]); - expect(res.id).toBe(transcriptId); - expect(res.total_count).toBe(1); - expect(res.matches).toBeInstanceOf(Array); + expect(searchResponse.id).toBe(transcriptId); + expect(searchResponse.total_count).toBe(1); + expect(searchResponse.matches).toBeInstanceOf(Array); }); }); diff --git a/tests/utils.test.ts b/tests/unit/utils.test.ts similarity index 95% rename from tests/utils.test.ts rename to tests/unit/utils.test.ts index 1beab5f..c51df3b 100644 --- a/tests/utils.test.ts +++ b/tests/unit/utils.test.ts @@ -1,6 +1,6 @@ import fetchMock from "jest-fetch-mock"; -import { AssemblyAI } from "../src"; -import { getPath } from "../src/utils/path"; +import { AssemblyAI } from "../../src"; +import { getPath } from "../../src/utils/path"; fetchMock.enableMocks(); diff --git a/tests/utils.ts b/tests/unit/utils.ts similarity index 91% rename from tests/utils.ts rename to tests/unit/utils.ts index bf8c9a6..7847dff 100644 --- a/tests/utils.ts +++ b/tests/unit/utils.ts @@ -1,4 +1,4 @@ -import { AssemblyAI } from "../src"; +import { AssemblyAI } from "../../src"; export const defaultBaseUrl = "http://localhost:1234"; export const defaultApiKey = "apikey_123"; From 60ee84ab805aa1e34ac382a2a73e20fcd12ef52c Mon Sep 17 00:00:00 2001 From: Niels Swimberghe 
<3382717+Swimburger@users.noreply.github.com> Date: Mon, 15 Apr 2024 12:10:53 -0400 Subject: [PATCH 2/2] Add missing jest files --- jest.integration.config.js | 14 ++++++++++++++ jest.unit.config.js | 12 ++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 jest.integration.config.js create mode 100644 jest.unit.config.js diff --git a/jest.integration.config.js b/jest.integration.config.js new file mode 100644 index 0000000..1dfadaf --- /dev/null +++ b/jest.integration.config.js @@ -0,0 +1,14 @@ +/** @type {import('jest').Config} */ +const config = { + preset: "ts-jest", + testEnvironment: "node", + collectCoverage: true, + modulePathIgnorePatterns: ["/dist"], + testMatch: ["**/tests/integration/**/*.test.ts"], + clearMocks: true, + maxConcurrency: 1, +}; + +process.env.TESTDATA_DIR = "tests/static"; + +module.exports = config; diff --git a/jest.unit.config.js b/jest.unit.config.js new file mode 100644 index 0000000..253c02c --- /dev/null +++ b/jest.unit.config.js @@ -0,0 +1,12 @@ +/** @type {import('jest').Config} */ +const config = { + preset: "ts-jest", + testEnvironment: "node", + collectCoverage: true, + modulePathIgnorePatterns: ["/dist"], + testMatch: ["**/tests/unit/**/*.test.ts"], +}; + +process.env.TESTDATA_DIR = "tests/static"; + +module.exports = config;