2 changes: 1 addition & 1 deletion .github/CODEOWNERS
@@ -1 +1 @@
@GetStream/android-developers
* @GetStream/android-developers
2 changes: 0 additions & 2 deletions .github/workflows/ci.yml
@@ -7,8 +7,6 @@ on:
- main

pull_request:
branches:
- '**'

workflow_dispatch:

129 changes: 107 additions & 22 deletions README.md
@@ -219,25 +219,56 @@ data class MessageData(
### SpeechToTextButton

`SpeechToTextButton` provides speech-to-text functionality with animated waveform visualization
and automatic permission handling. When not recording, it displays a microphone icon button.
When recording, it transforms into a circular button with animated bars that respond to voice input.
and automatic permission handling. Clicking the button toggles recording on and off. When not recording,
it displays a microphone icon button. When recording, it transforms into a circular button with animated
bars that respond to voice input levels.

**Basic Usage:**

```kotlin
import io.getstream.chat.android.ai.compose.ui.component.SpeechToTextButton
import io.getstream.chat.android.ai.compose.ui.component.rememberSpeechToTextButtonState

@Composable
fun MyComposer() {
var text by remember { mutableStateOf("") }

SpeechToTextButton(
onTextRecognized = { recognizedText ->
// Called with partial results as user speaks
// At the end this callback is invoked with the full recognized text
val speechState = rememberSpeechToTextButtonState(
onFinalResult = { recognizedText ->
// Called with the final result when recording stops
text = recognizedText
}
)

SpeechToTextButton(
state = speechState
)
}
```

**Usage with Real-time Streaming:**

```kotlin
@Composable
fun MyComposer() {
var text by remember { mutableStateOf("") }

val speechState = rememberSpeechToTextButtonState(
onPartialResult = { partialText ->
// Called with partial results as the user speaks (real-time streaming)
// This updates continuously as speech is detected
text = partialText
},
onFinalResult = { finalText ->
// Called with the final result when recording stops
// This is the complete, finalized transcription
text = finalText
}
)

SpeechToTextButton(
state = speechState
)
}
```

@@ -249,29 +280,39 @@ import io.getstream.chat.android.ai.compose.ui.component.rememberSpeechToTextButtonState

@Composable
fun MyComposer() {
val speechState = rememberSpeechToTextButtonState()
var text by remember { mutableStateOf("") }

// Remember the text that existed before starting speech recognition
val textBeforeSpeech = remember { mutableStateOf("") }
var textBeforeSpeech by remember { mutableStateOf("") }

val speechState = rememberSpeechToTextButtonState(
onPartialResult = { partialText ->
// Update with partial results in real-time
text = if (textBeforeSpeech.isBlank()) {
partialText
} else {
"$textBeforeSpeech $partialText"
}
},
onFinalResult = { finalText ->
// Append the final recognized text to the text already in the composer
text = if (textBeforeSpeech.isBlank()) {
finalText
} else {
"$textBeforeSpeech $finalText"
}
}
)

// Capture text when recording starts
LaunchedEffect(speechState.isRecording()) {
if (speechState.isRecording()) {
textBeforeSpeech.value = text
textBeforeSpeech = text
}
}

SpeechToTextButton(
state = speechState,
onTextRecognized = { recognizedText ->
// Append recognized text after the text which is already in the composer
text = if (textBeforeSpeech.value.isBlank()) {
recognizedText
} else {
"${textBeforeSpeech.value} $recognizedText"
}
}
state = speechState
)

// Check if currently recording
@@ -281,17 +322,61 @@
}
```

**Customization:**

```kotlin
val speechState = rememberSpeechToTextButtonState(
onPartialResult = { partialText ->
// Handle partial results (optional)
},
onFinalResult = { finalText ->
// Handle final result
}
)

SpeechToTextButton(
state = speechState,
idleContent = { onClick ->
// Custom content when not recording
IconButton(onClick = onClick) {
Icon(Icons.Default.Mic, "Voice input")
}
},
recordingContent = { onClick, rmsdB ->
// Custom content when recording
// rmsdB is the current audio level (0-10) for visualization
IconButton(onClick = onClick) {
// Your custom recording visualization
}
}
)
```

**Features:**
- Click to toggle recording on/off
- Automatic audio permission requests (RECORD_AUDIO); see the permission-denied sketch after this list
- Animated waveform visualization during recording
- Real-time streaming of recognized text (partial results)
- Animated waveform visualization during recording that responds to audio levels
- Real-time streaming of recognized text (partial results as speech is detected)
- Final result callback when recording stops
- Automatic UI transformation between idle and recording states
- State tracking for recording status
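
The sample composer further down in this diff also passes an `onPermissionDenied` callback to `SpeechToTextButton` and surfaces the denial through a snackbar. A minimal sketch of that pattern is shown below; only the `onPermissionDenied` parameter comes from this change, while the snackbar wiring is illustrative and assumes the `speechState` from the examples above:

```kotlin
val snackbarHostState = remember { SnackbarHostState() }
val scope = rememberCoroutineScope()

SnackbarHost(hostState = snackbarHostState)

SpeechToTextButton(
    state = speechState,
    onPermissionDenied = {
        // RECORD_AUDIO was denied; explain why the microphone is needed.
        scope.launch {
            snackbarHostState.showSnackbar(
                message = "Microphone permission is required to record audio",
            )
        }
    },
)
```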

**Parameters:**
- `modifier`: Modifier to be applied to the root container
- `state`: Optional state holder for tracking recording status (defaults to remembered state)
- `onTextRecognized`: Callback invoked with each partial result as speech is detected
- `state`: Optional state holder for tracking recording status and receiving recognized text.
Defaults to a remembered state with an empty callback. To receive text, create a state using
`rememberSpeechToTextButtonState` with your callbacks and pass it here.
- `idleContent`: The composable content to display when not recording. Receives an onClick callback
that toggles recording. Defaults to a microphone icon button.
- `recordingContent`: The composable content to display when recording. Receives an onClick callback
that stops recording, plus the current audio level (rmsdB) for visualization; see the sketch below. Defaults to animated bars.
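
The customization example above leaves the recording visualization as a placeholder comment. As a minimal, illustrative sketch, one possible visualization is a dot that pulses with the reported level; the `(onClick, rmsdB)` lambda shape and the 0-10 range come from the description above, while the animation itself is only a suggestion:

```kotlin
SpeechToTextButton(
    state = speechState,
    recordingContent = { onClick, rmsdB ->
        // Map the reported level (documented as 0-10) onto a 0f..1f fraction.
        val level by animateFloatAsState(targetValue = (rmsdB / 10f).coerceIn(0f, 1f))
        IconButton(onClick = onClick) {
            // A dot that grows as the detected voice level rises.
            Box(
                modifier = Modifier
                    .size(12.dp + 12.dp * level)
                    .background(MaterialTheme.colorScheme.primary, CircleShape),
            )
        }
    },
)
```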

**rememberSpeechToTextButtonState Parameters:**
- `onPartialResult`: Optional callback invoked when text chunks are recognized during recording.
Called with each partial result as speech is detected, enabling real-time text streaming.
Use this to update your UI in real-time as the user speaks.
- `onFinalResult`: Required callback invoked when the final result is available after recording stops.
Called with the complete, finalized transcription. This is the definitive result of the speech recognition.

**SpeechToTextButtonState API:**

2 changes: 1 addition & 1 deletion config/detekt/detekt.yml
@@ -333,7 +333,7 @@ naming:
forbiddenName: []
FunctionMaxLength:
active: true
maximumFunctionNameLength: 30
maximumFunctionNameLength: 35
excludes: ['**/test/**', '**/androidTest/**', '**/commonTest/**', '**/jvmTest/**', '**/androidUnitTest/**', '**/androidInstrumentedTest/**', '**/jsTest/**', '**/iosTest/**']
FunctionMinLength:
active: true
Original file line number Diff line number Diff line change
@@ -64,13 +64,12 @@ class App : Application() {
.build()

val user = User(
id = "andrerego",
name = "André Rêgo",
image = "https://ca.slack-edge.com/T02RM6X6B-U083JCB6ZEY-2da235988b74-512",
id = "stream-user",
name = "Stream User",
)

// https://getstream.io/chat/docs/php/token_generator/
val token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoiYW5kcmVyZWdvIn0.DfzvkOT8-cnpTFzD5E3XL5P3nI8GJFo5Suxf23kvHuo"
val token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoic3RyZWFtLXVzZXIifQ.ZG2h53Sne0kyCq5iI40ExcS0MCqDa9q-Dbc-iJ2niYU"
chatClient.connectUser(user, token)
.enqueue { result ->
if (result.isFailure) {
Original file line number Diff line number Diff line change
@@ -18,8 +18,11 @@

package io.getstream.chat.android.ai.compose.ui.component

import android.content.Context
import android.content.Intent
import android.content.res.Configuration
import android.net.Uri
import android.provider.Settings
import androidx.activity.result.PickVisualMediaRequest
import androidx.activity.result.contract.ActivityResultContracts
import androidx.compose.animation.AnimatedContent
@@ -46,18 +49,23 @@ import androidx.compose.material3.LocalTextStyle
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.OutlinedIconButton
import androidx.compose.material3.OutlinedTextFieldDefaults
import androidx.compose.material3.SnackbarHost
import androidx.compose.material3.SnackbarHostState
import androidx.compose.material3.SnackbarResult
import androidx.compose.material3.Surface
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.runtime.LaunchedEffect
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.rememberCoroutineScope
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.graphics.SolidColor
import androidx.compose.ui.graphics.takeOrElse
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.platform.LocalSoftwareKeyboardController
import androidx.compose.ui.text.TextStyle
import androidx.compose.ui.text.input.ImeAction
@@ -66,6 +74,7 @@ import androidx.compose.ui.unit.dp
import io.getstream.chat.android.ai.compose.ui.component.internal.ChatAiIcons
import io.getstream.chat.android.ai.compose.ui.component.internal.SelectedAttachmentList
import io.getstream.chat.android.ai.compose.ui.component.internal.rememberPhotoPickerLauncher
import kotlinx.coroutines.launch

/**
* Data class representing a message composed by the user.
@@ -243,15 +252,28 @@ private fun TextField(
}
val mergedTextStyle = textStyle.merge(TextStyle(color = textColor))

val speechToTextState = rememberSpeechToTextButtonState()

// Remember the text that existed before starting speech recognition
val textBeforeSpeech = remember { mutableStateOf("") }
var textBeforeSpeech by remember { mutableStateOf("") }

val onTextRecognized = { recognizedText: String ->
onTextChange(
if (textBeforeSpeech.isBlank()) {
recognizedText
} else {
"$textBeforeSpeech $recognizedText"
},
)
}

val speechToTextState = rememberSpeechToTextButtonState(
onPartialResult = onTextRecognized,
onFinalResult = onTextRecognized,
)

// Update textBeforeSpeech when recording starts/stops
LaunchedEffect(speechToTextState.isRecording()) {
if (speechToTextState.isRecording()) {
textBeforeSpeech.value = text
textBeforeSpeech = text
}
}

@@ -261,6 +283,14 @@
else -> null
}

val coroutineScope = rememberCoroutineScope()
val snackbarHostState = remember { SnackbarHostState() }
val context = LocalContext.current

SnackbarHost(
hostState = snackbarHostState,
)

BasicTextField(
modifier = modifier.defaultMinSize(minHeight = LocalMinimumInteractiveComponentSize.current),
value = text,
@@ -291,7 +321,7 @@ private fun TextField(
}
}
Row(
verticalAlignment = Alignment.CenterVertically,
verticalAlignment = Alignment.Bottom,
) {
Box(
modifier = Modifier.weight(1f),
@@ -309,24 +339,26 @@
innerTextField = innerTextField,
)
}
FilledIconButton(
onClick = {
},
enabled = !isStreaming,
) {
SpeechToTextButton(
state = speechToTextState,
onTextRecognized = { recognizedText ->
// Partial results already contain full text, so replace (don't accumulate)
onTextChange(
if (textBeforeSpeech.value.isBlank()) {
recognizedText
} else {
"${textBeforeSpeech.value} $recognizedText"
},
)
},
)
AnimatedContent(
targetState = !isStreaming,
) { visible ->
if (visible) {
SpeechToTextButton(
state = speechToTextState,
onPermissionDenied = {
coroutineScope.launch {
val result = snackbarHostState.showSnackbar(
message = "Microphone permission is required to record audio",
actionLabel = "Settings",
withDismissAction = true,
)
if (result == SnackbarResult.ActionPerformed) {
context.openSettings()
}
}
},
)
}
}
AnimatedContent(
targetState = trailingButton,
@@ -362,6 +394,13 @@ private fun TextField(
)
}

private fun Context.openSettings() {
val intent = Intent(Settings.ACTION_APPLICATION_DETAILS_SETTINGS).apply {
data = Uri.fromParts("package", packageName, null)
}
startActivity(intent)
}

@Composable
private fun TextInputField(
modifier: Modifier,