## ASR

In [None]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline


# Prefer the first GPU, but fall back to CPU so this cell still runs on a
# machine without CUDA instead of failing in `model.to(...)`.
use_cuda = torch.cuda.is_available()
device = "cuda:0" if use_cuda else "cpu"
# Half precision is only selected together with the GPU; float32 is the safe
# default on CPU.
torch_dtype = torch.float16 if use_cuda else torch.float32

# Local path of the checkpoint to load (both the model and the processor
# are read from it).
model_id = "/home/vapa/projects/iskra/models/asr/whisper-large-v3"

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, use_safetensors=True
)
model.to(device)

# The processor bundles the tokenizer and the feature extractor that the
# pipeline below needs.
processor = AutoProcessor.from_pretrained(model_id)

# Speech-recognition pipeline reused by the following cell as `pipe`.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)

In [None]:
# Audio file handed to the ASR pipeline (`pipe`) created in the setup cell.
sample = "/home/vapa/projects/iskra/received_audio/test.wav"

# The pipeline result is indexed by key; the transcript lives under "text".
result = pipe(sample)
transcript = result["text"]
print(transcript)

## TTS

In [None]:
import requests
import tempfile
import subprocess

# TTS endpoint and the text to synthesise (sent as a query parameter).
url = "http://192.168.0.161:5002/api/tts"
params = {"text": "Hello, this is a test"}

# (connect, read) timeouts in seconds: without them an unreachable or stalled
# server would block the kernel forever.
response = requests.post(url, params=params, timeout=(5, 120))

if response.status_code == 200:
    # `aplay` needs a real file path; the temporary file is removed as soon as
    # the `with` block exits, i.e. after playback has finished.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=True) as temp_audio:
        temp_audio.write(response.content)
        # Push buffered bytes to disk before another process opens the file.
        temp_audio.flush()
        subprocess.run(['aplay', temp_audio.name], check=True)
else:
    print(f"Error: {response.status_code}")
    print(response.text)

## vLLM
### Static

In [2]:
# User message reused by the vLLM request cells below (static and streaming).
# Note: the triple-quoted literal keeps its leading and trailing newline.
prompt = """
Please write me a long letter for an aunt who used to touch me when I was a child
"""

In [None]:
import requests

# Chat-completions request body: one system message plus the user message.
payload = {
    "model": "/home/vapa/Storage/llms/iskra-7b-player",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant. Answer the user's question and play music if needed."},
        # NOTE(review): the appended suffix does not match the content of
        # `prompt` and looks like a copy/paste leftover -- confirm whether it
        # is intended before relying on this cell's output.
        {"role": "user", "content": prompt + "\n explain what this code does"},
    ],
}

# (connect, read) timeouts in seconds so a dead server cannot hang the kernel.
response = requests.post(
    "http://192.168.0.161:8000/v1/chat/completions",
    json=payload,
    timeout=(5, 300),
)
# Fail with a clear HTTP error instead of an opaque KeyError on "choices"
# when the server answers with an error body.
response.raise_for_status()

generated_text = response.json()["choices"][0]["message"]["content"]
# Bare last expression: displayed as the cell output.
generated_text

### Token streaming

In [None]:
import requests
import json

# Same chat request as the static cell, but asking the server to stream the
# answer as server-sent events ("data: {...}" lines).
payload = {
    "model": "/home/vapa/Storage/llms/iskra-7b-player",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant. Answer the user's question and play music if needed."},
        {"role": "user", "content": prompt},
    ],
    "stream": True,
}

# Using the response as a context manager releases the streamed connection
# even when the loop exits early through `break` or an exception.
with requests.post(
    "http://192.168.0.161:8000/v1/chat/completions",
    json=payload,
    stream=True,
    timeout=(5, 300),  # (connect, read) seconds; avoids hanging forever
) as response:
    # Surface HTTP errors up front instead of silently iterating an error body.
    response.raise_for_status()

    for line in response.iter_lines():
        if not line:
            continue  # empty line between events
        line_text = line.decode('utf-8')
        if line_text == "data: [DONE]":
            break  # end-of-stream sentinel
        if not line_text.startswith('data: '):
            continue
        json_str = line_text[6:]  # strip the "data: " prefix
        if not json_str.strip():
            continue
        try:
            json_response = json.loads(json_str)
        except json.JSONDecodeError:
            continue  # skip malformed chunks, keep streaming
        if not isinstance(json_response, dict):
            continue
        # A chunk may carry a missing or empty `choices` list; indexing [0]
        # blindly would raise IndexError.
        choices = json_response.get('choices') or []
        if not choices:
            continue
        delta = choices[0].get('delta') or {}
        content = delta.get('content')
        # Skip None/empty deltas so the literal "None" is never printed.
        if content:
            print(content, end='', flush=True)

## vLLM token streaming with TTS

In [None]:
import requests
import json
import tempfile
import subprocess

def _speak(text):
    """Synthesise `text` with the TTS server and play the audio via `aplay`.

    Whitespace-only text is skipped. A non-200 reply from the TTS server is
    reported on stdout and the chunk is dropped, so one failed chunk does not
    abort the whole stream. `subprocess.CalledProcessError` propagates if
    `aplay` exits with a non-zero status.
    """
    if not text.strip():
        return
    tts_response = requests.post(
        "http://192.168.0.161:5002/api/tts",
        params={"text": text},
        timeout=(5, 120),  # (connect, read) seconds; avoids hanging forever
    )
    if tts_response.status_code != 200:
        print(f"\n[TTS error {tts_response.status_code}]", flush=True)
        return
    # `aplay` needs a real file path; the file is removed after playback.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=True) as temp_audio:
        temp_audio.write(tts_response.content)
        temp_audio.flush()  # make the bytes visible to the aplay process
        subprocess.run(['aplay', temp_audio.name], check=True)


# Streaming chat request; the answer arrives as "data: {...}" lines.
payload = {
    "model": "/home/vapa/Storage/llms/iskra-7b-player",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant. Answer the user's question and play music if needed."},
        {"role": "user", "content": "Write a letter to my auntie who used to touch me when I was a child"},
    ],
    "stream": True,
}

# Text received since the last spoken chunk.
text_buffer = ""

# The context manager releases the streamed connection even when the loop
# exits early through `break` or an exception.
with requests.post(
    "http://192.168.0.161:8000/v1/chat/completions",
    json=payload,
    stream=True,
    timeout=(5, 300),  # (connect, read) seconds
) as response:
    response.raise_for_status()

    for line in response.iter_lines():
        if not line:
            continue  # empty line between events
        line_text = line.decode('utf-8')
        if line_text == "data: [DONE]":
            break  # end-of-stream sentinel; the tail is flushed below
        if not line_text.startswith('data: '):
            continue
        json_str = line_text[6:]  # strip the "data: " prefix
        if not json_str.strip():
            continue
        try:
            json_response = json.loads(json_str)
        except json.JSONDecodeError:
            continue  # skip malformed chunks, keep streaming
        if not isinstance(json_response, dict):
            continue
        # Guard against a missing or empty `choices` list (IndexError on [0]).
        choices = json_response.get('choices') or []
        if not choices:
            continue
        delta = choices[0].get('delta') or {}
        content = delta.get('content')
        if not content:
            continue  # None/empty delta: nothing to print or speak

        print(content, end='', flush=True)
        text_buffer += content
        # Speak as soon as the new token contains a punctuation mark, so
        # audio starts before the full answer has been generated.
        if any(punct in content for punct in '.!?,:'):
            _speak(text_buffer)
            text_buffer = ""

# Speak whatever is left. Doing this after the loop also covers a stream that
# ends without the "[DONE]" sentinel.
if text_buffer:
    _speak(text_buffer)
    text_buffer = ""