# Notebook designed to be run in Google Colab

In [1]:
import os
!git lfs install
!git clone https://huggingface.co/ethan3048/saiCommandProcessor
github_token = "github access token"
repo_url = "https://github.com/EthanEpp/saiCommandExecution.git"
os.environ['GITHUB_TOKEN'] = github_token
!git clone https://$GITHUB_TOKEN@github.com/EthanEpp/saiCommandExecution.git
os.chdir('saiCommandExecution')
print("Current working directory:", os.getcwd())
!pip install -r requirements.txt
import sys
sys.path.append('content/saiCommandExecution')
!pip install sacremoses
!pip install sounddevice scipy openai-whisper

import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from torch.autograd import Variable
import pickle
# Ensure Python can find your modules by adding directories to the path
sys.path.append('/content/saiCommandExecution/src')
sys.path.append('/content/saiCommandExecution/src/models')
sys.path.append('/content/saiCommandExecution/src/services')
sys.path.append('/content/saiCommandExecution/src/utils')
%ls




Git LFS initialized.
Cloning into 'saiCommandProcessor'...
remote: Enumerating objects: 36, done.[K
remote: Counting objects: 100% (33/33), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 36 (delta 2), reused 0 (delta 0), pack-reused 3 (from 1)[K
Unpacking objects: 100% (36/36), 307.27 KiB | 688.00 KiB/s, done.
Filtering content: 100% (16/16), 8.98 GiB | 51.68 MiB/s, done.
Cloning into 'saiCommandExecution'...
remote: Enumerating objects: 520, done.[K
remote: Counting objects: 100% (269/269), done.[K
remote: Compressing objects: 100% (175/175), done.[K
remote: Total 520 (delta 116), reused 202 (delta 77), pack-reused 251[K
Receiving objects: 100% (520/520), 1.99 MiB | 21.86 MiB/s, done.
Resolving deltas: 100% (237/237), done.
Current working directory: /content/saiCommandExecution
Collecting openai_whisper==20231117 (from -r requirements.txt (line 2))
  Downloading openai-whisper-20231117.tar.gz (798 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [12]:
import importlib
# Import your modules
import models.cnet
import services.cnet_inference
import utils.dataloader

# Reload the modules to ensure the latest changes are picked up
importlib.reload(models.cnet)
importlib.reload(services.cnet_inference)
importlib.reload(utils.dataloader)

# The helpers imported below come from the `src.*` module paths, which are
# separate entries in sys.modules from `services.*` / `utils.*` above.
# Reload those too, otherwise edits to run_inference / tokenize_sample are not
# picked up. The dataloader goes first in case the inference module imports it.
for _module_name in ("src.utils.dataloader", "src.services.cnet_inference"):
    importlib.reload(importlib.import_module(_module_name))

# Ensure that the BertLayer class is available
from models.cnet import BertLayer, Encoder, Middle, Decoder, PositionalEncoding
from src.utils.dataloader import tokenize_sample
from src.services.cnet_inference import predict_intent_and_tags, run_inference

# Now use the reloaded modules
from models import cnet
from services import cnet_inference
from utils import dataloader


In [13]:
# System package: ffmpeg binary for audio decoding (presumably needed by
# Whisper/pydub to read the recorded file -- confirm against their docs).
!apt-get install -y ffmpeg
# Python packages for audio handling and transcription. scipy and
# openai-whisper are also installed by the setup cell, so this is a no-op for
# them when that cell has already run.
!pip install pydub scipy openai-whisper

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.


# main test

In [15]:
# import sounddevice as sd
import numpy as np
from scipy.io.wavfile import write
import whisper
import IPython.display as ipd
import torch
from pydub import AudioSegment
from pydub.playback import play
from google.colab import output
import base64
import time
def record_audio(duration=5, sample_rate=16000):
    """Record microphone audio in the browser through Colab and save it to disk.

    The recording runs client-side with the browser's MediaRecorder; the result
    is sent back to Python as a base64 data URL and written to
    ``/content/temp_audio.wav``.

    NOTE(review): the file contains whatever container the browser's
    MediaRecorder produces, which is not necessarily WAV despite the extension.

    Args:
        duration: Recording length in seconds. Must be positive.
        sample_rate: Currently unused; kept so existing callers keep working.

    Returns:
        Path of the file the recording was written to.

    Raises:
        ValueError: If ``duration`` is not positive.
        RuntimeError: If the browser returned no audio payload.
    """
    if duration <= 0:
        raise ValueError("duration must be a positive number of seconds")

    # Record audio using Google Colab's built-in recorder
    print("Recording audio...")
    record_js = """
    const sleep = time => new Promise(resolve => setTimeout(resolve, time))
    const b2text = blob => new Promise(resolve => {
        const reader = new FileReader()
        reader.onload = () => resolve(reader.result)
        reader.readAsDataURL(blob)
    })

    var record = time => new Promise((resolve, reject) => {
        navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
            const recorder = new MediaRecorder(stream)
            const chunks = []
            recorder.ondataavailable = e => chunks.push(e.data)
            recorder.onstop = async () => {
                // Release the microphone so the browser stops capturing.
                stream.getTracks().forEach(track => track.stop())
                resolve(await b2text(new Blob(chunks)))
            }
            recorder.start()
            sleep(time).then(() => recorder.stop())
        }).catch(reject)  // e.g. permission denied: fail instead of hanging
    })

    """
    display(ipd.Javascript(record_js))
    # `record` takes milliseconds and resolves to "data:<mime>;base64,<payload>".
    audio_data = output.eval_js('record(%d)' % (duration * 1000))
    _, _, encoded_audio = audio_data.partition(",")
    if not encoded_audio:
        raise RuntimeError("No audio data was returned by the browser recorder")
    audio_file_path = "/content/temp_audio.wav"
    with open(audio_file_path, "wb") as f:
        f.write(base64.b64decode(encoded_audio))
    return audio_file_path

def main_text_only(text_input="start a one hour timer and name it shabadoo"):
    """Run command inference on a text utterance, skipping audio capture.

    Uses the notebook-level ``model`` and ``run_inference``.

    Args:
        text_input: Utterance to interpret. Defaults to the sample timer
            command this notebook was originally tested with.

    Returns:
        Whatever ``run_inference`` returns for the utterance, or ``None`` if
        the run was interrupted from the keyboard.
    """
    try:
        closest_command = run_inference(text_input, model)
        print("Interpreted command:", closest_command)
        return closest_command
    except KeyboardInterrupt:
        print("Stopping...")
        return None

def main(duration=5):
    """Record speech, transcribe it with Whisper, and interpret the command.

    Uses the notebook-level ``model``, ``run_inference`` and ``record_audio``.
    If no ``whisper_model`` global exists yet, the "large" Whisper model is
    loaded on first use and cached in that global for later calls.

    Args:
        duration: Recording length in seconds.

    Returns:
        Whatever ``run_inference`` returns for the transcribed speech, or
        ``None`` if nothing was heard or the run was interrupted.
    """
    global whisper_model

    try:
        # Load Whisper before recording so a missing model cannot waste a take.
        # load_model() picks the device itself when none is given.
        if "whisper_model" not in globals():
            print("Loading Whisper model...")
            whisper_model = whisper.load_model("large")

        print("Listening...")

        # Record audio from the microphone
        audio_file_path = record_audio(duration=duration)

        # Play the recorded audio
        display(ipd.Audio(audio_file_path))

        # Process the audio using Whisper
        print("Processing audio with Whisper...")
        start_time = time.time()
        result = whisper_model.transcribe(audio_file_path)
        transcription_time = time.time() - start_time

        # Strip so a whitespace-only transcription counts as "nothing heard".
        user_speech = result["text"].strip()
        print("Heard:", user_speech)
        print(f"Transcription took {transcription_time:.2f} seconds")

        if not user_speech:
            return None

        closest_command = run_inference(user_speech, model)
        print("Interpreted command:", closest_command)
        return closest_command
    except KeyboardInterrupt:
        print("Stopping...")
        return None

# Load the command-processing model from the cloned Hugging Face repository
model_path = "/content/saiCommandProcessor/processor_model/ctranfinal_eic_7_31"
bert_addr = '/content/saiCommandProcessor/bert-large-uncased'
model = cnet.CNet(model_path=model_path, bert_addr=bert_addr)
# Move to the GPU only when one exists, so a CPU-only runtime does not crash here.
if torch.cuda.is_available():
    model = model.cuda()
else:
    print("Warning: CUDA is not available; the model stays on the CPU "
          "(inference may be slow or unsupported).")

# Optional: uncomment to load Whisper up front. main() transcribes with the
# `whisper_model` global; main_text_only() does not need it.
# whisper_model = whisper.load_model("large")

# Move the Whisper model to CUDA
# if torch.cuda.is_available():
    # whisper_model = whisper_model.cuda()

In [17]:
# Text-only smoke test: interpret the built-in sample command.
cls_cmd = main_text_only()


Interpreted command: {'intent': 'set_timer', 'tags': {'B-timer_length': ['one'], 'I-timer_length': ['hour'], 'B-timer_name': ['shabadoo']}, 'timings': {'bert_input_prep': 0.00020885467529296875, 'sequence_prep': 6.723403930664062e-05, 'model_processing': 0.17401504516601562}}


# Inference

In [None]:
# Example (uses the current two-argument signature: text, model):
# print(run_inference("play the beatles on apple music", model))


In [None]:
# Example (uses the current two-argument signature: text, model):
# print(run_inference("start a one hour timer and name it shabadoo", model))
