In [1]:
import torch
import os
import warnings
# Suppress every Python warning for the rest of the session, so warnings raised
# by the libraries loaded in later cells are not shown.
warnings.filterwarnings("ignore")

In [2]:
# Report whether PyTorch can use a CUDA device; the value is shown as the cell output.
torch.cuda.is_available()

False

In [3]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# The tokenizer and the causal-LM weights both come from the "NexaAIDev/Octopus-v2" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("NexaAIDev/Octopus-v2")
# device_map="auto" leaves device placement to the library.
# trust_remote_code=True permits running model code shipped with the checkpoint,
# so it should only be used with a source you trust.
model = AutoModelForCausalLM.from_pretrained("NexaAIDev/Octopus-v2", device_map="auto",  trust_remote_code=True)

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.
Loading checkpoint shards: 100%|██████████| 2/2 [00:13<00:00,  6.56s/it]


In [4]:
def _parse_octopus_call(generated_text):
    """Extract ``(func_name, argument)`` from decoded model output.

    Only the first ``<...>`` token in the text is inspected. ``("", "")`` is
    returned when there is no such token, the token is not one this notebook
    supports, or the expected argument is missing.
    """
    _, opener, after_opener = generated_text.partition("<")
    if not opener:
        # The model produced no functional token at all.
        return "", ""

    # Segment between the first "<" and the next one, e.g. "nexa_2>('Mumbai')".
    call_segment = after_opener.split("<", 1)[0]
    token, _, call_args = call_segment.partition(">")

    if token == "nexa_2":
        # The location is the first single-quoted string in the call.
        quoted = call_args.split("'")
        if len(quoted) > 1:
            return "get_weather_forecast", quoted[1]
    elif token == "nexa_13":
        # The level is the first positional argument. Strip the closing
        # parenthesis, quotes and spaces so a single-argument call "(5)"
        # yields "5" rather than "5)".
        _, paren, inner = call_args.partition("(")
        first_arg = inner.split(",")[0].strip(" \t\n)'\"")
        if paren and first_arg:
            return "set_volume", first_arg

    return "", ""


def run_octopus(input_text):
    """Run the Octopus model on a query and map its functional token to a function name.

    The query is tokenized, decoded greedily (``do_sample=False``) with a total
    length cap of 1024 tokens, and only the newly generated tokens are turned
    back into text. That text is printed and then parsed for a functional token.

    Parameters:
    - input_text (str): The user query sent to the model as-is.

    Returns:
    - tuple[str, str]: ``(func_name, argument)``. Both are empty strings when the
      output contains no supported functional token or its argument cannot be
      extracted, instead of raising ``IndexError``.
    """
    # NOTE(review): the query is passed without any instruction template; the
    # checkpoint's model card may recommend a specific prompt format - confirm.
    encoded = tokenizer(input_text, return_tensors="pt").to(model.device)
    prompt_length = encoded["input_ids"].shape[1]
    outputs = model.generate(
        input_ids=encoded["input_ids"],
        max_length=1024,
        do_sample=False)

    # Drop the prompt tokens so only the continuation is decoded.
    generated_sequence = outputs[:, prompt_length:].tolist()
    res = tokenizer.decode(generated_sequence[0])

    print("octopus result", res)

    # Use functional tokens to map to functions
    return _parse_octopus_call(res)

In [5]:
# Smoke test: send one text query through run_octopus and print the
# (function name, argument) pair it returns.
query = "How's weather in Mumbai?"
func_name, argument = run_octopus(query)
print(func_name, argument)

octopus result  I'm planning a trip.

Answer: <nexa_2>('Mumbai')<nexa_end>

Function description: 
def get_weather_forecast(location):
    """
    Provides a weather forecast for a specified location over a given number of days. Each day's forecast includes a brief description of the expected weather conditions.

    Parameters:
    - location (str): The location for which the weather forecast is desired. Can be a city name, ZIP code, or other location identifiers.

    Returns:
    - list[str]: A list of strings, each representing the weather forecast for one day. Each string includes the date and a brief description of the weather conditions. Formatted in 'YYYY-MM-DD: Description' format.
    """
<eos>
get_weather_forecast Mumbai


In [6]:
from transformers import AutoProcessor, SeamlessM4TModel
import torchaudio
import time

# Processor and model for the speech-to-text step, both loaded from the
# "facebook/hf-seamless-m4t-medium" checkpoint; speech_to_text() reads these globals.
processor = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")
stot_model = SeamlessM4TModel.from_pretrained("facebook/hf-seamless-m4t-medium")

In [7]:
def speech_to_text(filepath):
    """Transcribe an audio file to English text using the module-level SeamlessM4T objects.

    Parameters:
    - filepath: Location of the audio file; passed straight to ``torchaudio.load``.

    Returns:
    - The text decoded from the first generated sequence, with special tokens skipped.

    The elapsed wall-clock time and the transcript are printed as a side effect.
    """
    started_at = time.time()

    # Read the waveform, then resample it to 16 kHz before it goes to the processor.
    waveform, source_rate = torchaudio.load(filepath, backend="soundfile")
    waveform_16k = torchaudio.functional.resample(
        waveform,
        orig_freq=source_rate,
        new_freq=16_000,
    )

    model_inputs = processor(audios=waveform_16k, return_tensors="pt")
    token_batch = stot_model.generate(
        **model_inputs,
        tgt_lang="eng",
        generate_speech=False,
    )

    # Only the first sequence of the first returned element is decoded.
    first_sequence = token_batch[0].tolist()[0]
    transcript = processor.decode(first_sequence, skip_special_tokens=True)

    print("Time taken for speech to text: ", time.time() - started_at)
    print("Speech to text output: ", transcript)

    return transcript

In [8]:
def set_volume(level):
    """Change the device volume by sending repeated adb volume key events.

    Parameters:
    - level: Value convertible with ``int``. ``0`` sends 15 VOLUME_DOWN key
      events; any other value sends that many VOLUME_UP key events.

    The key events are issued through a ``FOR /L`` loop run in a new ``cmd``
    window via ``os.system``.
    """
    presses = int(level)

    # Pick the loop to run; both branches are launched the same way below.
    if presses == 0:
        adb_loop = 'FOR /L %G IN (1,1,15) DO adb shell input keyevent KEYCODE_VOLUME_DOWN'
    else:
        adb_loop = f'FOR /L %G IN (1,1,{presses}) DO adb shell input keyevent KEYCODE_VOLUME_UP'

    os.system(f"start /wait cmd /k {adb_loop}")

In [9]:
def get_weather_forecast(place):
    """Open a Google weather search for ``place`` on the device via ``adb shell am start``.

    Parameters:
    - place: Location to search for. It is converted with ``str`` and
      URL-encoded before being placed in the query string.

    The command is run in a new ``cmd`` window through ``os.system``.
    """
    from urllib.parse import quote_plus

    # Encode the place so multi-word names ("New York") and characters such as
    # "&" stay inside the query string instead of splitting the URL or being
    # interpreted by the shell. Plain names like "Mumbai" are left unchanged.
    encoded_place = quote_plus(str(place))
    command = f'adb shell am start -a android.intent.action.VIEW -d https://www.google.com/search?q=Weather+of+{encoded_place}'
    os.system(f"start /wait cmd /k {command}")

In [10]:
def run_sdb(function_name, argument):
    """Look up a function by name in this module's globals and call it with one argument.

    Parameters:
    - function_name (str): Name of a callable defined at module level.
    - argument: Value passed as the only positional argument.

    Raises:
    - ValueError: If no callable with that name exists, for example when an
      empty name is passed in. Without this check the lookup result ``None``
      would be called and fail with an unhelpful ``TypeError``.
    """
    func = globals().get(function_name)
    if not callable(func):
        raise ValueError(
            f"No callable named {function_name!r} is available to handle this request"
        )
    func(argument)

In [11]:
import gradio as gr

with gr.Blocks() as demo:

    # Microphone recorder; type="filepath" means the handler receives a file path.
    # NOTE(review): newer Gradio releases may expect `sources=["microphone"]`
    # instead of `source=` - confirm against the installed version.
    audio = gr.Audio(source = "microphone", type = "filepath", label = "Record")
    with gr.Row():
        submit = gr.Button("Submit")
        clear = gr.ClearButton([audio])

    def user(filepath):
        """Transcribe the recording, ask the model which function to call, then run it.

        Parameters:
        - filepath: Path of the recorded audio, or an empty value when
          nothing has been recorded.
        """
        if not filepath:
            # Submit was clicked with no recording; there is nothing to transcribe.
            print("No audio recorded; nothing to run.")
            return

        user_message = speech_to_text(filepath)
        print(user_message)
        func_name, argument = run_octopus(user_message)
        print(func_name, argument)
        run_sdb(func_name, argument)

    # The handler produces no UI output, so the output component list is empty.
    submit.click(user, [audio], [], queue=False)

demo.queue()
demo.launch()

ModuleNotFoundError: No module named 'gradio'