In [1]:
# !pip install --upgrade transformers torch

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import login
import os

# Clear the Hugging Face cache
# cache_path = os.path.expanduser("~/.cache/huggingface")
# if os.path.exists(cache_path):
#     for root, dirs, files in os.walk(cache_path, topdown=False):
#         for name in files:
#             os.remove(os.path.join(root, name))
#         for name in dirs:
#             os.rmdir(os.path.join(root, name))

# Log in to Hugging Face.
# The access token is read from the HF_TOKEN environment variable so it never
# has to be written into the notebook. When the variable is unset, login()
# receives None and falls back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN"))

model_id = "meta-llama/Llama-3.2-3B-Instruct"

try:
    # Load tokenizer and model (weights in bfloat16, automatic device placement)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    # Create the pipeline. The model object above already carries its dtype
    # and device placement, so those options are not repeated here.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )

    # Prepare the input
    messages = [
        {"role": "system", "content": "You are my girlfriend"},
        {"role": "user", "content": "Who are you? Tell me about yourself in detail."},
    ]

    # Convert messages to Llama 3.2 chat format.
    # add_generation_prompt=True appends the assistant header to the prompt so
    # the model starts directly with its reply instead of first having to
    # produce the "assistant" turn marker itself.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Generate response (sampling is enabled, so the text differs between runs)
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
    )

    # generated_text holds the prompt followed by the model's continuation
    print(outputs[0]["generated_text"])

except Exception as e:
    # Deliberate best-effort: report the failure and list the usual access
    # problems rather than aborting the cell with a traceback.
    # NOTE(review): the hints below only cover authentication/network issues;
    # other failures (e.g. out of memory) are reported through the first line.
    print(f"An error occurred: {e}")
    print("Please ensure that:")
    print("1. Your token has the correct permissions (read access to public gated models)")
    print("2. You have been granted access to the Llama 3.2 model")
    print("3. Your internet connection is stable")
    print("If the issue persists, try regenerating your token or contact Hugging Face support.")

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /home/raw/.cache/huggingface/token
Login successful


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 13 Oct 2024

You are my girlfriend<|eot_id|><|start_header_id|>user<|end_header_id|>

Who are you? Tell me about yourself in detail.<|eot_id|>assistant

I'm so happy to share all about myself with you.

My name is Emily, and I'm a 25-year-old graphic designer and artist. I was born and raised in a small town in the United States, surrounded by nature and a close-knit community. I'm a creative soul with a passion for art, music, and exploring new ideas.

When I'm not working on design projects, you can find me trying out new recipes in the kitchen, practicing yoga, or reading a good book. I'm a bit of a movie buff and love watching old classics, especially film noir and rom-coms. I'm also a sucker for a good pun and can often be found making dad jokes with my friends.

I'm a bit of a hopeless romantic, always believing in the best in people and the world. I value honesty, kindn

In [2]:
# !huggingface-cli login --token "$HF_TOKEN"
# !huggingface-cli download meta-llama/Llama-3.2-3B-Instruct --include "original/*" --local-dir Llama-3.2-3B-Instruct

In [5]:
!pip install vosk sounddevice


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting sounddevice
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice
Successfully installed sounddevice-0.5.1


In [8]:
import os
import sounddevice as sd
import vosk
import queue
import json
import time

# Define paths to the models.
# The directory that holds the model folders can be overridden with the
# VOSK_MODEL_DIR environment variable; the default is the original location.
VOSK_MODEL_DIR = os.environ.get("VOSK_MODEL_DIR", "/home/raw/coding/cosmos/data_store/audio")

MODEL_PATH_EN = os.path.join(VOSK_MODEL_DIR, "vosk-model-en-us-0.22", "vosk-model-en-us-0.22")
MODEL_PATH_HI = os.path.join(VOSK_MODEL_DIR, "vosk-model-hi-0.22", "vosk-model-hi-0.22")
MODEL_PATH_GU = os.path.join(VOSK_MODEL_DIR, "vosk-model-gu-0.42", "vosk-model-gu-0.42")

# Fail early with a clear message when a model directory is missing, instead
# of leaving the error to the model loader. Raising (rather than calling
# exit) keeps the notebook kernel alive.
_missing_model_dirs = [
    path
    for path in (MODEL_PATH_EN, MODEL_PATH_HI, MODEL_PATH_GU)
    if not os.path.isdir(path)
]
if _missing_model_dirs:
    raise FileNotFoundError(
        "Vosk model directory not found: " + ", ".join(_missing_model_dirs)
    )

# Load all three models (English, Hindi, Gujarati)
model_en = vosk.Model(MODEL_PATH_EN)
model_hi = vosk.Model(MODEL_PATH_HI)
model_gu = vosk.Model(MODEL_PATH_GU)

# Set up a queue for audio data (filled by the stream callback, drained by the
# recognition loop)
q = queue.Queue()

# Audio-stream callback: forwards every captured block to the shared queue
def callback(indata, frames, time, status):
    """Push one captured audio block onto the module-level queue ``q``.

    A truthy ``status`` is printed (and flushed immediately) before the block
    is queued. ``frames`` and ``time`` are accepted but not used.
    """
    if status:
        print(status, flush=True)
    chunk = bytes(indata)
    q.put(chunk)

# Function to identify the spoken language
def detect_language(rec_en, rec_hi, rec_gu, data):
    """Feed one audio chunk to every recognizer and report the first final text.

    Each recognizer keeps its own decoding state, so all three must receive
    every chunk. The chunk is therefore passed to all of them before any
    result is inspected; returning early after the first match would leave
    the remaining recognizers with gaps in their audio.

    Args:
        rec_en: English recognizer (needs ``AcceptWaveform`` and ``Result``).
        rec_hi: Hindi recognizer with the same methods.
        rec_gu: Gujarati recognizer with the same methods.
        data: Raw audio bytes for one block.

    Returns:
        ``(lang, text)`` where ``lang`` is ``'en'``, ``'hi'`` or ``'gu'`` for
        the first recognizer, in that priority order, that finalized a
        non-empty utterance on this chunk; ``(None, None)`` otherwise.
    """
    candidates = (('en', rec_en), ('hi', rec_hi), ('gu', rec_gu))

    # The list comprehension has no short-circuit: every recognizer is fed.
    finalized = [(lang, rec) for lang, rec in candidates if rec.AcceptWaveform(data)]

    # Collect the result of every recognizer that finalized on this chunk so
    # none of them is left holding an unread utterance.
    results = [(lang, json.loads(rec.Result()).get('text')) for lang, rec in finalized]

    for lang, text in results:
        if text:
            return lang, text
    return None, None

# Function to continuously listen and detect language
def listen_and_recognize(samplerate=16000, blocksize=8000, silence_timeout=3):
    """Recognize microphone speech until no speech is heard for a while.

    Opens a mono 16-bit raw input stream whose callback fills the module-level
    queue ``q``, passes each queued block to ``detect_language`` and prints
    every recognized utterance. The loop ends once ``silence_timeout`` seconds
    pass without a finalized utterance or a non-empty partial hypothesis.

    Args:
        samplerate: Sample rate in Hz for the stream and the recognizers.
        blocksize: Number of frames per block delivered to the callback.
        silence_timeout: Seconds without speech after which listening stops.
    """

    def _speech_in_progress(rec):
        # A non-empty partial hypothesis means an utterance is still being
        # spoken even though no final result has been produced yet.
        return bool(json.loads(rec.PartialResult()).get('partial'))

    # Drop blocks left in the shared queue by an earlier run so stale audio is
    # not decoded as if it had just been spoken.
    while True:
        try:
            q.get_nowait()
        except queue.Empty:
            break

    rec_en = vosk.KaldiRecognizer(model_en, samplerate)
    rec_hi = vosk.KaldiRecognizer(model_hi, samplerate)
    rec_gu = vosk.KaldiRecognizer(model_gu, samplerate)
    recognizers = (rec_en, rec_hi, rec_gu)

    # Start audio stream
    with sd.RawInputStream(samplerate=samplerate, blocksize=blocksize, dtype='int16', channels=1, callback=callback):
        print("Listening... Press Ctrl+C to stop.")
        # monotonic() is unaffected by system clock adjustments, which makes
        # it the right clock for measuring an elapsed interval.
        last_spoken_time = time.monotonic()

        while True:
            data = q.get()

            # Detect which language was spoken
            lang, text = detect_language(rec_en, rec_hi, rec_gu, data)

            if lang and text:
                print(f"Recognized {lang.upper()} text: {text}")
                last_spoken_time = time.monotonic()  # Reset timer when speech is detected
            elif any(_speech_in_progress(rec) for rec in recognizers):
                # Final results only arrive at the end of an utterance; without
                # this, speech longer than the timeout would be cut off.
                last_spoken_time = time.monotonic()

            # Check for silence_timeout seconds of silence
            if time.monotonic() - last_spoken_time > silence_timeout:
                print("Stopped listening due to silence.")
                break



LOG (VoskAPI:ReadDataFiles():model.cc:213) Decoding params beam=13 max-active=7000 lattice-beam=6
LOG (VoskAPI:ReadDataFiles():model.cc:216) Silence phones 1:2:3:4:5:11:12:13:14:15
LOG (VoskAPI:RemoveOrphanNodes():nnet-nnet.cc:948) Removed 0 orphan nodes.
LOG (VoskAPI:RemoveOrphanComponents():nnet-nnet.cc:847) Removing 0 orphan components.
LOG (VoskAPI:ReadDataFiles():model.cc:248) Loading i-vector extractor from /home/raw/coding/cosmos/data_store/audio/vosk-model-en-us-0.22/vosk-model-en-us-0.22/ivector/final.ie
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:183) Computing derived variables for iVector extractor
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:204) Done.
LOG (VoskAPI:ReadDataFiles():model.cc:279) Loading HCLG from /home/raw/coding/cosmos/data_store/audio/vosk-model-en-us-0.22/vosk-model-en-us-0.22/graph/HCLG.fst
LOG (VoskAPI:ReadDataFiles():model.cc:297) Loading words from /home/raw/coding/cosmos/data_store/audio/vosk-model-en-us-0.22/vosk-model-en-us-0.

In [10]:
# Entry point: run the listening loop when this code is executed directly.
if __name__ == "__main__":
    try:
        listen_and_recognize()
    except KeyboardInterrupt:
        # An interrupt (Ctrl+C) is the expected way to stop early, so it is
        # reported with a short message instead of a traceback.
        print("\nStopped by user")


Listening... Press Ctrl+C to stop.
Stopped listening due to silence.
