In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
def check_gpu():
    """Check and return the best available device (GPU or CPU)."""
    if torch.cuda.is_available():
        return "cuda"
    elif torch.backends.mps.is_available():  # Apple Silicon GPU
        return "mps"
    else:
        return "cpu"

check_gpu()

'cuda'

In [3]:
import torch
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer


def load_llama_model(model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0", quantize=False):
    """
    Load a LLAMA model variant using PyTorch.
    
    Args:
        model_name: The model to load from Hugging Face
        quantize: Whether to apply 4-bit quantization
        
    Returns:
        model: The loaded model
        tokenizer: The tokenizer for the model
    """
    print(f"Loading model: {model_name}")
    device = check_gpu()
    
    # # Load the tokenizer
    # tokenizer = AutoTokenizer.from_pretrained(model_name)
    
    # Set up quantization options if enabled
    # if quantize:
    #     print("Loading with 4-bit quantization...")
    #     model = AutoModelForCausalLM.from_pretrained(
    #         model_name,
    #         torch_dtype=torch.float16,
    #         device_map="auto"
    #     )
    # else:
    #     # Load without quantization
    #     model = AutoModelForCausalLM.from_pretrained(
    #         model_name,
    #         torch_dtype=torch.float16,
    #         device_map="auto"
    #     )
    # model = AutoModel.from_pretrained(
    #     model_name
    # )
    # Load model directly

    tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
    model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
    # Explicitly move model to GPU if available
    model.to(device)
    
    print(f"Model loaded with {sum(p.numel() for p in model.parameters()) / 1e6:.2f}M parameters")
    return model, tokenizer


In [4]:

def generate_text(model, tokenizer, prompt, max_new_tokens=2056, temperature=0.7):
    """
    Generate text using the loaded model.
    
    Args:
        model: The loaded model
        tokenizer: The tokenizer for the model
        prompt: The input prompt to generate from
        max_new_tokens: Maximum number of new tokens to generate
        temperature: Controls randomness (lower is more deterministic)
        
    Returns:
        The generated text as a string
    """
    # Prepare the model inputs
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    
    # Generate text
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=0.95,
            top_k=50,
            repetition_penalty=1.1,
        )
    
    # Decode the generated text
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text

In [5]:
# Choose model and whether to apply quantization
model_name = "deepseek-ai/Janus-Pro-7B"
use_quantization = False  # Set to True for larger models

In [6]:
model, tokenizer = load_llama_model(model_name, quantize=use_quantization)


Loading model: deepseek-ai/Janus-Pro-7B


2025-03-16 19:11:24.917635: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-16 19:11:24.999401: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742123485.028833   10471 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742123485.038294   10471 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1742123485.108307   10471 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

Model loaded with 1777.09M parameters


In [None]:

# Try a simple prompt
prompt = "You are an AI bot that writes essays. The following is an essay about visual language machine learning models (1500 word):"

print("\nPrompt:", prompt)
print("\nGenerating response...")

response = generate_text(model, tokenizer, prompt)

print("\nGenerated response:")
print(response)

import pyttsx3

engine = pyttsx3.init()

# Select a voice by index (based on previous output)
voice_index = 0  # Change this to select different voices
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[voice_index].id)

# Set speech rate (optional)
engine.setProperty('rate', 150)

# Speak text
text = "Hello, this is a custom voice test!"
engine.say(response)
engine.runAndWait()


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



Prompt: You are an AI bot that writes essays. The following is an essay about visual language machine learning models (1500 word):

Generating response...

Generated response:
You are an AI bot that writes essays. The following is an essay about visual language machine learning models (1500 word): 

[...]
Alright, so I've been thinking about how to apply machine learning models using visual languages. This seems like a fascinating area because it combines two powerful concepts: the ability to represent information in a structured and efficient way (visual languages) and the flexibility of machine learning algorithms.

First off, what exactly is a visual language? From my limited understanding, it's a system where data or signals can be encoded into a set of symbols or visual elements that can be decoded by another system. Common examples include icons, shapes, colors, or diagrams. These visual elements have specific meanings and rules for how they can be used together.

In this contex

In [None]:
print(next(model.parameters()).device)  # Should print "cuda:0"


In [None]:
import torch
print("CUDA Available:", torch.cuda.is_available())  # Should print True
print("CUDA Device Count:", torch.cuda.device_count())  # Should be > 0
print("CUDA Device Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU found")
print("MPS Available:", torch.backends.mps.is_available())  # For Apple M1/M2 GPUs


In [11]:
!pip install pyttsx3


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting pyttsx3
  Downloading pyttsx3-2.98-py3-none-any.whl.metadata (3.8 kB)
Downloading pyttsx3-2.98-py3-none-any.whl (34 kB)
Installing collected packages: pyttsx3
Successfully installed pyttsx3-2.98


In [12]:
import pyttsx3

engine = pyttsx3.init()

# Get available voices
voices = engine.getProperty('voices')

# Print available voices
for index, voice in enumerate(voices):
    print(f"Voice {index}: {voice.name} - {voice.id}")


Voice 0: Afrikaans - Afrikaans
Voice 1: Amharic - Amharic
Voice 2: Aragonese - Aragonese
Voice 3: Arabic - Arabic
Voice 4: Assamese - Assamese
Voice 5: Azerbaijani - Azerbaijani
Voice 6: Bashkir - Bashkir
Voice 7: Belarusian - Belarusian
Voice 8: Bulgarian - Bulgarian
Voice 9: Bengali - Bengali
Voice 10: Bishnupriya Manipuri - Bishnupriya Manipuri
Voice 11: Bosnian - Bosnian
Voice 12: Catalan - Catalan
Voice 13: Cherokee  - Cherokee 
Voice 14: Chinese (Mandarin, latin as English) - Chinese (Mandarin, latin as English)
Voice 15: Chinese (Mandarin, latin as Pinyin) - Chinese (Mandarin, latin as Pinyin)
Voice 16: Czech - Czech
Voice 17: Chuvash - Chuvash
Voice 18: Welsh - Welsh
Voice 19: Danish - Danish
Voice 20: German - German
Voice 21: Greek - Greek
Voice 22: English (Caribbean) - English (Caribbean)
Voice 23: English (Great Britain) - English (Great Britain)
Voice 24: English (Scotland) - English (Scotland)
Voice 25: English (Lancaster) - English (Lancaster)
Voice 26: English (West Mi

In [13]:
import pyttsx3

engine = pyttsx3.init()

# Select a voice by index (based on previous output)
voice_index = 22  # Change this to select different voices
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[voice_index].id)

# Set speech rate (optional)
engine.setProperty('rate', 150)

# Speak text
text = "Hello, this is a custom voice test!"
engine.say(text)
engine.runAndWait()
