Working from the Hugging Face Whisper Large V3 page (https://huggingface.co/openai/whisper-large-v3) to practice using the transformer.

- Uses environment AKStandard_AIML (Py 3.11.9)

In [1]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from datasets import load_dataset

In [5]:
import torch
print(torch.backends.mps.is_available())  # Should print True
print(torch.backends.mps.is_built())      # Should print True

True
True


In [2]:
device = torch.device("mps")  # Force Metal backend
print(device)

# Create a tensor and move it to GPU
x = torch.ones(5, device=device)
print(x)

mps
tensor([1., 1., 1., 1., 1.], device='mps:0')


In [6]:
print(torch.device("mps"))

mps


In [7]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print("Using device:", device)

Using device: mps


In [9]:
x = torch.ones(5, device="mps")
print(x)


tensor([1., 1., 1., 1., 1.], device='mps:0')


In [12]:
import torch
from transformers import AutoModel, AutoTokenizer
import time

model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

for device_str in ["cpu", "mps"]:  # Test both CPU and MPS
    device = torch.device(device_str)
    model = AutoModel.from_pretrained(model_name).to(device)

    text = "This is a test sentence."*32
    inputs = tokenizer(text, return_tensors="pt").to(device)

    # Run inference and time it
    torch.mps.empty_cache() if device_str == "mps" else None
    start_time = time.time()

    with torch.no_grad():
        output = model(**inputs)

    elapsed_time = time.time() - start_time
    print(f"Inference Time on {device_str.upper()}: {elapsed_time:.4f} seconds")

Inference Time on CPU: 0.7383 seconds
Inference Time on MPS: 0.5304 seconds


In [14]:
import torch
import time
from transformers import AutoModel, AutoTokenizer
from datasets import load_dataset

# Choose dataset (SQuAD is large, IMDB is also an option)
dataset = load_dataset("squad", split="train")  # Stanford Q&A dataset

# Extract the first 512 examples (adjust as needed)
texts = [example["context"] for example in dataset.select(range(128))]

# Choose model
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Function to benchmark
def benchmark(device_str):
    device = torch.device(device_str)
    model = AutoModel.from_pretrained(model_name).to(device)

    # Tokenize dataset
    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True).to(device)

    # Run inference
    torch.mps.empty_cache() if device_str == "mps" else None
    start_time = time.time()

    with torch.no_grad():
        output = model(**inputs)

    elapsed_time = time.time() - start_time
    print(f"Inference Time on {device_str.upper()}: {elapsed_time:.4f} seconds")

# Run benchmarks
benchmark("cpu")
benchmark("mps")

Inference Time on CPU: 14.8640 seconds
Inference Time on MPS: 8.3764 seconds


In [10]:
import torch
import time

# Set device
device_mps = torch.device("mps")
device_cpu = torch.device("cpu")

# Create a random tensor
size = (10000, 10000)
a_cpu = torch.randn(size, device="cpu")
b_cpu = torch.randn(size, device="cpu")

a_mps = torch.randn(size, device="mps")
b_mps = torch.randn(size, device="mps")

# CPU computation
start = time.time()
c_cpu = a_cpu @ b_cpu  # Matrix multiplication
torch.cuda.synchronize() if torch.cuda.is_available() else None
print("CPU Time:", time.time() - start)

# MPS computation
start = time.time()
c_mps = a_mps @ b_mps  # Matrix multiplication
torch.mps.synchronize()  # Ensure all ops are finished
print("MPS Time:", time.time() - start)

CPU Time: 2.2532479763031006
MPS Time: 2.3821640014648438


In [8]:
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
else:
    print ("MPS device not found.")

tensor([1.], device='mps:0')


In [4]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

device

'cpu'

In [None]:
model_id = "openai/whisper-large-v3"

In [None]:

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)

processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)

dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
sample = dataset[0]["audio"]

result = pipe(sample)
print(result["text"])


In [9]:
import torch
print(torch.backends.mps)  # Should print True if GPU is usable


<module 'torch.backends.mps' from '/opt/anaconda3/envs/AKStandard_AIML/lib/python3.11/site-packages/torch/backends/mps/__init__.py'>
