%%html
<style>
pre {
    white-space: pre-wrap;
}
</style>


# Pipeline example

## Pipeline

In [7]:
## The pipeline() makes it simple to use any model from the Hub for inference on any language, computer vision, speech, and multimodal tasks.

In [5]:
!pip install python-ffmpeg



In [1]:
from transformers import pipeline
import ffmpeg

In [2]:
generator = pipeline(task="automatic-speech-recognition")
audio_link = "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac"
generator(audio_link)

No model was supplied, defaulted to facebook/wav2vec2-base-960h and revision 55bb623 (https://huggingface.co/facebook/wav2vec2-base-960h).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'text': 'I HAVE A DREAM BUT ONE DAY THIS NATION WILL RISE UP LIVE UP THE TRUE MEANING OF ITS TREES'}

In [5]:
model = "openai/whisper-small"
# use openai/whisper-large for better result
generator = pipeline(model=model)
generator(audio_link)

{'text': ' I have a dream that one day this nation will rise up and live out the true meaning of its creed.'}

In [6]:
# multiple input case
generator(
    [
        "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac",
        "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/1.flac",
    ]
)

[{'text': ' I have a dream that one day this nation will rise up and live out the true meaning of its creed.'},
 {'text': ' He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.'}]

## parameter

In [None]:
# generator= pipeline(model=model, my_parameter=1)
# out = generator(...) # this will use my_parameter=1
# out = generator(..., my_parameter=2) # this will override and assign my_parameter=1
# out = generator(...) # this will use my_parameter=1 by default

## device

In [8]:
import torch
print(torch.cuda.device_count())

0


In [9]:
# this code will not work as this device has no gpu
generator = pipeline(model=model, device=0)
#!pip install accelerate
generator = pipeline(model="openai/whisper-large", device_map="auto")

AssertionError: Torch not compiled with CUDA enabled

## Batch Size

In [None]:
# By default, pipelines will not batch inference 
generator = pipeline(model=model, batch_size=2)
audio_file_name = [f"audio_{i}.flac" for i in range(10)]
text = generator(audio_file_name)

# This runs the pipeline on the 10 provided audio files, but it will pass them in batches of 2 to the model (which is on a GPU, where batching is more likely to help) without requiring any further code from you. The output should always match what you would have received without batching. It is only meant as a way to help you get more speed out of a pipeline.

In [None]:
# This runs the pipeline on the 10 provided audio files, but it will pass them in batches of 2 to the model (which is on a GPU, where batching is more likely to help) without requiring any further code from you. The output should always match what you would have received without batching. It is only meant as a way to help you get more speed out of a pipeline.

In [2]:
!pip install tensorflow

Collecting tensorflow
  Obtaining dependency information for tensorflow from https://files.pythonhosted.org/packages/9e/b8/ed5f794359d05cd0bffb894c6418da87b93016ee17b669d55c45d1bd5d5b/tensorflow-2.13.0-cp311-cp311-win_amd64.whl.metadata
  Downloading tensorflow-2.13.0-cp311-cp311-win_amd64.whl.metadata (2.6 kB)
Collecting tensorflow-intel==2.13.0 (from tensorflow)
  Obtaining dependency information for tensorflow-intel==2.13.0 from https://files.pythonhosted.org/packages/2f/2f/3c84f675931ce3bcbc7e23acbba1e5d7f05ce769adab48322de57a9f5928/tensorflow_intel-2.13.0-cp311-cp311-win_amd64.whl.metadata
  Downloading tensorflow_intel-2.13.0-cp311-cp311-win_amd64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.13.0->tensorflow)
  Downloading absl_py-1.4.0-py3-none-any.whl (126 kB)
     ---------------------------------------- 0.0/126.5 kB ? eta -:--:--
     --------- ----------------------------- 30.7/126.5 kB 1.3 MB/s eta 0:00:01
     ----------------- ----------------

   ----------- ---------------------------- 76.7/276.6 MB 1.1 MB/s eta 0:03:07
   ----------- ---------------------------- 76.8/276.6 MB 1.1 MB/s eta 0:02:57
   ----------- ---------------------------- 77.0/276.6 MB 1.2 MB/s eta 0:02:54
   ----------- ---------------------------- 77.1/276.6 MB 1.2 MB/s eta 0:02:53
   ----------- ---------------------------- 77.3/276.6 MB 1.2 MB/s eta 0:02:53
   ----------- ---------------------------- 77.4/276.6 MB 1.2 MB/s eta 0:02:52
   ----------- ---------------------------- 77.4/276.6 MB 1.2 MB/s eta 0:02:52
   ----------- ---------------------------- 77.5/276.6 MB 1.2 MB/s eta 0:02:54
   ----------- ---------------------------- 77.6/276.6 MB 1.1 MB/s eta 0:02:54
   ----------- ---------------------------- 77.6/276.6 MB 1.1 MB/s eta 0:02:54
   ----------- ---------------------------- 77.8/276.6 MB 1.1 MB/s eta 0:02:55
   ----------- ---------------------------- 77.8/276.6 MB 1.1 MB/s eta 0:02:54
   ----------- ---------------------------- 77.9/276

In [4]:
from transformers import CountVectorizer

# Sample corpus
corpus = [
    "This is the first document.",
    "This document is the second document.",
    "And this is the third one.",
    "Is this the first document?",
]

# Create a CountVectorizer object
vectorizer = CountVectorizer()

# Fit and transform the corpus
X = vectorizer.fit_transform(corpus)

# Print the feature names
print(vectorizer.get_feature_names())

# Print the feature matrix
print(X.toarray())

ImportError: cannot import name 'CountVectorizer' from 'transformers' (C:\Users\arman\anaconda3\Lib\site-packages\transformers\__init__.py)