A simple chatbot using a Hugging Face model

In [None]:
from transformers.utils import logging
# Raise the transformers log threshold so only errors appear in cell output.
logging.set_verbosity_error()

In [None]:
from transformers import pipeline

In [None]:
# Conversational pipeline backed by the distilled 400M BlenderBot checkpoint.
# NOTE(review): the "conversational" task was deprecated and later removed
# from transformers (around v4.42) — confirm the pinned transformers version.
chatbot = pipeline(
    task="conversational",
    model="facebook/blenderbot-400M-distill",
)

In [None]:
# Opening user turn for the chatbot demo.
user_message = "What are some fun activities I can do in the Summer?"

In [None]:
from transformers import Conversation

In [None]:
# Wrap the opening user message in a Conversation, the object later passed to the chatbot.
conversation = Conversation(user_message)

In [None]:
# Show the conversation before the chatbot has been called on it.
print(conversation)

In [None]:
# Run the chatbot on the conversation; its return value becomes the new conversation state.
conversation = chatbot(conversation)

In [None]:
# Show the conversation after the chatbot call.
print(conversation)

In [None]:
# Append a follow-up user turn. The triple-quoted content begins with a space,
# a newline and indentation, all of which are part of the message that is sent.
conversation.add_message({"role":"user",
                          "content": ''' 
                          how can i learn to code?.'''})

In [None]:
# Show the conversation with the follow-up user turn added.
print(conversation)

In [None]:
# Run the chatbot on the extended conversation and print what it returns.
print(chatbot(conversation))

In [None]:
import gc

# Release the chatbot pipeline as well as the conversation: the pipeline is
# what holds the model, and it is not used again after this point, so
# deleting only `conversation` would leave the model in memory.
del conversation, chatbot
gc.collect()

Translation and Summarization

In [None]:
import torch

In [None]:
# Translation pipeline using the distilled 600M NLLB-200 checkpoint,
# with bfloat16 requested as the torch dtype.
translator = pipeline(
    task="translation",
    model="facebook/nllb-200-distilled-600M",
    torch_dtype=torch.bfloat16,
)

In [None]:
# Sample English text to translate. The first two sentences share a line, as do
# the last two; explicit "\n" separates the remaining ones.
text = (
    "My puppy is adorable, "
    "Your kitten is cute.\n"
    "Her panda is friendly.\n"
    "His llama is thoughtful. "
    "We all have nice pets!"
)

In [None]:
# Translate the sample from English ("eng_Latn") to French ("fra_Latn").
text_translated = translator(text, src_lang="eng_Latn", tgt_lang="fra_Latn")

In [None]:
# Display the translator's raw output.
text_translated

Free up some memory before continuing

In [None]:
import gc

# Release the translation pipeline along with its output: the pipeline holds
# the model and is not used again, so deleting only the small result would
# not free the memory this step is meant to reclaim.
del text_translated, translator
gc.collect()

Summarization

In [None]:
# Summarization pipeline using the BART-large CNN checkpoint,
# with bfloat16 requested as the torch dtype.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
    torch_dtype=torch.bfloat16,
)

In [None]:
# Passage to summarize. The indentation of the continuation lines sits inside
# the triple-quoted string, so it is part of the text handed to the summarizer.
text = """Paris is the capital and most populous city of France, with
          an estimated population of 2,175,601 residents as of 2018,
          in an area of more than 105 square kilometres (41 square
          miles). The City of Paris is the centre and seat of
          government of the region and province of Île-de-France, or
          Paris Region, which has an estimated population of
          12,174,880, or about 18 percent of the population of France
          as of 2017."""

In [None]:
# Summarize the passage, bounding the output length between 10 and 100.
summary = summarizer(
    text,
    min_length=10,
    max_length=100,
)

In [None]:
# Display the summarizer's raw output.
summary

In [None]:
# Drop the summarization pipeline and force a garbage-collection pass
# (relies on `gc` having been imported by an earlier cell).
del summarizer
gc.collect()

Sentence Embeddings

An embedding refers to a learned representation of words or entities in a vector space, where words or entities with similar meanings are mapped to similar vectors.

In [None]:
from sentence_transformers import SentenceTransformer

In [None]:
# Load the 'all-MiniLM-L6-v2' sentence-embedding model.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [None]:
# First list of sentences; each is compared with the same-index entry of sentences2.
sentences1 = [
    "The dog sits outside",
    "A man is playing guitar",
    "The movies are awesome",
]

In [None]:
# Encode the first sentence list, asking for a tensor result.
embeddings1 = model.encode(sentences1, convert_to_tensor=True)

In [None]:
# Second list of sentences, index-aligned with sentences1.
sentences2 = [
    "The dog plays in the garden",
    "A man plays musical instrument.",
    "The new movie is so great",
]

In [None]:
# Encode the second sentence list, asking for a tensor result.
embeddings2 = model.encode(sentences2, convert_to_tensor=True)

In [None]:
# Inspect the raw embeddings produced for the second sentence list.
print(embeddings2)

In [None]:
from sentence_transformers import util

In [None]:
# Cosine similarity between the two embedding sets; later read as cosine_scores[i][i]
# to get the score for each same-index sentence pair.
cosine_scores = util.cos_sim(embeddings1, embeddings2)

In [None]:
# Print the full similarity matrix.
print(cosine_scores)

In [None]:
# Print each same-index sentence pair with its similarity score,
# taken from the diagonal of cosine_scores.
pair_count = len(sentences1)
for idx in range(pair_count):
    left, right = sentences1[idx], sentences2[idx]
    diagonal_score = cosine_scores[idx][idx]
    print('{} \t\t {} \t\t Score: {:.4f}'.format(left, right, diagonal_score))

In [None]:
# Drop the sentence-embedding model and force a garbage-collection pass.
del model
gc.collect()

Zero-Shot Audio Classification

In [None]:
from datasets import load_dataset

# Load only the first 10 rows of the train split of "ashraq/esc50".
dataset = load_dataset("ashraq/esc50", split="train[:10]")

In [None]:
# Take the first row of the subset as the working sample.
audio_sample  = dataset[0]

In [None]:
# Inspect the sample's fields; later cells read its "audio" entry.
audio_sample

In [None]:
from IPython.display import Audio as IPythonAudio
# Render an inline audio player from the sample's waveform array and its sampling rate.
IPythonAudio(audio_sample["audio"]["array"],
             rate=audio_sample["audio"]["sampling_rate"])

Build the audio classification pipeline using the Transformers library

In [None]:
# Zero-shot audio classification pipeline using the CLAP (htsat-unfused) checkpoint.
zero_shot_classifier = pipeline(
    task="zero-shot-audio-classification",
    model="laion/clap-htsat-unfused",
)

In [None]:
# Sampling rate configured on the classifier's feature extractor.
zero_shot_classifier.feature_extractor.sampling_rate

In [None]:
# Sampling rate of the loaded clip, to compare against the feature extractor's rate.
audio_sample["audio"]["sampling_rate"]

In [None]:
from datasets import Audio

# Re-cast the "audio" column to the rate the classifier's feature extractor
# reports. Reading it from the pipeline instead of hard-coding 48_000 keeps
# the dataset and the model in agreement if the checkpoint is ever swapped.
dataset = dataset.cast_column(
    "audio",
    Audio(sampling_rate=zero_shot_classifier.feature_extractor.sampling_rate),
)

In [None]:
# Pick a sample from the re-cast dataset.
# NOTE(review): the earlier preview used dataset[0]; this selects dataset[1] —
# confirm which clip is intended for classification.
audio_sample = dataset[1]

In [None]:
# Inspect the sample taken after the audio column was re-cast.
audio_sample

In [None]:
# Two candidate descriptions for the classifier to choose between.
candidate_labels = [
    "Sound of a dog",
    "Sound of vacuum cleaner",
]

In [None]:
# Score the sample's waveform against the candidate labels.
zero_shot_classifier(
    audio_sample["audio"]["array"],
    candidate_labels=candidate_labels,
)

In [None]:
# A wider set of candidate descriptions for a second classification pass.
candidate_labels = [
    "Sound of a child crying",
    "Sound of vacuum cleaner",
    "Sound of a bird singing",
    "Sound of an airplane",
]

In [None]:
# Score the same waveform against the wider label set.
zero_shot_classifier(
    audio_sample["audio"]["array"],
    candidate_labels=candidate_labels,
)

In [None]:
# Drop the audio classification pipeline and force a garbage-collection pass.
del zero_shot_classifier
gc.collect()

Automatic Speech Recognition

In [None]:
from datasets import load_dataset
from transformers import pipeline

In [None]:
# Stream the "train.clean.100" split of LibriSpeech instead of downloading it in full.
# Note: this rebinds `dataset`, which previously held the ESC-50 subset.
dataset = load_dataset(
    "librispeech_asr",
    split="train.clean.100",
    streaming=True,
    trust_remote_code=True,
)

In [None]:
# Pull the first record from the streaming dataset.
example = next(iter(dataset))

In [None]:
# Take the first five records and turn them into a list for display.
dataset_head = dataset.take(5)
list(dataset_head)

In [None]:
# Show the third of the five records.
# NOTE(review): this calls list() on dataset_head a second time — presumably
# re-reading the stream; confirm, and consider reusing a stored list.
list(dataset_head)[2]

In [None]:
# Inspect the first record; later cells read its "audio" entry.
example

In [None]:
# Speech-recognition pipeline using the English-only distil-small Whisper checkpoint.
asr = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)

In [None]:
# Sampling rate configured on the ASR pipeline's feature extractor.
asr.feature_extractor.sampling_rate

In [None]:
# Sampling rate of the example clip, to compare against the feature extractor's rate.
example['audio']['sampling_rate']

In [None]:
# Transcribe the example clip. The waveform is passed together with its
# sampling rate so the pipeline does not have to assume that the bare array
# already matches the rate the model expects.
asr(
    {
        "raw": example["audio"]["array"],
        "sampling_rate": example["audio"]["sampling_rate"],
    }
)["text"]

In [None]:
import os 
import gradio as gr

In [None]:
# Empty Blocks container; the tabbed UI is added to it inside a `with demo:` block.
demo = gr.Blocks()

In [None]:
def transcribe_speech(filepath):
    """Transcribe the audio file at ``filepath`` with the global ``asr`` pipeline.

    Args:
        filepath: Path to an audio file, or ``None`` when no audio was supplied.

    Returns:
        The ``"text"`` entry of the pipeline output, or an empty string (after
        raising a Gradio warning) when ``filepath`` is ``None``.
    """
    if filepath is not None:
        return asr(filepath)["text"]

    # Nothing was recorded or uploaded: notify the user and return no text.
    gr.Warning("No audio found, please retry.")
    return ""

In [None]:
# Microphone interface: the audio input is handed to transcribe_speech as a
# file path and the result is shown in a three-line text box.
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)

In [None]:
# Upload interface: same wiring as the microphone one, but the audio comes
# from an uploaded file.
file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never",
)

In [None]:
# Place the two interfaces on separate tabs inside the Blocks container.
with demo:
    gr.TabbedInterface(
        [mic_transcribe, 
         file_transcribe],
        ["Transcribe Microphone",
         "Transcribe File"],
    )

# NOTE(review): share=True requests a publicly reachable link and debug=True
# keeps this cell running until it is interrupted — confirm both are intended,
# since a blocking launch stalls a "Run All" of the notebook.
demo.launch(debug=True,share=True)

In [None]:
# Shut down the launched Gradio app.
demo.close()

Text to Speech

In [None]:
from transformers.utils import logging

# Raise the transformers log threshold so only errors appear in cell output.
logging.set_verbosity_error()

In [28]:
from transformers import pipeline

# Text-to-speech pipeline using the VITS LJ Speech checkpoint.
narrator = pipeline(
    task="text-to-speech",
    model="kakao-enterprise/vits-ljs",
)



In [29]:
# Paragraph to narrate. It is a single line of text wrapped in one leading
# and one trailing newline.
text = (
    "\n"
    "Researchers at the Allen Institute for AI, "
    "HuggingFace, Microsoft, the University of Washington, "
    "Carnegie Mellon University, and the Hebrew University of "
    "Jerusalem developed a tool that measures atmospheric "
    "carbon emitted by cloud servers while training machine "
    "learning models. After a model’s size, the biggest variables "
    "were the server’s location and time of day it was active."
    "\n"
)

In [None]:
# Synthesize speech for the paragraph; the result is read afterwards through
# its "audio" and "sampling_rate" keys.
narrated_text = narrator(text)

In [None]:
from IPython.display import Audio as IPythonAudio

# Render an inline player for element 0 of the generated audio, at the
# sampling rate reported alongside it.
IPythonAudio(narrated_text["audio"][0],
             rate=narrated_text["sampling_rate"])