In [1]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, pipeline, AutoProcessor
from datasets import load_dataset

In [2]:
# Query CUDA availability once and derive both settings from that answer:
# GPU -> 'cuda:0' with float16, otherwise CPU with float32.
has_cuda = torch.cuda.is_available()
device, torch_dtype = ('cuda:0', torch.float16) if has_cuda else ('cpu', torch.float32)

In [3]:
# Checkpoint identifier passed to the `from_pretrained` calls for the model
# and the processor.
model_id = "openai/whisper-tiny"

In [4]:
# Keyword options for loading the checkpoint, gathered in one place so the
# call itself stays short.
load_options = dict(
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, **load_options)

In [5]:
# Move the model to the selected device; the bare name on the last line makes
# the notebook display the module tree.
model = model.to(device)
model

WhisperForConditionalGeneration(
  (model): WhisperModel(
    (encoder): WhisperEncoder(
      (conv1): Conv1d(80, 384, kernel_size=(3,), stride=(1,), padding=(1,))
      (conv2): Conv1d(384, 384, kernel_size=(3,), stride=(2,), padding=(1,))
      (embed_positions): Embedding(1500, 384)
      (layers): ModuleList(
        (0-3): 4 x WhisperEncoderLayer(
          (self_attn): WhisperAttention(
            (k_proj): Linear(in_features=384, out_features=384, bias=False)
            (v_proj): Linear(in_features=384, out_features=384, bias=True)
            (q_proj): Linear(in_features=384, out_features=384, bias=True)
            (out_proj): Linear(in_features=384, out_features=384, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=384, out_features=1536, bias=True)
          (fc2): Linear(in_features=1536, out_features=384, bias=True)
          (fin

In [6]:
# Load the processor for the same checkpoint as the model (`model_id`); its
# `tokenizer` and `feature_extractor` attributes are handed to the
# speech-recognition pipeline when it is built.
processor = AutoProcessor.from_pretrained(model_id)

In [7]:
# Settings for the speech-recognition pipeline, collected up front and then
# unpacked into the `pipeline(...)` call.
asr_options = {
    "model": model,
    "tokenizer": processor.tokenizer,
    "feature_extractor": processor.feature_extractor,
    "max_new_tokens": 128,
    "chunk_length_s": 30,
    "batch_size": 16,
    "return_timestamps": True,
    "torch_dtype": torch_dtype,
    "device": device,
}
pipe = pipeline("automatic-speech-recognition", **asr_options)

In [8]:
# Load the "clean" configuration, validation split, of the long-form
# LibriSpeech dataset; the following cell reads the first sample's audio.
#
# The sentiment classifier is intentionally NOT built here: an identical
# `pipeline(...)` call lives in its own cell further down and rebinds the same
# name before its first use, so constructing it here only loaded the model a
# second time.
dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")



In [9]:
# First dataset sample's audio entry. It is not used by the transcription
# below, which runs on a local file instead.
sample = dataset[0]["audio"]

# Transcribe the local recording and show the recognised text.
audio_file = "Eren.mp3"
result = pipe(audio_file)
result["text"]

" Ah! Sshhh! Sshhh! H-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h I gotta wait a huddle, thanks from here! Please stop this bloodbath and let us ready make the best of what's left of the world! Aaron... I want... to help you carry the burden of your sins. Please, Aaron! Quit running! Let's go home! The rumbling will not stop and the future of the island of Paradee will not be left to the winds of fate. Wait, Aaron! Why are you still letting us use the titan's powers? We could talk as long as we like here! Aaron! I will hold freedom in my hands, by taking it from this world. You are free to oppose me, and defend the world's freedom. And there was only one way for this to be resolved. Fight! You will only stop my events when you stop me from breathing. What happened? You okay? So, Commander, what's next?"

In [10]:
# Sentiment classifier; the task is not named explicitly, only the checkpoint.
# With return_all_scores=True the result carries a score for every label,
# wrapped in an outer list that the next cell unwraps with `[0]`.
# NOTE(review): `return_all_scores` is reportedly deprecated in newer
# transformers releases in favour of `top_k=None`, which may change the
# nesting of the result - confirm before switching.
sentiment_checkpoint = "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
distilled_student_sentiment_classifier = pipeline(
    model=sentiment_checkpoint,
    return_all_scores=True,
)



In [11]:
# Classify the transcript. The classifier's result is a list whose first
# element holds the label/score dicts for this text, so take that element
# directly and display it.
transcript = result['text']
sent = distilled_student_sentiment_classifier(transcript)[0]
sent

[{'label': 'positive', 'score': 0.43457165360450745},
 {'label': 'neutral', 'score': 0.1020752415060997},
 {'label': 'negative', 'score': 0.46335312724113464}]

In [12]:
# Peek at the first label/score entry of the classifier result.
# Despite the name, `df` is a plain dict with 'label' and 'score' keys,
# not a DataFrame.
df = sent[0]
df

{'label': 'positive', 'score': 0.43457165360450745}

In [13]:
# Split the classifier result into parallel score/label lists. Iterating over
# `sent` itself (rather than a fixed range of three) keeps this working for
# any number of labels the classifier returns.
ratings = [entry['score'] for entry in sent]
label = [entry['label'] for entry in sent]

# Position of the highest score; on ties the earliest entry wins, which matches
# the previous `ratings.index(max(ratings))` behaviour.
rightLabel = max(range(len(ratings)), key=ratings.__getitem__)
maxi = ratings[rightLabel]


# Report the transcript together with the winning label and its score in percent.
print(f"{result['text']}\n The Given text has a {label[rightLabel]} Marking with a overall score of {maxi*100:.2f}%")

 Ah! Sshhh! Sshhh! H-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h-h I gotta wait a huddle, thanks from here! Please stop this bloodbath and let us ready make the best of what's left of the world! Aaron... I want... to help you carry the burden of your sins. Please, Aaron! Quit running! Let's go home! The rumbling will not stop and the future of the island of Paradee will not be left to the winds of fate. Wait, Aaron! Why are you still letting us use the titan's powers? We could talk as long as we like here! Aaron! I will hold freedom in my hands, by taking it from this world. You are free to oppose me, and defend the world's freedom. And there was only one way for this to be resolved. Fight! You will only stop my events when you stop me from breathing. What happened? You okay? So, Commander, what's next?
 The Given text has a negative Marking with a overall score of 46.34%
