In [1]:
!pip install deepspeech

!curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/deepspeech-0.9.3-models.pbmm
!curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/deepspeech-0.9.3-models.scorer

!curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/audio-0.9.3.tar.gz
!tar xvf audio-0.9.3.tar.gz
!ls -l ./audio/

Collecting deepspeech
  Downloading deepspeech-0.9.3-cp37-cp37m-manylinux1_x86_64.whl (9.2 MB)
[K     |████████████████████████████████| 9.2 MB 5.3 MB/s 
Installing collected packages: deepspeech
Successfully installed deepspeech-0.9.3
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   634  100   634    0     0   3464      0 --:--:-- --:--:-- --:--:--  3464
100  180M  100  180M    0     0  62.2M      0  0:00:02  0:00:02 --:--:-- 72.3M
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   667  100   667    0     0   3420      0 --:--:-- --:--:-- --:--:--  3420
100  909M  100  909M    0     0  23.7M      0  0:00:38  0:00:38 --:--:--  174M
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   To

In [2]:
! pip install youtube_dl
! pip install pydub
! pip install transformers
! pip install sentencepiece

Collecting youtube_dl
  Downloading youtube_dl-2021.6.6-py2.py3-none-any.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 4.0 MB/s 
[?25hInstalling collected packages: youtube-dl
Successfully installed youtube-dl-2021.6.6
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
Collecting transformers
  Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 3.8 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.1.2-py3-none-any.whl (59 kB)
[K     |████████████████████████████████| 59 kB 6.8 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 48.7 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K    

In [12]:
import librosa
import soundfile
from __future__ import unicode_literals
import youtube_dl
from pydub import AudioSegment
from deepspeech import Model
from scipy.io.wavfile import read as wav_read
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
import os

In [4]:
def make_transcript(audio_file):
    model_file_path = "/content/deepspeech-0.9.3-models.pbmm"
    lm_file_path = "/content/deepspeech-0.9.3-models.scorer"
    beam_width = 100
    lm_alpha = 0.93
    lm_beta = 1.18

    model = Model(model_file_path)
    model.enableExternalScorer(lm_file_path)

    model.setScorerAlphaBeta(lm_alpha, lm_beta)
    model.setBeamWidth(beam_width)

    rate, buffer= wav_read(audio_file)
    return model.stt(buffer)

In [5]:
def get_tube(url):

    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        video_title = info_dict.get('id', None)

    path = f'{video_title}.mp3'

    ydl_opts.update({'outtmpl':path})

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
        
    return path

In [15]:
def get_transcript(url):
    path =  f"/content/{get_tube(url)}"
    shortcut = path[:-4]
    path_wav = f"{shortcut}.wav"

    sound = AudioSegment.from_file(path)
    sound.export(path_wav, format="wav")

    os.remove(path) 

    audio, sr = librosa.load(path_wav, sr=16000)

    soundfile.write(path_wav, data = audio, samplerate = sr)
    
    transcript = make_transcript(path_wav)

    os.remove(path_wav)

    return transcript

In [7]:
def transcript_summarizer(url):
    transcript = get_transcript(url)

    model = T5ForConditionalGeneration.from_pretrained('t5-base')
    tokenizer = T5Tokenizer.from_pretrained('t5-base')
    device = torch.device('cpu')

    preprocess_text = transcript.strip().replace("\n","")
    t5_prepared_Text = "summarize: "+preprocess_text
    #print ("original text preprocessed: \n", preprocess_text)

    tokenized_text = tokenizer.encode(t5_prepared_Text, return_tensors="pt").to(device)


    summary_ids = model.generate(tokenized_text,
                                      num_beams=4,
                                      no_repeat_ngram_size=2,
                                      min_length=30,
                                      max_length=100,
                                      early_stopping=True)

    output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return print("\n\nSummarized text: \n",output)

In [16]:
import time

start = time.time()
transcript_summarizer("https://www.youtube.com/watch?v=A8SAZ8lI4m4")
end = time.time()
print(end - start)

[youtube] A8SAZ8lI4m4: Downloading webpage
[youtube] A8SAZ8lI4m4: Downloading webpage
[download] Destination: A8SAZ8lI4m4.mp3
[download] 100% of 4.00MiB in 00:54
[ffmpeg] Post-process file A8SAZ8lI4m4.mp3 exists, skipping


Token indices sequence length is longer than the specified maximum sequence length for this model (1159 > 512). Running this sequence through the model will result in indexing errors




Summarized text: 
 sweden was in the union with no way formosanta until the splitting of five the war caused this dissolution why did wedneebak her so a loss of countries in europe have done the same before like russia who had easaidh closeness with breitenstein to invade it and rapidan for it. the prince of denmark was declared the king of norway in eighteen fourteen and proclaimed its independence against
312.51552391052246
