In [None]:
!pip install deepspeech

!curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/deepspeech-0.9.3-models.pbmm
!curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/deepspeech-0.9.3-models.scorer

!curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/audio-0.9.3.tar.gz
!tar xvf audio-0.9.3.tar.gz
!ls -l ./audio/

In [None]:
! pip install youtube_dl
! pip install pydub
! pip install transformers
! pip install sentencepiece
! pip install punctuator
! pip install SpeechRecognition

! apt-get update && apt-get install -y libsndfile1 ffmpeg
! pip install Cython
! pip install bert-extractive-summarizer
! pip install nemo_toolkit[all]
! pip install pynini
! pip install rake_nltk
! pip install nltk

In [None]:
import librosa
import soundfile
from __future__ import unicode_literals
import youtube_dl
from pydub import AudioSegment
from deepspeech import Model
from scipy.io.wavfile import read as wav_read
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config
import torch
import speech_recognition as sr
import os
import contextlib
import wave
from nemo.collections.nlp.models import PunctuationCapitalizationModel
import torchvision
from summarizer import Summarizer
import nltk
from rake_nltk import Rake
import speech_recognition as sr

In [None]:
nltk.download('stopwords')
nltk.download('punkt')

In [1]:
def make_transcript(audio_file):
    model_file_path = "/content/deepspeech-0.9.3-models.pbmm"
    lm_file_path = "/content/deepspeech-0.9.3-models.scorer"
    beam_width = 100
    lm_alpha = 0.93
    lm_beta = 1.18

    model = Model(model_file_path)
    model.enableExternalScorer(lm_file_path)

    model.setScorerAlphaBeta(lm_alpha, lm_beta)
    model.setBeamWidth(beam_width)

    rate, buffer= wav_read(audio_file)
    return model.stt(buffer)

In [None]:
def capital_letters(text):
    capitalized = ""
    if text[0] != text[0].capitalize():
        capitalized = capitalized + text[0].capitalize()
        text = text[1:]

    for num in range(len(text)):
        if text[num - 2] + text[num - 1] != ". ":
            capitalized = capitalized + text[num]
        if text[num - 2] + text[num - 1] == ". ":
            capitalized = capitalized + text[num].capitalize()

    return capitalized

In [None]:
def set_punctuation(transcript):
    PunctuationCapitalizationModel.list_available_models()
    model = PunctuationCapitalizationModel.from_pretrained("punctuation_en_bert")
    punct_transcript = model.add_punctuation_capitalization([transcript])
    punct_transcript = punct_transcript[0]

    return punct_transcript

In [None]:
def get_tube(url):

    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        video_id = info_dict.get('id', None)
        video_title = info_dict.get('title', None)
        video_duration = info_dict.get('duration', None)

    min = int(video_duration / 60)
    sec = video_duration % 60
    if sec < 10:
      duration = f"{min}:0{sec}"
    else:
      duration = f"{min}:{sec}"
    
    video_info = {}
    video_info["path"] = f'{video_id}.mp3'
    video_info["title"] = video_title
    video_info["duration"] = duration

    ydl_opts.update({'outtmpl':video_info["path"]})

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
        
    return video_info

In [None]:
def get_transcript(url):
    video_info = get_tube(url)
    path = video_info["path"]
    path_audio =  f"/content/{path}"
    shortcut = path_audio[:-4]
    path_wav = f"{shortcut}.wav"

    sound = AudioSegment.from_file(path_audio)
    sound.export(path_wav, format="wav")

    #os.remove(path_audio) 

    audio, sr = librosa.load(path_wav, sr=16000)
    soundfile.write(path_wav, data = audio, samplerate = sr)
    
    transcript = make_transcript(path_wav)
    #transcript = set_punctuation(transcript)
    video_info["transcript"] = transcript

    #os.remove(path_wav)

    return video_info

In [None]:
def keywords(transcript):
  rake_nltk_var = Rake()
  rake_nltk_var.extract_keywords_from_text(transcript)
  key_output = rake_nltk_var.get_ranked_phrases()
  #keyword_extracted = rake_nltk_var.get_ranked_phrases_with_scores()

  return key_output[:5]

In [None]:
def abstract_summary(transcript):
    model = T5ForConditionalGeneration.from_pretrained('t5-base')
    tokenizer = T5Tokenizer.from_pretrained('t5-base')
    device = torch.device('cpu')

    tokenized_text = tokenizer.encode(transcript, return_tensors="pt").to(device)
    summary_ids = model.generate(tokenized_text,
                                      num_beams=4,
                                      no_repeat_ngram_size=2,
                                      min_length=50,
                                      max_length=300,
                                      early_stopping=True)

    abs_output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    abs_output = capital_letters(abs_output)

    return abs_output

In [None]:
def extractive_summary(transcript):
    model = Summarizer()
    result = model(transcript, min_length=50)
    ext_output = "".join(result)
    ext_output = capital_letters(ext_output)

    return ext_output

In [None]:
def assembling_url(url):
  video_info = get_transcript(url)
  transcript = video_info["transcript"]
  video_title = video_info["title"]
  video_duration = video_info["duration"]

  key_output = keywords(transcript)
  abs_output = abstract_summary(transcript)
  ext_output = extractive_summary(transcript)

  return video_info, abs_output, ext_output

In [None]:
import time

start = time.time()
video_info, abs, ext = assembling_url("https://www.youtube.com/watch?v=oaTssshwmwo")
end = time.time()

length = int(end - start)
min = int(length / 60)
sec = length % 60
if sec < 10:
  duration = f"{min}:0{sec}"
else:
  duration = f"{min}:{sec}"

print(duration)

NameError: ignored

In [None]:
video_info['transcript']


'intelligence is really techniques that help machines and computers mimic human behavior is the device being smart how it becomes smart under the hood is the next layer of machine learning which are the general techniques or variety of techniques device smart'

In [None]:
abs


'Artificial artificial artificial becoming smart is really techniques that help machines mimic human behavior is the device being smart how it becomes smart under the next layer of machine learning is artificial artificial artificial artificial artificial devices becoming devices become smart devices being intelligent is actually techniques and techniques the devices smart is smart is smart. Smart. Device smart...... Machine learning or artificial.. The next level of artificial intelligence is machine. Artificial. So'

In [None]:
ext

"D'artifice licence is really techniques that help machines and computers mimic human behavior, Pitious nesting. Artificial intelligence is going to be used in everything To give you two examples. Otonabee of any kind are not going to be autonomous without artificial intelligence in the medical field."

In [None]:
#get_transcript("https://www.youtube.com/watch?v=lh4d1fjzfNA")