# SYNOPSIS: A SUMMARY MODEL

Synopsis is a summarization system that takes any input (text, image, audio or video) and generates a summary of the events present in that input.

## Image Summarization

All the necessary libraries:

In [None]:
from google.colab import files
from zipfile import ZipFile
from glob import glob
import os
import random
import matplotlib.pyplot as plt 
import matplotlib.image as mpimg 
from os import listdir   #listdir helps searching through a given path for all the files in the directory, return list of files in directory
from pickle import dump #pickle is used to serialize or deserialize a python object structure, pickle..dump is used to store object data to file
# dump() converts a Python object hierarchy into a byte stream
from keras.applications.vgg16 import VGG16   #vgg16 model
from keras.preprocessing.image import load_img #load_img func is used to load image from file as a pil image
#PIL-Python Imaging Library which provides the python interpreter with image editing capabilities
from keras.preprocessing.image import img_to_array  #convert pil image instance to a numpy array
from keras.applications.vgg16 import preprocess_input   #used to preprocess input image to extract features from it
from keras.models import Model  #insctanciate a model to include the necessary layers given some input arrays and tensors and input arrays and tensors
import string   #to perform text based operations like getting rid of puntuations from text strings etc
from pickle import load   # reads the pickled byte stream of one or more python objects from a file object
from numpy import array
import tensorflow
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import Dropout
from keras.layers.merge import add
from keras.callbacks import ModelCheckpoint
from numpy import argmax
from nltk.translate.bleu_score import corpus_bleu
from keras.models import load_model

All functions required:

In [None]:
def kaggle_initialize(): 
  """Fetch and unpack the Flickr8k dataset from Kaggle (Colab / IPython only).

  Opens the Colab upload dialog (the ``kaggle.json`` credentials file is
  expected), copies that file into ``~/.kaggle`` with owner-only
  permissions, downloads the ``adityajn105/flickr8k`` dataset with the
  Kaggle CLI and extracts ``flickr8k.zip`` into the current directory.

  The lines starting with ``!`` are IPython shell escapes, so this function
  only works inside a notebook, not as plain Python.
  """
  #!pip install kaggle - if kaggle is not installed
  files.upload()    #upload kaggle.json file 
  !mkdir -p ~/.kaggle
  !cp kaggle.json ~/.kaggle/
  !chmod 600 ~/.kaggle/kaggle.json  #Change the permission
  !kaggle datasets download -d adityajn105/flickr8k
  file_name="flickr8k.zip"    # path of zip file used
  # NOTE: the name `zip` shadows the builtin of the same name inside this block.
  with ZipFile(file_name,'r') as zip:
    zip.extractall()
    print('Done')

def plot_images(train_dir):
    """Show one randomly picked image from ``train_dir`` (e.g. '/content/Images').

    A 10x8 inch figure is opened, a random directory entry is read with
    matplotlib, its pixel values are divided by 255 and the result is drawn
    with the axes hidden. Nothing is returned.
    """
    plt.figure(figsize=(10, 8))
    chosen_name = random.choice(os.listdir(train_dir))
    # Scale pixel values by 1/255 before display, as the original does.
    pixels = mpimg.imread(train_dir + '/' + chosen_name) / 255
    plt.imshow(pixels)
    plt.axis(False)

# extract features from each photo in the directory
#contains all steps to extract features from each image
#it inputs directory which is declared as the argument to the function
def img_feature_extract(path_dir):
    """Run every file found in ``path_dir`` through VGG16 and collect the outputs.

    Returns a dict that maps the image id (the file name up to its first
    '.') to the array returned by ``predict`` for that image. The model
    summary and each processed file name are printed along the way.
    """
    img_features = {}
    vgg = VGG16()
    # The stated intent is to drop the classification layer and keep the
    # representation computed just before it as the image features.
    # NOTE(review): whether ``layers.pop()`` really changes the rebuilt model
    # depends on the Keras version; model_build in this file declares a
    # 1000-wide image input, which suggests the final layer's output is what
    # is actually used - TODO confirm before changing either side.
    vgg.layers.pop()
    vgg = Model(inputs=vgg.inputs, outputs=vgg.layers[-1].output)
    print(vgg.summary())
    for file_name in listdir(path_dir):
        pixels = img_to_array(load_img(path_dir + '/' + file_name, target_size=(224, 224)))
        # Add a leading batch axis of size 1, then apply VGG preprocessing.
        height, width, channels = pixels.shape[0], pixels.shape[1], pixels.shape[2]
        batch = preprocess_input(pixels.reshape((1, height, width, channels)))
        image_id = file_name.split('.')[0]
        img_features[image_id] = vgg.predict(batch, verbose=0)
        print('>%s' % file_name)
    return img_features

# extract features from a single photo file
def extract_single_img_features(file_path):
    """Return the VGG16 output for the single image stored at ``file_path``.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis of size 1, preprocessed for VGG and passed to ``predict``.
    """
    vgg = VGG16()
    # Same model re-structuring steps as the directory-wide extractor.
    vgg.layers.pop()
    vgg = Model(inputs=vgg.inputs, outputs=vgg.layers[-1].output)
    pixels = img_to_array(load_img(file_path, target_size=(224, 224)))
    height, width, channels = pixels.shape[0], pixels.shape[1], pixels.shape[2]
    batch = preprocess_input(pixels.reshape((1, height, width, channels)))
    return vgg.predict(batch, verbose=0)

def create_features_pkl():
    """Extract features for every image in '/content/Images' and pickle them.

    The feature dict returned by ``img_feature_extract`` is written to
    'features.pkl' in the current directory; the number of extracted entries
    is printed first.
    """
    features = img_feature_extract('/content/Images')
    print('Extracted Features: %d' % len(features))
    # Use a context manager so the file is flushed and closed even if
    # pickling fails part-way through.
    with open('features.pkl', 'wb') as pkl_file:
        dump(features, pkl_file)

'''Load the description of each image and clean the text by removing punctuation, numbers and other noise.
The image descriptions are then converted into a vocabulary of words so that the embedding layer of the LSTM model
can work with the word tokens and generate correct captions for images.'''

# load files into memory (eg-captions document)
def doc_loader(file_path):
    """Return the full text content of the file at ``file_path``.

    The file is opened read-only in text mode with the platform default
    encoding. A context manager guarantees the handle is closed even when
    reading raises.
    """
    with open(file_path, 'r') as file:
        return file.read()

# extract descriptions for images  &returns image identifiers and corresponding descriptions stored in dictionary format
def descripts_loader(document_path):
    """Group captions by image identifier.

    Despite its name, ``document_path`` is the *text* of the captions
    document (it is split on newlines here), not a file path.

    For each line the first whitespace-separated token is the image id
    (truncated at its first '.') and the remaining tokens, re-joined with
    single spaces, form one description. Lines shorter than two characters
    and whitespace-only lines are skipped.

    Returns a dict mapping image id -> list of description strings in the
    order they appear in the document.
    """
    img_caps = dict()
    for line in document_path.split('\n'):
        tokens = line.split()
        # A whitespace-only line can be 2+ characters long yet yield no
        # tokens; without the second check tokens[0] raises IndexError.
        if len(line) < 2 or not tokens:
            continue
        img_id = tokens[0].split('.')[0]  # drop the file extension
        img_caps.setdefault(img_id, []).append(' '.join(tokens[1:]))
    return img_caps

# this function is used to clean the text description & takes input dictionary(img_caps) as the argument
def descripts_cleaner(descriptions):
    """Normalise every caption stored in ``descriptions``, in place.

    ``descriptions`` maps an image id to a list of caption strings. Each
    caption is split on whitespace; every token is lower-cased and stripped
    of ``string.punctuation`` characters, and only tokens that are longer
    than one character and purely alphabetic are kept. The surviving tokens
    are re-joined with single spaces and written back into the same list
    slot. Returns None.
    """
    strip_punct = str.maketrans('', '', string.punctuation)
    for captions in descriptions.values():
        for position, caption in enumerate(captions):
            kept = []
            for token in caption.split():
                token = token.lower().translate(strip_punct)
                # Drops leftovers such as a hanging 's' or 'a', and any
                # token that still contains digits.
                if len(token) > 1 and token.isalpha():
                    kept.append(token)
            captions[position] = ' '.join(kept)

# convert the loaded descriptions into a vocabulary of words(a set) and returns it
def vocab_add(descripts):
    """Return the set of distinct whitespace-separated words used in any caption.

    ``descripts`` maps an image id to a list of caption strings.
    """
    vocabulary = set()
    for captions in descripts.values():
        for caption in captions:
            vocabulary.update(caption.split())
    return vocabulary

#first argument is the mapping dictionary
#second argument is the name of the file where you want to store the cleaned descriptionsalong with the unique image identifiers
# save descriptions to file, one per line
def descripts_saver(descripts, filename):
    """Write every caption to ``filename``, one '<image id> <caption>' per line.

    ``descripts`` maps an image id to a list of caption strings. Lines are
    joined with '\\n' and no trailing newline is written. The file is opened
    with a context manager so it is closed even if writing fails.
    """
    lines = [
        key + ' ' + desc
        for key, captions in descripts.items()
        for desc in captions
    ]
    with open(filename, 'w') as out_file:
        out_file.write('\n'.join(lines))

def create_descripts_txt():
    """Build 'descriptions.txt' from '/content/captions.txt'.

    Loads the captions document, groups captions per image, cleans them in
    place, prints the number of images and the vocabulary size, and saves
    the cleaned captions to 'descriptions.txt' in the current directory.
    """
    captions_text = doc_loader('/content/captions.txt')
    captions_by_image = descripts_loader(captions_text)
    print('Loaded: %d ' % len(captions_by_image))
    descripts_cleaner(captions_by_image)
    unique_words = vocab_add(captions_by_image)
    print('Vocabulary Size: %d' % len(unique_words))
    descripts_saver(captions_by_image, 'descriptions.txt')

def deleteLineDescripts(fn='/content/descriptions.txt'):
    """Rewrite ``fn`` in place without the lines that start with 'image'.

    Every line beginning with the literal prefix 'image' is dropped (not
    only the first one); all other lines are written back unchanged.

    Args:
        fn: path of the text file to filter. Defaults to the path that was
            previously hard-coded, so existing no-argument calls still work.
    """
    prefix = "image"  # formerly bound to a local named `str`, hiding the builtin
    with open(fn) as source:
        kept = [line for line in source if not line.startswith(prefix)]
    with open(fn, 'w') as target:
        target.writelines(kept)

def deleteLineCaptions(fn='/content/captions.txt'):
    """Rewrite ``fn`` in place without the lines that start with 'image'.

    Every line beginning with the literal prefix 'image' is dropped (not
    only the first one); all other lines are written back unchanged.

    Args:
        fn: path of the text file to filter. Defaults to the path that was
            previously hard-coded, so existing no-argument calls still work.
    """
    prefix = "image"  # formerly bound to a local named `str`, hiding the builtin
    with open(fn) as source:
        kept = [line for line in source if not line.startswith(prefix)]
    with open(fn, 'w') as target:
        target.writelines(kept)

# load a pre-defined list of photo identifiers
def list_loader(file_path):
    """Return the unique image identifiers listed in the file at ``file_path``.

    Each non-empty line contributes the text before its first '.' as an
    identifier. Duplicates are removed while keeping first-seen order.

    The original used ``list(set(...))``; the iteration order of a set of
    strings can differ between interpreter runs (string hashing is salted
    per process), so slices such as ``[:6000]`` / ``[6000:]`` taken by the
    callers would not select the same images in a later session. Keeping
    file order makes those train/test splits reproducible.
    """
    document = doc_loader(file_path)
    identifiers = [line.split('.')[0] for line in document.split('\n') if line]
    # dict keys are unique and preserve insertion order.
    return list(dict.fromkeys(identifiers))

# load clean descriptions into memory
def clean_descripts_loader(filename, data):
    """Load the cleaned captions for the image ids contained in ``data``.

    Each line of ``filename`` is split on whitespace: the first token is the
    image id, the rest is the caption. Captions of ids found in ``data`` are
    wrapped as 'startseq <caption> endseq' and collected per id.

    Args:
        filename: path of the cleaned descriptions file.
        data: iterable of (hashable) image ids to keep.

    Returns:
        dict mapping image id -> list of wrapped caption strings.
    """
    doc = doc_loader(filename)
    # Build the lookup set once: testing membership against a list of
    # thousands of ids for every line is needlessly quadratic.
    wanted = set(data)
    descripts = dict()
    for line in doc.split('\n'):
        tokens = line.split()
        if not tokens:
            # Blank lines (e.g. a trailing newline) previously crashed on
            # tokens[0] with IndexError.
            continue
        img_id, img_desc = tokens[0], tokens[1:]
        if img_id not in wanted:
            continue
        wrapped = 'startseq ' + ' '.join(img_desc) + ' endseq'
        descripts.setdefault(img_id, []).append(wrapped)
    return descripts

# load photo features , first argument pickle file
def img_features_loader(filename, dataset):
    """Load pickled image features and keep only the ids listed in ``dataset``.

    Args:
        filename: path of the pickle file holding a dict of id -> features.
        dataset: iterable of image ids to select.

    Returns:
        dict mapping each id in ``dataset`` to its stored features.

    Raises:
        KeyError: if an id in ``dataset`` is missing from the pickle.
    """
    # NOTE: unpickling can execute arbitrary code; only load files that were
    # produced by this project.
    with open(filename, 'rb') as pkl_file:  # closed even if loading fails
        all_features = load(pkl_file)
    return {k: all_features[k] for k in dataset}
''''
In this project I have used an RNN/LSTM model, a sequence processor that uses an embedding layer so that words
with similar meanings have similar representations, which helps identify the context of the words. With word embeddings, words are represented as
real-valued vectors. The LSTM processes the text data, which is now in vector form after passing through the embedding layer, and helps find
correlations between different words. A pre-trained VGG16 model (output layer removed) is used to extract features from images. Both of these models are merged
together and processed by a dense layer in order to predict the caption of an image.

'''
#encode descriptions into numbers and map them to numeric values for deep learning model to understand the data
#creating function to generate sequence of words given image features and encoded text

# covert a dictionary of clean descriptions to a list of descriptions
def descripts_list(descripts):
    """Flatten a dict of id -> caption list into one list of captions.

    Captions keep the dict's iteration order and, within one id, their
    list order.
    """
    return [caption for captions in descripts.values() for caption in captions]

# fit a tokenizer given caption descriptions by breaking them into tokens
def token_creator(descripts):
    """Return a Keras ``Tokenizer`` fitted on every caption in ``descripts``.

    ``descripts`` maps an image id to a list of caption strings.
    """
    all_captions = descripts_list(descripts)
    fitted = Tokenizer()
    fitted.fit_on_texts(all_captions)
    return fitted

# calculate the length of the description with the most words
def descripts_maxLen(descripts):
    """Return the word count of the longest caption in ``descripts``.

    Words are whitespace-separated tokens. Raises ValueError (from ``max``)
    when there are no captions at all.
    """
    word_counts = (len(caption.split()) for caption in descripts_list(descripts))
    return max(word_counts)

#encoding the text 
# create sequences of images, input sequences and output words for an image
def sequence_creator(tokenizer, max_length, desc_list, image, vocab_size=None):
    """Expand one image's captions into (image, partial caption) -> next-word samples.

    For every caption, each prefix of its encoded token sequence becomes one
    training input (padded to ``max_length``) and the token that follows the
    prefix becomes the one-hot encoded target.

    Args:
        tokenizer: fitted Keras tokenizer.
        max_length: length the input sequences are padded to.
        desc_list: caption strings belonging to the image.
        image: feature vector of the image, repeated for every sample.
        vocab_size: number of classes for the one-hot targets. Defaults to
            ``len(tokenizer.word_index) + 1``. The original body read an
            undefined name ``vocab_size`` and failed with NameError.

    Returns:
        Three arrays: image features, padded input sequences, one-hot
        next-word targets.
    """
    if vocab_size is None:
        # +1 because Keras word indices start at 1 (0 is the padding value).
        vocab_size = len(tokenizer.word_index) + 1
    img_feat, enc_text, output = [], [], []
    for caption in desc_list:
        seq = tokenizer.texts_to_sequences([caption])[0]
        for i in range(1, len(seq)):
            input_seq = pad_sequences([seq[:i]], maxlen=max_length)[0]
            output_seq = to_categorical([seq[i]], num_classes=vocab_size)[0]
            img_feat.append(image)
            enc_text.append(input_seq)
            output.append(output_seq)
    return array(img_feat), array(enc_text), array(output)
 
#define the captioning model, this function will house the entire architecture of the model used
def model_build(vocab_len, descripts_maxLen):
    """Build and compile the two-input captioning network.

    Args:
        vocab_len: vocabulary size; used for the embedding input dimension
            and the width of the softmax output.
        descripts_maxLen: length of the padded caption input (this parameter
            name hides the module-level function of the same name inside
            this body).

    Returns:
        The compiled Keras model taking [image features, caption sequence]
        and producing a distribution over the next word. The model summary
        is printed as a side effect.
    """
    # Image branch: 1000-element feature vector -> dropout -> 256 units.
    image_input = Input(shape=(1000,))
    image_dropped = Dropout(0.5)(image_input)
    image_encoded = Dense(256, activation='relu')(image_dropped)

    # Text branch: padded word ids -> embedding -> dropout -> LSTM.
    caption_input = Input(shape=(descripts_maxLen,))
    caption_embedded = Embedding(vocab_len, 256, mask_zero=True)(caption_input)
    caption_dropped = Dropout(0.5)(caption_embedded)
    caption_encoded = LSTM(256)(caption_dropped)

    # Decoder: element-wise sum of both 256-wide encodings, then dense layers.
    merged = add([image_encoded, caption_encoded])
    hidden = Dense(256, activation='relu')(merged)
    next_word_probs = Dense(vocab_len, activation='softmax')(hidden)

    model = Model(inputs=[image_input, caption_input], outputs=next_word_probs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')

    print(model.summary())
    return model

#Below code is used to progressively load the batch of data,training progresseviy due to lack of memory
# data generator, intended to be used in a call to model.fit_generator()
def feature_data(descriptions, images, tokenizer, descripts_maxLen):
    """Endlessly yield one training batch per image.

    Loops over ``descriptions`` forever; for each image id it takes the
    first row of ``images[id]`` as the photo feature, expands the image's
    captions with ``sequence_creator`` and yields
    ``[[image_features, input_sequences], next_word_targets]``.
    """
    while True:
        for image_id, captions in descriptions.items():
            photo_vector = images[image_id][0]
            batch_images, batch_sequences, batch_targets = sequence_creator(
                tokenizer, descripts_maxLen, captions, photo_vector)
            yield [[batch_images, batch_sequences], batch_targets]

def models_20_creator():
    """Train the captioning model for 20 single-epoch rounds, saving after each.

    Uses the first 6000 identifiers returned by ``list_loader`` as the
    training set, fits a tokenizer on their captions, builds the model and
    writes 'model_0.h5' ... 'model_19.h5' to the current directory.

    NOTE(review): the tokenizer fitted here is not written to disk in this
    function, although new_img_caption_generator reads
    '/content/tokenizer.pkl' - confirm where that file is produced.
    """
    train = list_loader('/content/captions.txt')[:6000]
    train_descripts = clean_descripts_loader('/content/descriptions.txt', train)
    train_features = img_features_loader('/content/features.pkl', train)
    tokenizer = token_creator(train_descripts)
    vocab_len = len(tokenizer.word_index) + 1
    maxLen = descripts_maxLen(train_descripts)
    img_cap_model = model_build(vocab_len, maxLen)
    # One generator step handles one image, so an epoch is one pass over all images.
    steps = len(train_descripts)
    for epoch in range(20):
        batches = feature_data(train_descripts, train_features, tokenizer, maxLen)
        img_cap_model.fit_generator(batches, epochs=1, steps_per_epoch=steps, verbose=1)
        img_cap_model.save('model_' + str(epoch) + '.h5')

# map an integer to a word
def word_id(integer, tokenizer):
    """Return the first word in ``tokenizer.word_index`` mapped to ``integer``.

    Returns None when no word carries that index.
    """
    matches = (
        word
        for word, index in tokenizer.word_index.items()
        if index == integer
    )
    return next(matches, None)

# generate a description for an image
def descripts_generator(model, tokenizer, image, max_length):
    """Generate a caption for ``image`` one word at a time.

    Starts from 'startseq' and, for at most ``max_length`` rounds, encodes
    and pads the text so far, asks ``model`` for the next-word scores and
    appends the word with the highest score. Stops early when that index
    maps to no word or when 'endseq' is produced.

    Returns the generated text, including the 'startseq' marker and, when
    reached, the 'endseq' marker.
    """
    caption = 'startseq'
    for _ in range(max_length):
        encoded = tokenizer.texts_to_sequences([caption])[0]
        padded = pad_sequences([encoded], maxlen=max_length)
        scores = model.predict([image, padded], verbose=0)
        next_word = word_id(argmax(scores), tokenizer)
        if next_word is None:
            break
        caption = caption + ' ' + next_word
        if next_word == 'endseq':
            break
    return caption
'''
BLEU (bilingual evaluation understudy) is an algorithm for
evaluating the quality of text which has been machine-translated from one natural language to another.
It helps us evaluate how close a generated text is to the expected text. When there can be multiple answers to your input you can use this metric.
BLEU results depend strongly on the breadth of your domain, the consistency of the test data with the training and tuning data,
and how much data you have available to train. If your models have been trained on a narrow domain, and your training data is
consistent with your test data, you can expect a high BLEU score.
Here I am using the cumulative n-gram score to evaluate the model. (1-gram refers to a single word, 2-gram to a pair of words, and so on.)
'''
# evaluate the skill of the model
def model_evaluator(model, descripts, images, tokenizer, max_length):
    """Print cumulative BLEU-1 to BLEU-4 scores of ``model`` on a caption set.

    Args:
        model: trained captioning model.
        descripts: dict mapping image id -> list of reference captions.
        images: dict mapping image id -> image features.
        tokenizer: tokenizer used during training.
        max_length: padded caption length used during training.

    For every image a caption is generated and compared, as a token list,
    against the tokenised reference captions. Nothing is returned.
    """
    actual, predicted = [], []
    for key, desc_list in descripts.items():
        generated = descripts_generator(model, tokenizer, images[key], max_length)
        actual.append([d.split() for d in desc_list])
        predicted.append(generated.split())
    # Cumulative BLEU-n uses uniform weights 1/n over the first n n-gram
    # orders. The BLEU-3 weights were (0.3, 0.3, 0.3, 0), which sum to 0.9
    # rather than 1 and therefore skew the reported score.
    bleu_weights = (
        ('BLEU-1', (1.0, 0, 0, 0)),
        ('BLEU-2', (0.5, 0.5, 0, 0)),
        ('BLEU-3', (1.0 / 3, 1.0 / 3, 1.0 / 3, 0)),
        ('BLEU-4', (0.25, 0.25, 0.25, 0.25)),
    )
    for label, weights in bleu_weights:
        score = corpus_bleu(actual, predicted, weights=weights)
        print('Score->%s: %f' % (label, score))
 
def testset_blue_scores():
    """Evaluate '/content/model_19.h5' on the images held out from training.

    The first 6000 identifiers from the captions file form the training set
    (used only to rebuild the tokenizer and the maximum caption length); the
    remaining identifiers form the test set whose BLEU scores are printed.

    NOTE: the split is only meaningful if ``list_loader`` returns the
    identifiers in the same order as it did when the model was trained.
    """
    image_ids = list_loader('/content/captions.txt')
    train, test = image_ids[:6000], image_ids[6000:]
    train_descriptions = clean_descripts_loader('/content/descriptions.txt', train)
    tokenizer = token_creator(train_descriptions)
    max_length = descripts_maxLen(train_descriptions)
    test_descriptions = clean_descripts_loader('/content/descriptions.txt', test)
    test_features = img_features_loader('/content/features.pkl', test)
    # model_19 is the checkpoint the author found to have the lowest loss.
    model = load_model('/content/model_19.h5')
    # The original called `evaluate_model`, which is not defined in this
    # file (NameError); the evaluation function is `model_evaluator`.
    model_evaluator(model, test_descriptions, test_features, tokenizer, max_length)

#Generate Captions for a Fresh Image
def new_img_caption_generator(image_path='/content/sample14.jpg',
                              tokenizer_path='/content/tokenizer.pkl',
                              model_path='/content/model_19.h5',
                              max_length=33):
    """Generate and print a caption for a new image.

    All arguments default to the values that were previously hard-coded, so
    calling the function without arguments behaves as before.

    Args:
        image_path: image file to caption.
        tokenizer_path: pickle file holding the fitted tokenizer.
        model_path: saved captioning model.
        max_length: maximum caption length used during training.

    The generated caption is printed without the 'startseq' / 'endseq'
    markers; nothing is returned.
    """
    # NOTE: unpickling can execute arbitrary code; only load a tokenizer file
    # produced by this project. The context manager closes the file, which
    # the original `load(open(...))` never did.
    with open(tokenizer_path, 'rb') as pkl_file:
        tokenizer = load(pkl_file)
    model = load_model(model_path)
    photo = extract_single_img_features(image_path)
    description = descripts_generator(model, tokenizer, photo, max_length)
    markers = ('startseq', 'endseq')
    result = ' '.join(
        word for word in description.split() if word.lower() not in markers
    )
    print(result)

## Text Summarization

Libraries used:

In [3]:
! pip install newspaper3k  
import newspaper # for extracting text from url
from newspaper import Article

!pip install spacy
import en_core_web_sm

nlp = en_core_web_sm.load()

import spacy
from spacy.lang.en.stop_words import STOP_WORDS #import a pre-trained NLP pipeline to help interpret the grammatical structure of the text
from string import punctuation
from heapq import nlargest

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting newspaper3k
  Downloading newspaper3k-0.2.8-py3-none-any.whl (211 kB)
[?25l[K     |█▌                              | 10 kB 17.7 MB/s eta 0:00:01[K     |███                             | 20 kB 16.1 MB/s eta 0:00:01[K     |████▋                           | 30 kB 10.5 MB/s eta 0:00:01[K     |██████▏                         | 40 kB 4.8 MB/s eta 0:00:01[K     |███████▊                        | 51 kB 4.6 MB/s eta 0:00:01[K     |█████████▎                      | 61 kB 5.3 MB/s eta 0:00:01[K     |██████████▉                     | 71 kB 6.1 MB/s eta 0:00:01[K     |████████████▍                   | 81 kB 5.6 MB/s eta 0:00:01[K     |██████████████                  | 92 kB 6.2 MB/s eta 0:00:01[K     |███████████████▌                | 102 kB 5.5 MB/s eta 0:00:01[K     |█████████████████               | 112 kB 5.5 MB/s eta 0:00:01[K     |██████████████████▋          

Function used:

In [4]:
#Extractive Text Summarization – 
#scores the sentences of the input by word frequency and returns the highest-ranked sentences unchanged as the summary.

# Steps involved:
# Look at the use frequency of specific words
# Sum the frequencies within each sentence
# Rank the sentences based on this sum

def summarize(text, per):
    """Return an extractive summary of ``text``.

    Words (lower-cased, excluding spaCy stop words and punctuation) are
    counted and their counts normalised by the highest count. Each sentence
    is scored with the sum of the normalised counts of its words, and the
    ``int(number_of_sentences * per)`` best sentences are returned in
    descending score order, joined by single spaces.

    Args:
        text: the text to summarise.
        per: fraction of the sentences to keep, e.g. 0.4.

    Returns:
        The summary string; '' when the text contains no countable word or
        when the requested fraction rounds down to zero sentences.
    """
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(text)

    # Count under the lower-cased form. The original counted under the
    # original casing but looked words up lower-cased when scoring, so
    # capitalised words were silently left out of the sentence scores.
    word_frequencies = {}
    for token in doc:
        word = token.text.lower()
        # STOP_WORDS is used directly instead of rebuilding a list per token.
        if word in STOP_WORDS or word in punctuation:
            continue
        word_frequencies[word] = word_frequencies.get(word, 0) + 1

    if not word_frequencies:
        # max() on an empty sequence would raise ValueError.
        return ''

    max_frequency = max(word_frequencies.values())
    for word in word_frequencies:
        word_frequencies[word] = word_frequencies[word] / max_frequency

    sentence_tokens = list(doc.sents)
    sentence_scores = {}
    for sent in sentence_tokens:
        for token in sent:
            weight = word_frequencies.get(token.text.lower())
            if weight is not None:
                sentence_scores[sent] = sentence_scores.get(sent, 0) + weight

    select_length = int(len(sentence_tokens) * per)
    best_sentences = nlargest(select_length, sentence_scores, key=sentence_scores.get)
    # Join with a space: joining with '' glued neighbouring sentences
    # together ("...onlineI'm ...").
    return ' '.join(sent.text for sent in best_sentences)

## Speech to Text: Videos

Installing and importing the required libraries:

In [5]:
! pip install SpeechRecognition # Used to recognise and extract the text from the audio file
! pip install moviepy # Used to extract the audio from the video file

import speech_recognition as sr
import moviepy.editor as mp

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting SpeechRecognition
  Downloading SpeechRecognition-3.8.1-py2.py3-none-any.whl (32.8 MB)
[K     |████████████████████████████████| 32.8 MB 102 kB/s 
[?25hInstalling collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.8.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Imageio: 'ffmpeg-linux64-v3.3.1' was not found on your computer; downloading it now.
Try 1. Download from https://github.com/imageio/imageio-binaries/raw/master/ffmpeg/ffmpeg-linux64-v3.3.1 (43.8 MB)
Downloading: 8192/45929032 bytes (0.0%)1753088/45929032 bytes (3.8%)2744320/45929032 bytes (6.0%)4988928/45929032 bytes (10.9%)7831552/45929032 bytes (17.1%)9871360/45929032 bytes (21.5%)

Function used:

In [6]:
def video_extract(path):
    """Return the speech found in the video at ``path`` as text.

    The video's audio track is written to '/content/converted.wav', read
    back in full and transcribed with the recogniser's Google Speech
    Recognition call.
    """
    wav_path = '/content/converted.wav'

    # Pull the audio track out of the video and store it as a wav file.
    video = mp.VideoFileClip(path)
    video.audio.write_audiofile(wav_path)

    recognizer = sr.Recognizer()
    wav_file = sr.AudioFile(wav_path)

    # Read the whole wav file into memory before sending it for recognition.
    with wav_file as source:
        recording = recognizer.record(source)

    return recognizer.recognize_google(recording)

## Speech to Text: Audios

Function used:

In [8]:
def audio_extract(path):
    """Return the speech found in the audio file at ``path`` as text.

    The audio is re-encoded to '/content/converted1.wav', read back in full
    and transcribed with the recogniser's Google Speech Recognition call.
    """
    wav_path = '/content/converted1.wav'

    # Convert the input to wav, the format the recogniser is fed with.
    mp.AudioFileClip(path).write_audiofile(wav_path)

    recognizer = sr.Recognizer()
    wav_file = sr.AudioFile(wav_path)

    # Read the whole wav file into memory before sending it for recognition.
    with wav_file as source:
        recording = recognizer.record(source)

    return recognizer.recognize_google(recording)

Main Function:

In [9]:
def _read_menu_number(prompt):
    """Show ``prompt`` and return the reply as an int, or None if it is not a whole number."""
    try:
        return int(input(prompt))
    except ValueError:
        return None


def main():
    """Interactive entry point: ask what to summarise and print the summary.

    Menu: 1 text (typed in or fetched from a URL), 2 image caption,
    3 audio file, 4 video file. Replies that are not numbers, or numbers
    outside the menu, now produce a short message; previously a non-numeric
    reply crashed with ValueError and an unknown number did nothing.
    """
    print('Enter what you want to summarize:')
    print('1. Text')
    print('2. Image')
    print('3. Audio')
    print('4. Video')
    choice = _read_menu_number('Your choice:')

    # Text
    if choice == 1:
        print('option 1: Enter the text')
        print('option 2: Enter the url')
        option = _read_menu_number('Selected option ')

        if option == 1:
            txt = input('Enter the text')
            print('ORIGINAL TEXT')
            print(txt)
            print('SUMMARIZED TEXT')
            print(summarize(txt, 0.4))

        elif option == 2:
            url = input('Enter the url ')
            article = Article(url)
            # Download and parse the article to extract its text.
            article.download()
            article.parse()
            print('SUMMARIZED TEXT')
            print(summarize(article.text, 0.15))

        else:
            print('Invalid option: please enter 1 or 2.')

    # Image
    elif choice == 2:
        new_img_caption_generator()

    # Audio
    elif choice == 3:
        audio_input = input("Enter the audio file: ")
        text = audio_extract(audio_input)
        print(summarize(text, 0.4))

    # Video
    elif choice == 4:
        video_input = input("Enter the file: ")
        text = video_extract(video_input)
        print(summarize(text, 0.4))

    else:
        print('Invalid choice: please enter a number from 1 to 4.')

In [10]:
# Notebook entry point: start the interactive menu defined by main().
main()

Enter what you want to summarize:
1. Text
2. Image
3. Audio
4. Video
Your choice:4
Enter the file: /content/videoplayback (3).mp4
[MoviePy] Writing audio in /content/converted.wav


100%|██████████| 615/615 [00:00<00:00, 1887.44it/s]

[MoviePy] Done.





I'm taking one other class online and one class at the Bedford campus is actually the first class I've ever had to make videos to Post onlineI'm in your interview speech class with you all along with this class


## Streamlit Implementation

In [None]:
'''! pip install streamlit
import streamlit as st
st.write(text)
#! streamlit run /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py'''