In [1]:
!pip install flask-ngrok
!pip install sumy
!pip install youtube_transcript_api

Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25
Collecting sumy
  Downloading sumy-0.9.0-py2.py3-none-any.whl (87 kB)
[K     |████████████████████████████████| 87 kB 5.5 MB/s 
Collecting pycountry>=18.2.23
  Downloading pycountry-20.7.3.tar.gz (10.1 MB)
[K     |████████████████████████████████| 10.1 MB 36.0 MB/s 
Collecting breadability>=0.1.20
  Downloading breadability-0.1.20.tar.gz (32 kB)
Building wheels for collected packages: breadability, pycountry
  Building wheel for breadability (setup.py) ... [?25l[?25hdone
  Created wheel for breadability: filename=breadability-0.1.20-py2.py3-none-any.whl size=21711 sha256=7eb9d098d4f15c4091b48b789858756d15bd57bbe0ab69133812498a23fbf835
  Stored in directory: /root/.cache/pip/wheels/d4/bf/51/81d27ad638e1a6dca4f362ecc33d1e2c764b8ea7ec751b8fc1
  Building wheel for pycountry (setup.py) ... [?25l[?25hdone
  Created wheel 

In [3]:
from flask_ngrok import run_with_ngrok
from flask import Flask, request, jsonify,send_file
import time
import base64
import csv
import requests
# Summarizer imports (from our companion module, Summarizer.py)
from Summarizer import sumy_lsa_summarize, sumy_luhn_summarize, sumy_text_rank_summarize, sumy_lex_rank_summarize, sumy_klsum_rank_summarize

In [4]:
# Sumy Imports
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.summarizers.kl import KLSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words

# Other Imports
from string import punctuation
from heapq import nlargest
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, VideoUnavailable, TooManyRequests, \
    TranscriptsDisabled, NoTranscriptAvailable
from youtube_transcript_api.formatters import TextFormatter

import nltk
nltk.download('punkt')

# NLTK Imports
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk.classify.util as util
from nltk.classify import NaiveBayesClassifier
from nltk.metrics import BigramAssocMeasures
from nltk.collocations import BigramCollocationFinder as BCF
import itertools
import pickle
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [15]:
from classifier import commentExtract, sentiment

In [16]:
# Create the Flask application object that the route decorators below register on.
app = Flask(__name__)
# Hook flask-ngrok into the app; per the recorded output, app.run() then also
# reports a public *.ngrok.io URL in addition to the local 127.0.0.1:5000 address.
run_with_ngrok(app)

In [None]:
@app.route("/")
def home():
    """Root endpoint: echo the request's base URL to stdout and reply with a success JSON."""
    base_url = request.base_url
    print(base_url)
    payload = {'msg': 'success'}
    return jsonify(payload)

def _summarize_error(message, status=400):
    """Build the JSON error payload (and HTTP status) shared by every /summarize/ failure path."""
    return jsonify(success=False, message=message, response=None), status


@app.route('/summarize/', methods=['GET'])
def transcript_fetched_query():
    """Fetch a YouTube video's transcript and return an extractive summary of it.

    Query parameters:
        id:      video id of the YouTube video.
        percent: integer percentage of the transcript's sentences to keep.
        choice:  one of "sumy-lsa-sum", "sumy-luhn-sum", "sumy-text-rank-sum",
                 "sumy-lex-rank-sum", "sumy-klsum-rank-sum".

    Returns:
        A ``(json_response, status_code)`` tuple. On success (200) the ``response``
        field carries ``processed_summary``, ``length_original`` and
        ``length_summary`` (both lengths are character counts). Invalid requests
        and transcript retrieval failures produce a JSON error payload instead of
        an unhandled exception.
    """
    # Getting arguments from the request
    video_id = request.args.get('id')  # video_id of the YouTube Video
    percent = request.args.get('percent')  # percentage of the summary
    choice = request.args.get('choice')  # summarization choice

    # Guard clauses: a missing or empty parameter is reported in the same
    # priority order as before (id, then percent, then choice).
    if not video_id:
        return _summarize_error("Video ID is not present in the request. "
                                "Please check that you have added id in your request correctly.")
    if not percent:
        return _summarize_error("No Percentage value is present in the request. "
                                "Please check whether your request is correct.")
    if not choice:
        return _summarize_error("No Choice parameter is present in the request. "
                                "Please request along with your choice correctly.")

    # Dispatch table: maps each accepted choice to its summarizer function.
    summarizers = {
        "sumy-lsa-sum": sumy_lsa_summarize,
        "sumy-luhn-sum": sumy_luhn_summarize,
        "sumy-text-rank-sum": sumy_text_rank_summarize,
        "sumy-lex-rank-sum": sumy_lex_rank_summarize,
        "sumy-klsum-rank-sum": sumy_klsum_rank_summarize,
    }
    summarize = summarizers.get(choice)
    if summarize is None:
        return _summarize_error("Invalid Choice: Please create your request with correct choice.")

    # A non-numeric percent used to raise ValueError (HTTP 500); reject it up front,
    # before spending a network call on the transcript.
    try:
        percent_value = int(percent)
    except ValueError:
        return _summarize_error("Invalid Percentage value: percent must be a whole number.")

    # Transcript retrieval can fail for reasons outside our control; translate the
    # library's exceptions into JSON errors instead of letting them surface as 500s.
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
    except VideoUnavailable:
        return _summarize_error("The requested video is unavailable. "
                                "Please check the video id in your request.")
    except TooManyRequests:
        return _summarize_error("YouTube is currently refusing transcript requests from this server "
                                "(too many requests). Please try again later.", 503)
    except TranscriptsDisabled:
        return _summarize_error("Subtitles are disabled for this video.")
    except (NoTranscriptAvailable, NoTranscriptFound):
        return _summarize_error("No suitable subtitles were found for this video.")

    # Using Formatter to flatten the received subtitles into one line of text.
    formatted_text = TextFormatter().format_transcript(transcript).replace("\n", " ")

    # Number of sentences in the transcript, and how many the summary would keep.
    num_sent_text = len(nltk.sent_tokenize(formatted_text))
    select_length = int(num_sent_text * (percent_value / 100))

    # The summary must contain at least one sentence.
    if select_length <= 0:
        return _summarize_error("Number of lines in the subtitles of your video is not "
                                "enough to generate a summary. Number of sentences in your video: {}"
                                .format(num_sent_text))

    # A single "sentence" usually means the subtitles carry no punctuation to split on.
    if num_sent_text <= 1:
        return _summarize_error("Subtitles are not formatted properly for this video. Unable to "
                                "summarize. There is a possibility that there is no punctuation in "
                                "subtitles of your video.")

    # `percent` is forwarded unchanged (the raw query-string value), exactly as before;
    # the summarizer helpers live in another module that is not visible here.
    summary = summarize(formatted_text, percent)

    # Returning Result
    response_list = {
        # 'fetched_transcript': formatted_text,
        'processed_summary': summary,
        'length_original': len(formatted_text),
        'length_summary': len(summary),
    }

    return jsonify(success=True,
                   message="Subtitles for this video was fetched and summarized successfully.",
                   response=response_list), 200

@app.route('/analysis/', methods=['GET'])
def videoAnalysis():
  """Run comment sentiment analysis for the YouTube video given by the ``id`` query parameter.

  Returns:
      A ``(json_response, status_code)`` tuple: 200 with the output of
      ``sentiment`` in ``response`` on success, or 400 when ``id`` is missing or empty.
  """
  video_id = request.args.get('id')

  # Without this guard a missing id would be handed to commentExtract as None.
  if not video_id:
    return jsonify(success=False,
                   message="Video ID is not present in the request. "
                           "Please check that you have added id in your request correctly.",
                   response=None), 400

  # commentExtract / sentiment come from the project's `classifier` module.
  comments = commentExtract(video_id)
  result = sentiment(comments)
  return jsonify(success=True,
                 message="Video Analysis completed",
                 response=result), 200

# Start the Flask server. Per the recorded output it keeps serving requests
# (on 127.0.0.1:5000 and the ngrok URL) until interrupted, so this cell does not return.
app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


 * Running on http://5dae-34-86-132-116.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


127.0.0.1 - - [14/Dec/2021 17:26:58] "[31m[1mGET /summarize/?id=vnVwvPpdFeY&percent=1&choice=sumy-lsa-sum HTTP/1.1[0m" 400 -
127.0.0.1 - - [14/Dec/2021 17:28:29] "[37mGET /summarize/?id=vnVwvPpdFeY&percent=50&choice=sumy-lsa-sum HTTP/1.1[0m" 200 -
127.0.0.1 - - [14/Dec/2021 17:28:38] "[31m[1mGET /summarize/?id=vnVwvPpdFeY&percent=1&choice=sumy-lsa-sum HTTP/1.1[0m" 400 -
[2021-12-14 17:31:54,448] ERROR in app: Exception on /summarize/ [GET]
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "/usr/local/lib/python3.7/dist-packages/flask/_compat.py", line 39, in reraise
    raise value
  File "/u

Comments downloading


127.0.0.1 - - [14/Dec/2021 17:37:45] "[37mGET /analysis/?id=lrEkYscgbqE HTTP/1.1[0m" 200 -
