In [20]:
from youtube_transcript_api import YouTubeTranscriptApi
import json
jsonFile = json.dumps(YouTubeTranscriptApi.get_transcript('VFJLMOk6daQ'))


In [5]:
#!/usr/bin/python

# Usage example:
# python captions.py --videoid='<video_id>' --name='<name>' --file='<file>' --language='<language>' --action='action'

import httplib2
import os
import sys

from apiclient.discovery import build_from_document
from apiclient.errors import HttpError
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
from oauth2client.tools import argparser, run_flow


# The CLIENT_SECRETS_FILE variable specifies the name of a file that contains

# the OAuth 2.0 information for this application, including its client_id and
# client_secret. You can acquire an OAuth 2.0 client ID and client secret from
# the {{ Google Cloud Console }} at
# {{ https://cloud.google.com/console }}.
# Please ensure that you have enabled the YouTube Data API for your project.
# For more information about using OAuth2 to access the YouTube Data API, see:
#   https://developers.google.com/youtube/v3/guides/authentication
# For more information about the client_secrets.json file format, see:
#   https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
CLIENT_SECRETS_FILE = "client_secrets.json"

# This OAuth 2.0 access scope allows for full read/write access to the
# authenticated user's account and requires requests to use an SSL connection.
YOUTUBE_READ_WRITE_SSL_SCOPE = "https://www.googleapis.com/auth/youtube.force-ssl"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"

# This variable defines a message to display if the CLIENT_SECRETS_FILE is
# missing.
MISSING_CLIENT_SECRETS_MESSAGE = """
WARNING: Please configure OAuth 2.0

To make this sample run you will need to populate the client_secrets.json file
found at:
   %s
with information from the APIs Console
https://console.developers.google.com

For more information about the client_secrets.json file format, please visit:
https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
""" % os.path.abspath(os.path.join(os.path.dirname('./'),
                                   CLIENT_SECRETS_FILE))


In [6]:
# Authorize the request and store authorization credentials.
def get_authenticated_service(args):
  flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE, scope=YOUTUBE_READ_WRITE_SSL_SCOPE,
    message=MISSING_CLIENT_SECRETS_MESSAGE)

  storage = Storage("%s-oauth2.json" % sys.argv[0])
  credentials = storage.get()

  if credentials is None or credentials.invalid:
    credentials = run_flow(flow, storage, args)

  # Trusted testers can download this discovery document from the developers page
  # and it should be in the same directory with the code.
  with open("youtube-v3-api-captions.json", "r") as f:
    doc = f.read()
    return build_from_document(doc, http=credentials.authorize(httplib2.Http()))

In [16]:
# Call the API's captions.list method to list the existing caption tracks.
def list_captions(youtube, video_id):
  results = youtube.captions().list(
    part="snippet",
    videoId=video_id
  ).execute()

  for item in results["items"]:
    id = item["id"]
    name = item["snippet"]["name"]
    language = item["snippet"]["language"]
    print ("Caption track '%s(%s)' in '%s' language." % (name, id, language))

  return results["items"]


# Call the API's captions.insert method to upload a caption track in draft status.
def upload_caption(youtube, video_id, language, name, file):
  insert_result = youtube.captions().insert(
    part="snippet",
    body=dict(
      snippet=dict(
        videoId=video_id,
        language=language,
        name=name,
        isDraft=True
      )
    ),
    media_body=file
  ).execute()

  id = insert_result["id"]
  name = insert_result["snippet"]["name"]
  language = insert_result["snippet"]["language"]
  status = insert_result["snippet"]["status"]
  print("Uploaded caption track '%s(%s) in '%s' language, '%s' status." % (name,
      id, language, status))


# Call the API's captions.update method to update an existing caption track's draft status
# and publish it. If a new binary file is present, update the track with the file as well.
def update_caption(youtube, caption_id, file):
  update_result = youtube.captions().update(
    part="snippet",
    body=dict(
      id=caption_id,
      snippet=dict(
        isDraft=False
      )
    ),
    media_body=file
  ).execute()

  name = update_result["snippet"]["name"]
  isDraft = update_result["snippet"]["isDraft"]
  print ("Updated caption track '%s' draft status to be: '%s'" % (name, isDraft))
  if file:
    print ("and updated the track with the new uploaded file.")


# Call the API's captions.download method to download an existing caption track.
def download_caption(youtube, caption_id, tfmt):
  subtitle = youtube.captions().download(
    id=caption_id,
    tfmt=tfmt
  ).execute()

  print ("First line of caption track: %s" % (subtitle))

# Call the API's captions.delete method to delete an existing caption track.
def delete_caption(youtube, caption_id):
  youtube.captions().delete(
    id=caption_id
  ).execute()

  print ("caption track '%s' deleted succesfully" % (caption_id))


if __name__ == "__main__":
  # The "videoid" option specifies the YouTube video ID that uniquely
  # identifies the video for which the caption track will be uploaded.
  argparser.add_argument("--videoid=rmfzwwrCrrU",
    help="Required; ID for video for which the caption track will be uploaded.")
  # The "name" option specifies the name of the caption trackto be used.
  argparser.add_argument("--name", help="Caption track name", default="YouTube for Developers")
  # The "file" option specifies the binary file to be uploaded as a caption track.
  argparser.add_argument("--file", help="Captions track file to upload")
  # The "language" option specifies the language of the caption track to be uploaded.
  argparser.add_argument("--language", help="Caption track language", default="en")
  # The "captionid" option specifies the ID of the caption track to be processed.
  argparser.add_argument("--captionid", help="Required; ID of the caption track to be processed")
  # The "action" option specifies the action to be processed.
  argparser.add_argument("--action", help="Action", default="all")


  args = argparser.parse_args()

  if (args.action in ('upload', 'list', 'all')):
    if not args.videoid:
          exit("Please specify videoid using the --videoid= parameter.")

  if (args.action in ('update', 'download', 'delete')):
    if not args.captionid:
          exit("Please specify captionid using the --captionid= parameter.")

  if (args.action in ('upload', 'all')):
    if not args.file:
      exit("Please specify a caption track file using the --file= parameter.")
    if not os.path.exists(args.file):
      exit("Please specify a valid file using the --file= parameter.")

  youtube = get_authenticated_service(args)
  try:
    if args.action == 'upload':
      upload_caption(youtube, args.videoid, args.language, args.name, args.file)
    elif args.action == 'list':
      list_captions(youtube, args.videoid)
    elif args.action == 'update':
      update_caption(youtube, args.captionid, args.file);
    elif args.action == 'download':
      download_caption(youtube, args.captionid, 'srt')
    elif args.action == 'delete':
      delete_caption(youtube, args.captionid);
    else:
      # All the available methods are used in sequence just for the sake of an example.
      upload_caption(youtube, args.videoid, args.language, args.name, args.file)
      captions = list_captions(youtube, args.videoid)

      if captions:
        first_caption_id = captions[0]['id'];
        update_caption(youtube, first_caption_id, None);
        download_caption(youtube, first_caption_id, 'srt')
        delete_caption(youtube, first_caption_id);
  except (HttpError, e):
    print ("An HTTP error %d occurred:\n%s" % (e.resp.status, e.content))
  else:
    print ("Created and managed caption tracks.")

ArgumentError: argument --videoid=rmfzwwrCrrU: conflicting option string: --videoid=rmfzwwrCrrU

In [11]:
print('I give up!!')
print('youtube API only allows the uploader to get transcript which is really limiting :(')

I give up!!
youtube API only allows the uploader to get transcript which is really limiting :(


In [10]:
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

from sumy.parsers.html import HtmlParser
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer as Summarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words


LANGUAGE = "english"
SENTENCES_COUNT = 2

STRING = '''
Two years ago, Facebook-owned Oculus quietly began demoing a prototype of a stand-alone VR headset codenamed Santa Cruz. Unlike the Oculus Rift, this prototype didn’t need to be wired to a PC to work. Like the Oculus Rift, it promised an immersive quality that might help people actually understand why they’d want to hang out in a virtual environment, wearing a device on their face.
Now that prototype is coming out. At its annual developer conference today in San Jose, Oculus unveiled the Quest, the official name for the Santa Cruz headset. Oculus plans to ship it next spring for $399.
That puts Quest squarely in the middle of Oculus’s other two headsets: the mobile Oculus Go, priced at $199, and the Oculus Rift, which also costs around $400 but requires a high-powered PC to use. But the Quest headset isn’t just the end result of a product differentiation strategy. It’s a showcase for potentially game-changing virtual reality technology, and part of Facebook CEO Mark Zuckerberg’s lofty goal to get 1 billion people using VR, as he expressed during the event's keynote address
The Quest hardware looks unremarkable from the outside, and it uses the same Touch controllers as the Rift. But there are four wide-angle sensors on the headset that are part of the technology that makes the Quest stand out. The Quest has six degrees of freedom—“6DoF” as it’s sometimes called—which allows your head to be tracked positionally, rather than just rotationally. In other words, you can move, not just look around.
High-powered, wired headsets like the Oculus Rift and the HTC Vive also offer 6DoF, but it’s technically complicated to achieve with a stand-alone headset; the Oculus Go only offers three degrees of freedom. The Lenovo Mirage Solo stand-alone headset, released earlier this year, can deliver some 6DoF head movement, but only within a small area, and its hand controllers are 3DoF.
Usually this six degrees of freedom is accomplished using sensors around the physical room, in addition to sensors on the VR headset. In the case of the Quest, no room sensors are needed. Instead, Facebook VR executive Hugo Barra said, the Quest is using “advanced computer vision algorithms to track your position in real time, without any external sensors.
The company is calling this technology Insight. The four wide-angle sensors on the headset look for edges, corners, and distinct features in the room around you, and then build a three-dimensional map of the environment. Barra said the headset is calculating an estimate of your head position “every millisecond,” and can even deliver precise tracking in larger than room-scale areas. As an example, the company set up a 4,000 square-foot “arena” at Oculus Connect, where people can use Quest headsets to play a free-roaming version of the Wild West shooter Dead and Buried.
Basically, Oculus is taking a technology that usually requires a bunch of sensors at multiple touch points around a room, and recreating the same experience with just the four sensors on your head, using machine learning and computer vision. It’s doing this using every piece of room info it can grab: floors, ceilings, light, wall art, furniture. There inevitably could be challenges with this; things like super shiny floors and white, unmarked walls could theoretically trip it up. Barra said Oculus has tested Insight in “hundreds of different home spaces” and is confident it will work even in these environments.
The optics in the Quest appear to be the same as the Oculus Go; Quest has a display resolution of 1600 by 1400 per eye. However, the Quest has a “lens spacing adjustment to help maximize visual comfort,” according to Oculus. (While the Go display is good, IPD adjustment is key for a comfortable experience.) The built-in audio on the headset is also supposed to be improved from the Go’s sound, and the Quest has 64 gigabytes of internal storage, up from the Go’s 32 gigs.
Of course, VR headsets are only as compelling as the apps that run on them. And considering that Facebook’s business is getting people sucked into using its apps, Facebook-owned Oculus might be better positioned to offer addictive games and social teleconferencing apps in VR than some of its competitors. Oculus says it has 50 app titles lined up for the Quest’s launch next year—including standout Rift games Robo Recall and The Climb—with more in the works.
As Zuckerberg pointed out in his remarks, VR has a long way to go before reaching that billion-user milestone, or even 2 percent of it. He identified ergonomics and a self-sustaining software ecosystem as the two fronts necessary to get there, but there are other, more important ones to deal with first: wires and a steep learning curve. Stand-alone headsets represent a solution to those particular stumbling blocks—and while this year’s Oculus Go stepped gingerly over those blocks, the Quest is opting for a flying leap.
'''


if __name__ == "__main__":
    url = "http://web.archive.org/web/20130116005443/http://tomayko.com/writings/rest-to-my-wife"
    parser = PlaintextParser.from_string(STRING, Tokenizer(LANGUAGE))
    # or for plain text files
    # parser = PlaintextParser.from_file("document.txt", Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)

    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)

    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        print(sentence)

It’s a showcase for potentially game-changing virtual reality technology, and part of Facebook CEO Mark Zuckerberg’s lofty goal to get 1 billion people using VR, as he expressed during the event's keynote address The Quest hardware looks unremarkable from the outside, and it uses the same Touch controllers as the Rift.
Instead, Facebook VR executive Hugo Barra said, the Quest is using “advanced computer vision algorithms to track your position in real time, without any external sensors.
