# Install the libraries.

In [1]:
!pip install openai moviepy opencv-python

Collecting openai
  Downloading openai-1.25.2-py3-none-any.whl (312 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m312.9/312.9 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, openai
Successfully installed h11-0.14.0 httpcore-1.0.5 ht

# Load the libraries.

In [2]:
import moviepy.editor as mp
import base64
from openai import OpenAI
import shutil
import cv2
import os

In [4]:
from google.colab import userdata
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

In [5]:
client = OpenAI()

# Calling in the vison model.

In [7]:
def base64_vision(prompt):

  response = client.chat.completions.create(
      model = "gpt-4-turbo",
      messages=prompt,
      max_tokens = 300,
  )

  print(response.choices[0].message.content)

# Seperating video into individual frames.

In [8]:
def extract_frames(video_path, interval = 1):
  # Step 1:  Check if the 'Frames' directory exists, if not - then create it
  if os.path.exists('Frames'):
    # Delete all files in the Frames directory.
    for filename in os.listdir('Frames'):
      file_path = os.path.join('Frames', filename)

      try:
        if os.path.isfile(file_path) or os.path.islink(file_path):
          os.unlike(file_path)
        elif os.path.isdir(file_path):
          shutil.rmtree(file_path)

      except Exception as e:
        print('Failed to delete %s. Reason: %s' % (file_path, e))

  # Open the video file.
  video = cv2.VideoCapture(video_path)

  # Check if video opened successfully
  if not video.isOpened():
    print("Error opening video file")
    return

  # Get the frame rate of the video
  fps = video.get(cv2.CAP_PROP_FPS)

  # Calculate the frame number to skip
  frame_skip = int(fps * interval)

  frame_count = 0

  while True:
    # Read a frame
    success, frame = video.read()

    # If frame read successfully and its the correct interval
    if success and frame_count % frame_skip == 0:
      # Save the frame
      frame_filename = f'Frames/frame_{frame_count}.jpg'

      cv2.imwrite(frame_filename, frame)

      # if cv2.waitKey(1) & 0xFF == ord('q'):
      #   break

      print(f'Saved {frame_filename}')

    if not success:
      break

    # Release the video capture object
    video.release()
    cv2.destroyAllWindows()

In [9]:
def convert_to_base64(image_path):
  with open(image_path, 'rb') as image_file:
    encoded_string = base64.b64encode(image_file.read()).decode()
    return encoded_string

# Scrapping and transcribing the audio from the video.

In [10]:
def whisper(audio):
  audio_file = open(audio, 'rb')

  transcript = client.audio.transcriptions.create(
      file = audio_file,
      model = 'whisper-1'
  )

  return transcript.text

In [11]:
def extract_audio(video_path):
  # load the video file
  video = mp.VideoFileClip(video_path)

  # extract the audio from the video
  audio = video.audio

  # Save the audio
  if not audio:
    transcript = "There is no audio for this video."
    print(transcript)
    return transcript
  else:
    audio.write_audiofile("video_audio.mp3")
    transcript = whisper("video_audio.mp3")
    print(f"Whisper video transcript: {transcript}")
    return transcript

# Running all the file

In [23]:
def video_GPT():
  frames_directory = "Frames"
  base64frames = []

  # check if the directory exists
  if not os.path.exists(frames_directory):
    os.makedirs(frames_directory)
    print(f"Created {frames_directory} Directory. ")
  else:
    # Clear all the files in the directory
    for filename in os.listdir(frames_directory):
      file_path = os.path.join(frames_directory, filename)

      if os.path.isfile(file_path):
        os.remove(file_path)

  video_path = '/content/test.mp4'
  # video_path = 0
  extract_frames(video_path, interval = 0.5)
  transcript = extract_audio(video_path)

  for filename in sorted(os.listdir(frames_directory)):
    file_path = os.path.join(frames_directory, filename)

    if os.path.isfile(file_path):
      encoded_image = convert_to_base64(file_path)
      base64frames.append(encoded_image)

  prompt = [
      {
          "role": "user",
          "content":[
              f"Explain what is happening in this sequence of frames, and do it concisely."
              f"Here is the transcript of the video audio in case that helps you:{transcript}",
              *map(lambda x: {"image": x, "resize": 480},
                   base64frames),
          ],
      },
  ]
  base64_vision(prompt)

In [24]:
if __name__ == '__main__':
  video_GPT()

Saved Frames/frame_0.jpg
MoviePy - Writing audio in video_audio.mp3




MoviePy - Done.
Whisper video transcript: Oh wait, let's start again, let's start again, right, so I've been practicing and me and Pablo are going to have a race, why are you so focused, we'll go on three, one, two, three. That's marvellous, go, eat it, you ate mine and the last piece.
In this sequence of frames, a man and his bulldog, named Pablo, are preparing to have a fun eating race with slices of food laid out in a line on the floor. The man, clearly engaged in a playful activity, is setting a light-hearted and competitive tone. He counts to three to start the race, during which they will attempt to eat the food slices more quickly than the other. The scene captures a comedic and enjoyable moment between an owner and his pet, emphasizing their bond and the light-hearted competition. The caption "The owner stood no chance" humorously suggests that the bulldog was perhaps much quicker or more eager in consuming the food, outpacing the owner.
