# **Pipeline 1. The Generator and the Commentator**

copyright 2024, Denis Rothman

**Notebook summary:**  
* Step 1.Creation, collection, and processing videos    
* Step 2.Downloading the video  
* Step 3.Splitting the video into frames  
* Step 4.Commenting the video     

Activate the Program Administrator in the **Administrator-Pipeline 1** section to run the functions, control the output, save the comments, and delete the files.


# Installing the environment

## Importing modules and libraries

In [None]:
# Standard library
import base64 # to encode videos as base64
from base64 import b64encode # to encode videos as base64
import csv # to save comments
from io import BytesIO #for a binary stream of data in memory
import os # to interact with the operating system
import shutil # to copy files to Google Drive
import subprocess # to run commands
import time # to measure execution time
import uuid # to generate unique ids

# Third-party
import cv2 # to split videos
from IPython.display import HTML, display # to display videos
import numpy as np # to use Numerical Python
import pandas as pd # to display comments
from PIL import Image # to display videos

## GitHub

In [None]:
def download(directory, filename):
    """Download one file from the book's GitHub repository into the working directory.

    Args:
        directory: Path of the folder inside the repository (e.g. "Chapter10/videos").
        filename: Name of the file to fetch; it is saved locally under the same name.

    The function never raises on a failed download: it prints a success or
    failure message instead, exactly like the original best-effort behavior.

    If the environment variable GITHUB_TOKEN is set, it is sent as an
    Authorization header (needed only while the repository is private).
    No credential is stored in the notebook itself.
    """
    # The base URL of the files in the GitHub repository
    base_url = 'https://raw.githubusercontent.com/Denis2054/RAG-Driven-Generative-AI/main/'

    # Complete URL for the file
    file_url = f"{base_url}{directory}/{filename}"

    # Build the command as an argument list (no shell), so that file names with
    # spaces or shell metacharacters cannot alter the command.
    # --fail makes curl exit non-zero on HTTP errors (e.g. 404); without it the
    # error page would be saved as the file and reported as a success.
    curl_command = ["curl", "--fail", "--location", "-o", filename]

    # Optional private-repository token, read from the environment only
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        curl_command += ["-H", f"Authorization: token {token}"]
    curl_command.append(file_url)

    try:
        # Execute the curl command
        subprocess.run(curl_command, check=True)
        print(f"Downloaded '{filename}' successfully.")
    except (subprocess.CalledProcessError, FileNotFoundError):
        # CalledProcessError: curl reported a failure;
        # FileNotFoundError: the curl executable itself is missing.
        print(f"Failed to download '{filename}'. Check the URL, your internet connection and the file path")

## OpenAI

In [None]:
# You can retrieve your API key from a file (1)
# or enter it manually (2).
# Comment out this cell if you want to enter your key manually.

# (1) Retrieve the API key from a file
# Store your key in a file and read it (you can type it directly in the
# notebook, but it will then be visible to anybody next to you).
from google.colab import drive
drive.mount('/content/drive')
with open("drive/MyDrive/files/api_key.txt", "r") as f:
    # strip() removes the trailing newline that readline() keeps, so the key
    # can be used as-is in the environment and in request headers.
    API_KEY = f.readline().strip()

Mounted at /content/drive


In [None]:
try:
  import openai
except:
  !pip install openai==0.28.0
  #!pip install openai==1.33.0
  import openai

Collecting openai==0.28.0
  Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.28.0


In [None]:
# (2) To enter your key manually, replace API_KEY below with your key.
# The key is published through the environment and then handed to the
# OpenAI client from there.
os.environ.update({'OPENAI_API_KEY': API_KEY})
openai.api_key = os.environ['OPENAI_API_KEY']

# Step 1.Creation, collection, and processing videos

This section is described in the `Videos_dataset_visualization.ipynb` notebook in the GitHub repository.


# Step 2.Downloading the video

The download function is in the *GitHub* subsection of *Installing the environment* of this notebook.   

It will be called by the Program Administrator in the *Administrator-Pipeline 1* section of this notebook.

### Displaying the video

In [None]:
def display_video(file_name, width=640, height=480):
    """Build an embeddable HTML player for a local MP4 file.

    Args:
        file_name: Path of the video file to embed.
        width: Width of the player in pixels (default 640).
        height: Height of the player in pixels (default 480).

    Returns:
        An IPython HTML object whose markup contains the whole video as a
        base64 data URL. The notebook renders it when it is the last
        expression of a cell or when it is passed to display().
    """
    # Open the file in binary mode
    with open(file_name, 'rb') as file:
        video_data = file.read()

    # Encode the video file as base64
    video_url = b64encode(video_data).decode()

    # Create an HTML string with the embedded video
    html = f'''
  <video width="{width}" height="{height}" controls>
    <source src="data:video/mp4;base64,{video_url}" type="video/mp4">
  Your browser does not support the video tag.
  </video>
  '''
    # Return the HTML object (it is built once; the caller decides when to show it)
    return HTML(html)

# Step 3.Splitting a video into frames




In [None]:
def split_file(file_name):
    """Split a video into JPEG frames saved in the current working directory.

    Frames are written as frame_0.jpg, frame_1.jpg, ... in reading order.
    After each write, the number of frames saved so far is printed.

    Args:
        file_name: Path of the video file to split.
    """
    cap = cv2.VideoCapture(file_name)
    try:
        # Report an unreadable video instead of silently producing no frames
        if not cap.isOpened():
            print(f"Error: could not open video '{file_name}'.")
            return

        frame_number = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:  # no more frames
                break

            # imwrite returns False when the image could not be saved
            if not cv2.imwrite(f"frame_{frame_number}.jpg", frame):
                print(f"Error: frame_{frame_number}.jpg could not be written.")
                break
            frame_number += 1
            print(f"Frame {frame_number} saved.")
    finally:
        # Always free the capture, even if reading or writing raised
        cap.release()

# Step 4.Commenting the video

In [None]:
def generate_comment(response_data):
    """Extract the caption text from a chat-completion response.

    Args:
        response_data: Response object exposing choices[0].message.content.

    Returns:
        The caption string, or "No caption available." when the response does
        not have the expected structure (missing attribute or key, empty
        choices list, or a None response).
    """
    try:
        return response_data.choices[0].message.content
    except (KeyError, IndexError, AttributeError, TypeError):
        # IndexError: empty choices list; TypeError: choices is not indexable
        print("Error extracting caption from response.")
        return "No caption available."

In [None]:
def save_comment(comment, frame_number, file_name):
    """Append one comment as a row of the CSV file "<file_name>.csv".

    The file is created on first use. The header row
    ID, FrameNumber, Comment, FileName is written when the file is new or
    empty, so the result loads directly into a pandas DataFrame.

    Args:
        comment: Text of the comment.
        frame_number: Number of the frame the comment describes.
        file_name: Name of the video file; also the stem of the CSV file.
    """
    # Append .csv to the provided file name to create the complete file name
    path = f"{file_name}.csv"

    # A header is needed when the file does not exist yet or exists but is empty
    write_header = not os.path.exists(path) or os.path.getsize(path) == 0

    # UTF-8 is set explicitly so the file does not depend on the platform's
    # locale encoding (pandas.read_csv reads UTF-8 by default).
    with open(path, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        if write_header:
            writer.writerow(['ID', 'FrameNumber', 'Comment', 'FileName'])
        # Generate a unique UUID for each comment
        unique_id = str(uuid.uuid4())
        # Write the data
        writer.writerow([unique_id, frame_number, comment, file_name])


In [None]:
def generate_openai_comments(filename, video_folder="/content", nb=3):
    """Comment a sample of the video frames with the OpenAI vision model.

    The .jpg files found in video_folder are counted, and every nb-th frame
    (frame numbers nb-1, 2*nb-1, ...) is read from "frame_<n>.jpg", sent to
    the model, and the resulting comment is saved with save_comment().

    Args:
        filename: Name of the video file; passed to save_comment() so the
            comments land in "<filename>.csv".
        video_folder: Folder containing the image frames (default "/content").
        nb: Sample frequency; one frame out of nb is analyzed (default 3).

    Errors on a frame (missing file, API failure, ...) are printed and the
    loop moves on to the next sampled frame.
    """
    total_frames = len([file for file in os.listdir(video_folder) if file.endswith('.jpg')])

    # Kept from the original sampler: nothing is analyzed unless the sample
    # frequency is smaller than the number of frames. nb < 1 samples nothing.
    if nb < 1 or nb >= total_frames:
        return

    for frame_number in range(nb - 1, total_frames, nb):
        print(f"Analyzing frame {frame_number}...")
        image_path = os.path.join(video_folder, f"frame_{frame_number}.jpg")
        try:
            # Read the frame and close the file before the (slow) API call
            with open(image_path, "rb") as image_file:
                image_data = image_file.read()
            encoded_image = base64.b64encode(image_data).decode('utf-8')

            # NOTE(review): the payload is unchanged from the original; confirm
            # the "image" content type against the current OpenAI vision docs.
            response = openai.ChatCompletion.create(
                model="gpt-4-vision-preview",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": "What is happening in this image?"},
                            {
                                "type": "image",
                                "image_url": f"data:image/jpeg;base64,{encoded_image}"
                            },
                        ],
                    }
                ],
                max_tokens=150,
            )
            comment = generate_comment(response)
            # Use the function's own parameter, not a notebook-level global
            save_comment(comment, frame_number, filename)

        except FileNotFoundError:
            print(f"Error: Frame {frame_number} not found.")
        except Exception as e:
            print(f"Unexpected error: {e}")

### Displaying the comments

In [None]:
def display_comments(file_name):
    """Load the comments saved for a video into a pandas DataFrame.

    Args:
        file_name: Name of the video file; the comments are read from
            "<file_name>.csv".

    Returns:
        The DataFrame read from that CSV file.
    """
    return pd.read_csv("{}.csv".format(file_name))

# Administrator-Pipeline 1

 ## Running jobs

In [None]:
session_time = time.time()  # Start timing the whole session

# Step 1. Selecting the video
print("1.Collecting video")
file_name = "skiing1.mp4" # Enter the name of the video file to process here
print(f"Video: {file_name}")

# Step 2. Downloading the video
print("2.Downloading video: downloading from GitHub")
directory = "Chapter10/videos"
download(directory,file_name)

# Step 2 (continued). Displaying the video
print("2.Downloading video: displaying video")
# display() is required here: a bare call in the middle of a cell returns the
# HTML object but the notebook only renders the cell's last expression.
display(display_video(file_name))

# Step 3. Splitting the video into frames
print("3.Splitting the video")
split_file(file_name)

# Step 4. Commenting the video
print("4:Commenting video: creating comments")
start_time = time.time()  # Start timing the commenting requests

video_folder = "/content"  # Folder containing your image frames
total_frames = len([file for file in os.listdir(video_folder) if file.endswith('.jpg')])
print(total_frames)
generate_openai_comments(file_name)

response_time = time.time() - start_time  # Time spent on the commenting requests only
total_time = time.time() - session_time  # Time spent on the whole session

print(f"Response Time: {response_time:.2f} seconds")  # Print response time
print(f"Total Time: {total_time:.2f} seconds")  # Print total session time

1.Collecting video
Video: skiing1.mp4
2.Downloading video: downloading from GitHub
Downloaded 'skiing1.mp4' successfully.
2.Downloading video: displaying video
3.Splitting the video
Frame 1 saved.
Frame 2 saved.
Frame 3 saved.
Frame 4 saved.
Frame 5 saved.
Frame 6 saved.
Frame 7 saved.
Frame 8 saved.
Frame 9 saved.
Frame 10 saved.
Frame 11 saved.
Frame 12 saved.
Frame 13 saved.
Frame 14 saved.
Frame 15 saved.
Frame 16 saved.
Frame 17 saved.
Frame 18 saved.
Frame 19 saved.
Frame 20 saved.
Frame 21 saved.
Frame 22 saved.
Frame 23 saved.
Frame 24 saved.
Frame 25 saved.
Frame 26 saved.
Frame 27 saved.
Frame 28 saved.
Frame 29 saved.
Frame 30 saved.
Frame 31 saved.
Frame 32 saved.
Frame 33 saved.
Frame 34 saved.
Frame 35 saved.
Frame 36 saved.
Frame 37 saved.
Frame 38 saved.
Frame 39 saved.
Frame 40 saved.
Frame 41 saved.
Frame 42 saved.
Frame 43 saved.
Frame 44 saved.
Frame 45 saved.
Frame 46 saved.
Frame 47 saved.
Frame 48 saved.
Frame 49 saved.
Frame 50 saved.
Frame 51 saved.
Frame 52 sa

## Controlling output

In [None]:
# Displaying video
# display_video() returns an HTML object; keeping the call as the last
# expression of the cell is what makes the notebook render the player.
print("Downloading video: displaying video")
display_video(file_name)

Downloading video: displaying video


In [None]:
# Display comments
# display_comments() returns the DataFrame read from "<file_name>.csv"; as the
# last expression of the cell it is rendered as a table.
print("Commenting video: displaying comments")
display_comments(file_name)

Commenting video: displaying comments


Unnamed: 0,ID,FrameNumber,Comment,FileName
0,55eefc36-811f-4938-83a4-700b926713dc,2,The image shows two individuals skiing on a sn...,skiing1.mp4
1,9868b1bb-0e16-4581-9ecf-af63eb0aef4b,5,This image appears to show three skiers descen...,skiing1.mp4
2,a36d90d3-041a-4445-9a86-4ffd630e4324,8,The image shows three individuals skiing downh...,skiing1.mp4
3,ad4b696b-8eb4-4763-a645-9813a419bbf5,11,This image shows three individuals skiing on a...,skiing1.mp4
4,b1ced818-1583-4a3a-a704-e4d53408a8cf,14,"In this image, there are several individuals s...",skiing1.mp4
5,d69f5662-09ef-4ffb-8fe8-accd90ac92a5,17,"In the image, there are people engaged in alpi...",skiing1.mp4
6,d1d36205-fbd0-4a8a-8423-d0f13048da5f,20,"In the image, we see three individuals engaged...",skiing1.mp4
7,c87b7056-eb9b-4b91-8e35-679ee8563682,23,This image depicts a group of skiers on a snow...,skiing1.mp4
8,018dfd12-37bc-46cd-a9e3-ab568d95d4b7,26,"In the image, there are several people engaged...",skiing1.mp4
9,f8ef4502-a17d-4e30-a293-eb6d9cb89482,29,"In the image, there are several individuals sk...",skiing1.mp4


## Saving comments

In [None]:
# Check which video file name is about to be used for the comments file
print(file_name)

skiing1.mp4


In [None]:
# Name of the comments file: the video file name followed by ".csv"
cpath = "{}.csv".format(file_name)

In [None]:
# Check the name of the comments file before saving it
print(cpath)

skiing1.mp4.csv


In [None]:
# Copy the comments file to Google Drive.
# `save` is a manual safety switch: set it to True to actually copy the file.
save = False
if save:
    if os.path.exists(cpath):
        # shutil.copy handles any file name (spaces, special characters) and
        # raises if the copy fails, so success is only printed when it is true.
        shutil.copy(cpath, os.path.join("/content/drive/MyDrive/files/videos", cpath))
        print(f"File {cpath} copied successfully.")
    else:
        print(f"No such file: {cpath}")

In some cases, we can also save the frames if necessary.


In [None]:
 # Create a directory for the video frames in a location and save the frames
 #!cp *.jpg /content/drive/MyDrive/files/videos/basketball3/

## Deleting files

In [None]:
delf=False  # double checking before deleting the files in a session
if delf==True:
  !rm -f *.mp4 # video files
  !rm -f *.jpg # frames
  !rm -f *.csv # comments