In [None]:
# Schema with two classes. Each class lists its properties as name/dataType
# pairs; some properties also name a per-property "vectorizer".
# NOTE(review): the module names suggest a Weaviate schema -- confirm.
schema = {
    "classes": [
        {
            # Lightweight index entry for a video.
            "class": "IndexedVideos",
            "properties": [
                {"name": "filename", "dataType": ["string"]},
                {"name": "index_number", "dataType": ["int"]},
                {"name": "src_path", "dataType": ["string"]},
                {"name": "type", "dataType": ["string"]},
            ],
        },
        {
            # Full metadata record for a video.
            "class": "Videos",
            "properties": [
                {"name": "category", "dataType": ["string"]},
                {"name": "creator", "dataType": ["string"]},
                {"name": "description", "dataType": ["string"], "vectorizer": "text2vec-contextionary"},
                {"name": "file_size", "dataType": ["int"]},
                {"name": "filename", "dataType": ["string"]},
                {"name": "frame_rate", "dataType": ["float"]},
                {"name": "frame_src_path", "dataType": ["string"]},
                {"name": "gpt3_embeddings", "dataType": ["string"], "vectorizer": "gpt3-vectorizer"},
                {"name": "has_audio", "dataType": ["boolean"]},
                {"name": "index_number", "dataType": ["int"]},
                {"name": "keywords", "dataType": ["string"], "vectorizer": "text2vec-contextionary"},
                {"name": "original_resolution", "dataType": ["string"]},
                {"name": "resnet50_embeddings", "dataType": ["string"], "vectorizer": "resnet50-vectorizer"},
                {"name": "src_path", "dataType": ["string"]},
                {"name": "summary", "dataType": ["string"], "vectorizer": "text2vec-contextionary"},
                {"name": "type", "dataType": ["string"]},
            ],
            "moduleConfig": {
                "vectorization": {
                    "vectorizeClassName": "Videos",
                    "vectorizePropertyName": [
                        "summary",
                        "description",
                        "resnet50_embeddings",
                        "gpt3_embeddings",
                    ],
                },
            },
        },
    ],
}


In [None]:

import pandas as pd
import cv2
import numpy as np
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image


In [None]:

# Define your DataFrame schema here
class VideoMetadata:
    """Plain attribute container describing one video.

    The four constructor arguments are stored as given; every other
    attribute is created with the value ``None``.
    """

    # Attributes created as None, in creation order.
    _UNSET_FIELDS = (
        'category',
        'creator',
        'description',
        'file_size',
        'frame_rate',
        'frame_src_path',
        'gpt3_embeddings',
        'has_audio',
        'keywords',
        'original_resolution',
        'resnet50_embeddings',
        'summary',
    )

    def __init__(self, filename, index_number, src_path, video_type):
        self.filename, self.index_number = filename, index_number
        self.src_path, self.video_type = src_path, video_type
        for field_name in self._UNSET_FIELDS:
            setattr(self, field_name, None)


In [None]:

# Initialize your DataFrame here
df_columns = [
    # identifying fields
    'filename', 'index_number', 'src_path', 'video_type',
    # descriptive and technical metadata
    'category', 'creator', 'description', 'file_size',
    'frame_rate', 'frame_src_path', 'gpt3_embeddings', 'has_audio',
    'keywords', 'original_resolution', 'resnet50_embeddings', 'summary',
]
# Empty table: the columns above, no rows yet.
df = pd.DataFrame(columns=df_columns)


In [None]:

# Function to fetch videos and update DataFrame
def fetch_videos():
    """Placeholder for fetching videos and updating the DataFrame ``df``.

    Not implemented yet: it takes no arguments, does nothing and returns
    ``None``.
    """
    # TODO: fetch videos from your source and update the DataFrame df here.
    return None


In [None]:
import requests
import json
import os

def analyze_video(api_url, video_index, timeout=10):
    """Fetch the analysis of one video from the API.

    Issues ``GET {api_url}/api/v1/videos/{video_index}``.

    Args:
        api_url: Base URL of the server; the request path is appended to it
            directly.
        video_index: Value interpolated into the request path.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` can block indefinitely on an unresponsive host.

    Returns:
        The decoded JSON body when the server answers with HTTP 200.
        In every other case (non-200 status, network failure, timeout, or a
        body that is not valid JSON) ``{'error': 'Failed to analyze video'}``.
    """
    failure = {'error': 'Failed to analyze video'}
    try:
        response = requests.get(
            f'{api_url}/api/v1/videos/{video_index}', timeout=timeout
        )
        if response.status_code != 200:
            return failure
        return response.json()
    except (requests.RequestException, ValueError):
        # RequestException: connection errors, timeouts, invalid URLs.
        # ValueError: a 200 response whose body cannot be decoded as JSON.
        # Both are reported through the same error dict as a non-200 status,
        # so callers only have one failure shape to handle.
        return failure
    
# Main Controller
api_url = 'http://your_flask_server_address_here'  # placeholder: base URL of the server
video_index = 1  # placeholder: replace with the actual video index

# Request the analysis for that video; the result is either the JSON payload
# or an {'error': ...} dict.
analyzed_video = analyze_video(api_url=api_url, video_index=video_index)

In [None]:
import cv2
import pandas as pd

# Create a DataFrame to hold video metadata
df = pd.DataFrame(
    columns=[
        'filename',
        'src_path',
        'frame_rate',
        'file_size',
        'original_resolution',
        'has_audio',
    ]
)

def extract_structural_metadata(video_paths):
    """Append one row of structural metadata per readable video to the global ``df``.

    Each path is opened with ``cv2.VideoCapture``. Paths that cannot be
    opened are reported with a printed message and skipped.

    Args:
        video_paths: Iterable of video file paths.

    Returns:
        None. The module-level ``df`` is rebound to a new DataFrame that
        contains the previous rows followed by the new ones, with columns
        ``filename``, ``src_path``, ``frame_rate``, ``file_size``,
        ``original_resolution`` and ``has_audio``.

    Notes:
        * ``file_size`` is the size in bytes from ``os.path.getsize``, or
          ``None`` when the path cannot be stat'ed.
        * ``has_audio`` is always ``None``: OpenCV offers no direct way to
          detect an audio track, so another library is needed for that.
    """
    global df
    records = []

    for video_path in video_paths:
        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                print(f"Error opening video file {video_path}")
                continue

            frame_rate = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        finally:
            # Runs on the `continue` path and on errors too, so the capture
            # handle is never leaked.
            cap.release()

        try:
            file_size = os.path.getsize(video_path)
        except OSError:
            file_size = None

        records.append({
            'filename': os.path.basename(video_path),
            'src_path': video_path,
            'frame_rate': frame_rate,
            'file_size': file_size,
            'original_resolution': f"{width}x{height}",
            'has_audio': None,
        })

    if records:
        # DataFrame.append was removed in pandas 2.0. Collecting the rows and
        # concatenating once also avoids copying the frame on every iteration.
        df = pd.concat([df, pd.DataFrame(records)], ignore_index=True)

# Example usage
video_paths = [
    'path/to/video1.mp4',
    'path/to/video2.mp4',
]
extract_structural_metadata(video_paths=video_paths)

# Bare expression so the notebook renders the table as the cell output.
df


In [None]:
import cv2
import os
import pandas as pd

def extract_keyframes(df, frame_interval=50, temp_dir="temp_frames"):
    """Save every ``frame_interval``-th frame of each video and record it on ``df``.

    For each row, the video at ``row['src_path']`` is read frame by frame.
    Frames 0, ``frame_interval``, ``2 * frame_interval``, ... are written to
    ``temp_dir`` as ``frame_<k>_<filename>.jpg``, where ``<k>`` counts the
    keyframes saved for that video.

    Args:
        df: DataFrame with ``src_path`` and ``filename`` columns. It is
            modified in place.
        frame_interval: Distance, in frames, between saved keyframes.
        temp_dir: Directory for the JPEG files; created if missing.

    Returns:
        None. ``df['frame_src_path']`` is set to the path of the first
        keyframe of each video, or ``None`` when no frame could be read
        (for example because the file could not be opened).

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.

    Notes:
        The column holds one path per row because a DataFrame column needs
        exactly one value per row. Assigning the flat list of every keyframe
        of every video fails as soon as a video yields more or fewer than one
        keyframe. The remaining keyframes are still written to ``temp_dir``.
    """
    if frame_interval < 1:
        raise ValueError("frame_interval must be at least 1")

    os.makedirs(temp_dir, exist_ok=True)

    first_keyframes = []
    for _, row in df.iterrows():
        cap = cv2.VideoCapture(row['src_path'])
        saved_paths = []
        try:
            frame_index = 0
            while True:
                success, frame = cap.read()
                if not success:
                    # End of stream or unreadable file. Stop before the empty
                    # frame can reach cv2.imwrite.
                    break
                if frame_index % frame_interval == 0:
                    frame_file_path = os.path.join(
                        temp_dir, f"frame_{len(saved_paths)}_{row['filename']}.jpg"
                    )
                    cv2.imwrite(frame_file_path, frame)  # Save frame as JPEG file
                    saved_paths.append(frame_file_path)
                frame_index += 1
        finally:
            cap.release()

        first_keyframes.append(saved_paths[0] if saved_paths else None)

    # Update DataFrame with frame_src_path (one entry per row).
    df['frame_src_path'] = first_keyframes

# Sample DataFrame
df = pd.DataFrame(
    [
        {
            'filename': 'video1.mp4',
            'src_path': '/path/to/video1.mp4',
            'frame_rate': 30,
            'file_size': 1024,
            'original_resolution': '1920x1080',
            'has_audio': True,
        },
        {
            'filename': 'video2.mp4',
            'src_path': '/path/to/video2.mp4',
            'frame_rate': 25,
            'file_size': 2048,
            'original_resolution': '1280x720',
            'has_audio': True,
        },
    ]
)

# Extract keyframes for every row; the function sets df['frame_src_path'] in place.
extract_keyframes(df)

In [None]:
# Use the %pip magic rather than !pip so the packages are installed into the
# environment of the running kernel.
%pip install tensorflow pandas

In [None]:
import tensorflow as tf
import numpy as np
import cv2
import pandas as pd

# Initialize the ResNet50 model
resnet_model = tf.keras.applications.ResNet50(
    weights='imagenet',          # pretrained ImageNet weights
    include_top=False,           # no classification head
    pooling='avg',               # global average pooling over the final feature map
    input_shape=(224, 224, 3),   # height, width, channels
)

def get_resnet50_embeddings(image_path):
    """Compute the ResNet-50 embedding of one image file.

    Args:
        image_path: Path of an image readable by ``cv2.imread``.

    Returns:
        1-D NumPy array: the flattened output of ``resnet_model.predict``
        for the single image.

    Raises:
        ValueError: If the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None;
        # fail here with the offending path instead of inside cv2.resize.
        raise ValueError(f"Could not read image: {image_path}")

    # OpenCV decodes images as BGR, while Keras' ResNet-50 preprocess_input
    # expects RGB input (it performs the RGB->BGR swap itself). Without this
    # conversion the red and blue channels reach the network swapped.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Resize to the ResNet-50 input dimensions and add the batch axis.
    img = cv2.resize(img, (224, 224))
    batch = np.expand_dims(img, axis=0)

    # Normalize the image
    batch = tf.keras.applications.resnet50.preprocess_input(batch)

    # Get the embedding (flatten the output to a 1-D array)
    return resnet_model.predict(batch).flatten()

def update_dataframe_with_embeddings(df):
    """Add a ``resnet50_embeddings`` column derived from ``df['frame_src_path']``.

    ``get_resnet50_embeddings`` is called once per row, in row order, and
    the results are stored on ``df`` in place. Returns ``None``.
    """
    df['resnet50_embeddings'] = [
        get_resnet50_embeddings(path) for path in df['frame_src_path']
    ]

# Sample DataFrame (Assuming that the 'frame_src_path' column was populated by the previous function)
df = pd.DataFrame(
    [
        {
            'filename': 'video1.mp4',
            'src_path': '/path/to/video1.mp4',
            'frame_src_path': '/path/to/frame1.jpg',
        },
        {
            'filename': 'video2.mp4',
            'src_path': '/path/to/video2.mp4',
            'frame_src_path': '/path/to/frame2.jpg',
        },
        # ... (other columns)
    ]
)

# Compute the ResNet-50 embedding of each listed frame and store it on df.
update_dataframe_with_embeddings(df)


# Future Improvements as a Task Chain

In [None]:
class TaskCreationChain:
    """In-memory list of task dicts with a lowest-priority-value lookup."""

    def __init__(self):
        self.tasks = []

    def add_task(self, name, description, priority, additional_notes=None):
        """Append a task dict; its ``status`` always starts as 'Not Started'."""
        self.tasks.append(dict(
            name=name,
            description=description,
            priority=priority,
            status='Not Started',
            additional_notes=additional_notes,
        ))

    def get_next_task(self):
        """Return the task with the smallest ``priority``, or ``None`` if there are none.

        Among tasks with equal priority, the one added first is returned.
        The ``status`` field is not consulted, and ``self.tasks`` is left in
        its original order.
        """
        if not self.tasks:
            return None
        by_priority = sorted(self.tasks, key=lambda task: task['priority'])
        return by_priority[0]
    
# Initialize the TaskCreationChain
task_chain = TaskCreationChain()

# Planned improvements as (name, description) pairs. The position in this
# list, counted from 1, is used as the task's priority.
FUTURE_TASKS = [
    ('Security & Authentication', 'Implement secure access to Weaviate and Postgres.'),
    ('Error Handling', 'Implement comprehensive error handling and logging.'),
    ('Data Validation', 'Validate incoming video files and metadata.'),
    ('Data Backup', 'Implement data redundancy and backups.'),
    ('Monitoring and Alerts', 'Set up monitoring and alerting tools.'),
    ('Concurrency & Scalability', 'Handle multiple simultaneous requests.'),
    ('Rate Limiting', 'Implement rate limiting.'),
    ('Documentation', 'Create detailed API and codebase documentation.'),
    ('UUID Management', 'Clarify UUID generation and usage.'),
    ('Quality Assurance', 'Ensure metadata accuracy and consider human oversight.'),
    ('Compliance', 'Check GDPR and other data protection regulations.'),
    ('Versioning', 'Implement API and database schema versioning.'),
    ('Batch Processing', 'Handle batch video upload and metadata extraction.'),
    ('Cost Estimation', 'Estimate running costs of services.'),
    ('Metadata Standards', 'Adhere to metadata standards for video files.'),
    ('Testing', 'Implement unit, integration, and end-to-end tests.'),
    ('Data Retention Policy', 'Define data retention and purging policies.'),
    ('User Interface', 'Develop/Improve User Interface.'),
]

# Add tasks to the chain
for _priority, (_name, _description) in enumerate(FUTURE_TASKS, start=1):
    task_chain.add_task(_name, _description, _priority)

# Retrieve the next task to execute
next_task = task_chain.get_next_task()
print(f"Next task to execute: {next_task['name']}")

