In [59]:
import csv
import pymongo
import subprocess

# MongoDB connection information: a client for the server at localhost:27017
# and the database handle `db`, which the functions below read as a global.
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
db = mongo_client["your_database_name"]

# Function to populate Baselight collection
def populate_baselight_collection(baselight_file):
    """Parse Baselight export lines and store them in the ``baselight`` collection.

    Each non-blank line is split on whitespace: the first token is stored as
    ``folder`` and every following token made only of digits is stored, as an
    ``int``, in ``frames``.  All other tokens are dropped.

    Args:
        baselight_file: Iterable of text lines (an open file works).

    Nothing is written when no line yields a document, because
    ``insert_many`` rejects an empty document list.
    """
    output_data = []

    for raw_line in baselight_file:
        parts = raw_line.split()
        if not parts:
            continue  # Skip empty / whitespace-only lines
        folder, *tokens = parts
        frames = [int(token) for token in tokens if token.isdigit()]
        output_data.append({"folder": folder, "frames": frames})

    if not output_data:
        return  # Nothing parsed: skip the insert instead of failing on an empty batch

    # Insert data into Baselight collection
    db["baselight"].insert_many(output_data)

def populate_xytech_collection(xytech_file):
    """Parse a Xytech work order and store it in the ``xytech`` collection.

    A line starting with ``Xytech Workorder`` opens a new entry whose
    ``workorder`` is the last whitespace-separated token of that line.
    ``Location:`` / ``Notes:`` header lines and blank lines are skipped.
    Every other line is filed by its first character: lines starting with
    ``/`` go to the ``location`` list, the rest are joined with newlines
    into ``notes``.  Lines seen before the first work order header are
    attached to that first entry.

    Args:
        xytech_file: Iterable of text lines (an open file works).

    Nothing is written when no work order was found, because
    ``insert_many`` rejects an empty document list.
    """
    output_data = []
    workorder = None
    location = []
    notes = []

    def current_entry():
        # Snapshot of the entry being accumulated right now.
        return {"workorder": workorder, "location": location, "notes": "\n".join(notes)}

    for raw_line in xytech_file:
        line = raw_line.strip()
        if not line or line.startswith(("Location:", "Notes:")):
            continue  # Skip empty lines and the section header lines
        if line.startswith("Xytech Workorder"):
            if workorder is not None:  # Save previous data if any
                output_data.append(current_entry())
                location, notes = [], []  # Fresh lists for the next entry
            workorder = line.split()[-1]  # Extract workorder number
        elif line.startswith("/"):
            location.append(line)  # Location path
        else:
            notes.append(line)  # Anything else is treated as a note

    # Save the last entry after loop ends
    if workorder is not None:
        output_data.append(current_entry())

    if not output_data:
        return  # No work order parsed: skip the insert instead of failing on an empty batch

    # Insert data into Xytech collection
    db["xytech"].insert_many(output_data)


#proj1 script
def process_collections():
    """Collapse the stored Baselight frames into per-folder frame ranges.

    Reads every document of the ``baselight`` collection.  The folder is
    split on "/", element 1 of the split is dropped, and every ``XY_File``
    line containing the re-joined path replaces the folder (the last
    matching line wins).  Consecutive frame numbers are then merged.

    Returns:
        A list of ``(folder, frames)`` tuples where ``frames`` is an ``int``
        for an isolated frame and a ``"start-end"`` string for a run.
    """
    collapsed = []

    def emit(location, first, last):
        # An isolated frame stays an int; a run becomes "first-last".
        collapsed.append((location, first if first == last else f"{first}-{last}"))

    projection = {"_id": 0, "folder": 1, "frames": 1}
    for doc in db["baselight"].find({}, projection):
        location = doc["folder"]
        tokens = [str(frame) for frame in doc["frames"]]

        # Rebuild the folder path without element 1 of its "/" split.
        segments = location.split("/")
        if len(segments) > 1:
            del segments[1]
        needle = "/".join(segments)

        for entry in XY_File:
            if needle in entry:
                location = entry.strip()

        run_start = run_end = None
        for token in tokens:
            if not token.isdigit():
                continue
            value = int(token)
            if run_start is not None and value == run_end + 1:
                run_end = value  # Extends the current run
                continue
            if run_start is not None:
                emit(location, run_start, run_end)  # Gap: close the previous run
            run_start = run_end = value

        if run_start is not None:
            emit(location, run_start, run_end)

    return collapsed

# Function to get video duration using ffmpeg
import subprocess
import re

def get_video_duration(video_file):
    """Return the length of ``video_file`` in frames, or ``None`` on failure.

    Asks ``ffprobe`` for the ``duration`` and ``r_frame_rate`` of the first
    video stream and returns their product.  Despite the name, the result is
    a frame count (seconds * frames per second), not a time.

    Args:
        video_file: Path of the video passed to ``ffprobe``.

    Returns:
        ``duration * frame_rate`` as a float, or ``None`` when ``ffprobe``
        cannot be started, exits with an error, or prints values that cannot
        be parsed.  Every failure is reported with ``print``.
    """
    command = ['ffprobe', '-v', 'error', '-select_streams', 'v:0', '-show_entries', 'stream=duration,r_frame_rate', '-of', 'csv=p=0', video_file]

    def report_failure(message):
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: {message}")

    def to_number(text):
        # Values may be plain numbers or "numerator/denominator" ratios.
        if '/' in text:
            numerator, denominator = map(float, text.split('/'))
            return numerator / denominator
        return float(text)

    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT).decode('utf-8').strip()
    except OSError as e:
        # Raised when the ffprobe executable is missing or cannot be run.
        report_failure(e)
        return None
    except subprocess.CalledProcessError as e:
        report_failure(e.output.decode('utf-8').strip())
        return None

    parts = output.split(',')
    try:
        if len(parts) > 1:
            # NOTE(review): ffprobe may print the two fields in its own order
            # rather than the requested one - confirm.  Only their product is
            # used, so the order does not change the result.
            duration = to_number(parts[0])
            frame_rate = to_number(parts[1])
        else:
            duration = float(parts[0])
            frame_rate = 24  # Assuming default frame rate as 24 fps
    except (ValueError, ZeroDivisionError):
        # e.g. "N/A" or a "0/0" ratio: report it like any other failure.
        report_failure(f"unexpected ffprobe output: {output!r}")
        return None

    return duration * frame_rate


# Function to process collections and filter frames based on video duration
def process_collectionsVideo(video_file):
    """Print every frame range that starts inside ``video_file``, with timecodes.

    The video length in frames comes from ``get_video_duration`` and the
    candidate frames from ``process_collections``.  Isolated frames are not
    printed; for each range the location, the range and one timecode per
    frame are printed.

    Args:
        video_file: Path of the video the frames are checked against.

    Returns:
        None.  Nothing is printed when the video length is unavailable.
    """
    # Get video duration
    video_duration = get_video_duration(video_file)
    if video_duration is None:
        # get_video_duration already reported the failure; comparing frame
        # numbers with None below would raise TypeError.
        return

    for folder, frames in process_collections():
        if isinstance(frames, int):
            continue  # Isolated frames are not part of this report
        start_frame, end_frame = map(int, frames.split('-'))
        # NOTE(review): only the start of the range is checked, so a range
        # may run past the end of the video - confirm this is intended.
        if not start_frame < video_duration:
            continue

        # Print valid frames with timecodes for current folder
        print(f"Location: {folder}")
        print("Valid Frames to fix:")
        print(f"Frame {start_frame} to Frame {end_frame}:")
        for frame in range(start_frame, end_frame + 1):
            print(f"  {frame_to_timecode(frame)}")
        print()
        
# Function to convert frame number to timecode
def frame_to_timecode(frame, fps=60):
    """Convert a frame number to an ``HH:MM:SS:FF`` timecode string.

    Args:
        frame: Frame number to convert.
        fps: Frames per second used for the conversion.  Defaults to 60,
            the rate this function previously had hard-coded.

    Returns:
        The timecode, each field zero-padded to at least two digits.
    """
    # divmod keeps the arithmetic exact for integer input instead of going
    # through a float number of seconds.
    whole_seconds, frames = divmod(frame, fps)
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}:{int(frames):02d}"
            
# Input files: the Baselight export, the Xytech work order and the video
baselight_file_path = "Baselight_export.txt"
xytech_file_path = "Xytech.txt"
video_file_path = "twitch_nft_demo.mp4"

# Handle on the Baselight export (the same file as baselight_file_path)
BL_File = open(baselight_file_path, "r")

# Xytech lines kept in memory; process_collections searches them for folders
with open(xytech_file_path) as f:
    XY_File = f.read().splitlines()

# Load the Baselight export into its MongoDB collection
with open(baselight_file_path, "r") as baselight_file:
    populate_baselight_collection(baselight_file)

# Load the Xytech work order into its MongoDB collection
with open(xytech_file_path, "r") as xytech_file:
    populate_xytech_collection(xytech_file)

# Report the stored frame ranges that start inside the video
process_collectionsVideo(video_file_path)


Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Valid Frames to fix:
Frame 2 to Frame 4:
  00:00:00:02
  00:00:00:03
  00:00:00:04

Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Valid Frames to fix:
Frame 31 to Frame 33:
  00:00:00:31
  00:00:00:32
  00:00:00:33

Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Valid Frames to fix:
Frame 67 to Frame 70:
  00:00:01:07
  00:00:01:08
  00:00:01:09
  00:00:01:10

Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Valid Frames to fix:
Frame 122 to Frame 123:
  00:00:02:02
  00:00:02:03

Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Valid Frames to fix:
Frame 1111 to Frame 1112:
  00:00:18:31
  00:00:18:32

Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Valid Frames to fix:
Frame 1201 to Frame 1205:
  00:00:20:01
  00:00:20:02
  00:00:20:03
  00:00:20:04
  00:00:20:05

Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Valid Frames to fix:
Frame 1211 to Frame 1215:
  

In [68]:
import csv
import pymongo
import subprocess
import pandas as pd

# MongoDB connection information: a client for the server at localhost:27017
# and the database handle `db`, which the functions below read as a global.
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
db = mongo_client["your_database_name"]

# Function to populate Baselight collection
def populate_baselight_collection(baselight_file):
    """Parse Baselight export lines and store them in the ``baselight`` collection.

    Each non-blank line is split on whitespace: the first token is stored as
    ``folder`` and every following token made only of digits is stored, as an
    ``int``, in ``frames``.  All other tokens are dropped.

    Args:
        baselight_file: Iterable of text lines (an open file works).

    Nothing is written when no line yields a document, because
    ``insert_many`` rejects an empty document list.
    """
    output_data = []

    for raw_line in baselight_file:
        parts = raw_line.split()
        if not parts:
            continue  # Skip empty / whitespace-only lines
        folder, *tokens = parts
        frames = [int(token) for token in tokens if token.isdigit()]
        output_data.append({"folder": folder, "frames": frames})

    if not output_data:
        return  # Nothing parsed: skip the insert instead of failing on an empty batch

    # Insert data into Baselight collection
    db["baselight"].insert_many(output_data)

def populate_xytech_collection(xytech_file):
    """Parse a Xytech work order and store it in the ``xytech`` collection.

    A line starting with ``Xytech Workorder`` opens a new entry whose
    ``workorder`` is the last whitespace-separated token of that line.
    ``Location:`` / ``Notes:`` header lines and blank lines are skipped.
    Every other line is filed by its first character: lines starting with
    ``/`` go to the ``location`` list, the rest are joined with newlines
    into ``notes``.  Lines seen before the first work order header are
    attached to that first entry.

    Args:
        xytech_file: Iterable of text lines (an open file works).

    Nothing is written when no work order was found, because
    ``insert_many`` rejects an empty document list.
    """
    output_data = []
    workorder = None
    location = []
    notes = []

    def current_entry():
        # Snapshot of the entry being accumulated right now.
        return {"workorder": workorder, "location": location, "notes": "\n".join(notes)}

    for raw_line in xytech_file:
        line = raw_line.strip()
        if not line or line.startswith(("Location:", "Notes:")):
            continue  # Skip empty lines and the section header lines
        if line.startswith("Xytech Workorder"):
            if workorder is not None:  # Save previous data if any
                output_data.append(current_entry())
                location, notes = [], []  # Fresh lists for the next entry
            workorder = line.split()[-1]  # Extract workorder number
        elif line.startswith("/"):
            location.append(line)  # Location path
        else:
            notes.append(line)  # Anything else is treated as a note

    # Save the last entry after loop ends
    if workorder is not None:
        output_data.append(current_entry())

    if not output_data:
        return  # No work order parsed: skip the insert instead of failing on an empty batch

    # Insert data into Xytech collection
    db["xytech"].insert_many(output_data)


#proj1 script
def process_collections():
    """Collapse the stored Baselight frames into per-folder frame ranges.

    Reads every document of the ``baselight`` collection.  The folder is
    split on "/", element 1 of the split is dropped, and every ``XY_File``
    line containing the re-joined path replaces the folder (the last
    matching line wins).  Consecutive frame numbers are then merged.

    Returns:
        A list of ``(folder, frames)`` tuples where ``frames`` is an ``int``
        for an isolated frame and a ``"start-end"`` string for a run.
    """
    collapsed = []

    def emit(location, first, last):
        # An isolated frame stays an int; a run becomes "first-last".
        collapsed.append((location, first if first == last else f"{first}-{last}"))

    projection = {"_id": 0, "folder": 1, "frames": 1}
    for doc in db["baselight"].find({}, projection):
        location = doc["folder"]
        tokens = [str(frame) for frame in doc["frames"]]

        # Rebuild the folder path without element 1 of its "/" split.
        segments = location.split("/")
        if len(segments) > 1:
            del segments[1]
        needle = "/".join(segments)

        for entry in XY_File:
            if needle in entry:
                location = entry.strip()

        run_start = run_end = None
        for token in tokens:
            if not token.isdigit():
                continue
            value = int(token)
            if run_start is not None and value == run_end + 1:
                run_end = value  # Extends the current run
                continue
            if run_start is not None:
                emit(location, run_start, run_end)  # Gap: close the previous run
            run_start = run_end = value

        if run_start is not None:
            emit(location, run_start, run_end)

    return collapsed

# Function to get video duration using ffmpeg
import subprocess
import re

def get_video_duration(video_file):
    """Return the length of ``video_file`` in frames, or ``None`` on failure.

    Asks ``ffprobe`` for the ``duration`` and ``r_frame_rate`` of the first
    video stream and returns their product.  Despite the name, the result is
    a frame count (seconds * frames per second), not a time.

    Args:
        video_file: Path of the video passed to ``ffprobe``.

    Returns:
        ``duration * frame_rate`` as a float, or ``None`` when ``ffprobe``
        cannot be started, exits with an error, or prints values that cannot
        be parsed.  Every failure is reported with ``print``.
    """
    command = ['ffprobe', '-v', 'error', '-select_streams', 'v:0', '-show_entries', 'stream=duration,r_frame_rate', '-of', 'csv=p=0', video_file]

    def report_failure(message):
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: {message}")

    def to_number(text):
        # Values may be plain numbers or "numerator/denominator" ratios.
        if '/' in text:
            numerator, denominator = map(float, text.split('/'))
            return numerator / denominator
        return float(text)

    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT).decode('utf-8').strip()
    except OSError as e:
        # Raised when the ffprobe executable is missing or cannot be run.
        report_failure(e)
        return None
    except subprocess.CalledProcessError as e:
        report_failure(e.output.decode('utf-8').strip())
        return None

    parts = output.split(',')
    try:
        if len(parts) > 1:
            # NOTE(review): ffprobe may print the two fields in its own order
            # rather than the requested one - confirm.  Only their product is
            # used, so the order does not change the result.
            duration = to_number(parts[0])
            frame_rate = to_number(parts[1])
        else:
            duration = float(parts[0])
            frame_rate = 24  # Assuming default frame rate as 24 fps
    except (ValueError, ZeroDivisionError):
        # e.g. "N/A" or a "0/0" ratio: report it like any other failure.
        report_failure(f"unexpected ffprobe output: {output!r}")
        return None

    return duration * frame_rate


def process_collectionsVideo(video_file):
    """Collect, print and return the frames to fix that lie inside ``video_file``.

    The video length in frames comes from ``get_video_duration`` and the
    candidate frames from ``process_collections``.  Isolated frames are
    collected silently; ranges are also printed through
    ``print_timecode_range``.

    Args:
        video_file: Path of the video the frames are checked against.

    Returns:
        A list of ``(folder, frames, timecode)`` tuples: ``frames`` is an
        ``int`` for an isolated frame and ``"start-end"`` for a range, with
        ``timecode`` formatted to match.  The list is empty when the video
        length is unavailable.
    """
    valid_frames_with_timecodes = []

    # Get video duration
    video_duration = get_video_duration(video_file)
    if video_duration is None:
        # get_video_duration already reported the failure; comparing frame
        # numbers with None below would raise TypeError.
        return valid_frames_with_timecodes

    # Filter frames based on video duration
    for folder, frames in process_collections():
        if isinstance(frames, int):
            if frames < video_duration:
                valid_frames_with_timecodes.append((folder, frames, frame_to_timecode(frames)))
            continue

        start_frame, end_frame = map(int, frames.split('-'))
        # NOTE(review): only the start of the range is checked, so a range
        # may run past the end of the video - confirm this is intended.
        if start_frame < video_duration:
            print("Valid Frames to fix:")
            print_timecode_range(folder, start_frame, end_frame)
            valid_frames_with_timecodes.append((folder, f"{start_frame}-{end_frame}", f"{frame_to_timecode(start_frame)}-{frame_to_timecode(end_frame)}"))

    # The caller exports this list, so it has to be returned.
    return valid_frames_with_timecodes
            

def print_timecode_range(folder, start_frame, end_frame):
    """Print one frame range: its location, frame numbers and timecodes.

    Args:
        folder: Location shown on the first line.
        start_frame: First frame of the range.
        end_frame: Last frame of the range.
    """
    first_tc, last_tc = (frame_to_timecode(f) for f in (start_frame, end_frame))
    report = (
        f"Location: {folder}",
        f"Range: {start_frame} - {end_frame}",
        f"Timecode Range: {first_tc} - {last_tc}\n",  # blank line after each range
    )
    print("\n".join(report))



# Function to convert frame number to timecode
def frame_to_timecode(frame, fps=60):
    """Convert a frame number to an ``HH:MM:SS:FF`` timecode string.

    Args:
        frame: Frame number to convert.
        fps: Frames per second used for the conversion.  Defaults to 60,
            the rate this function previously had hard-coded.

    Returns:
        The timecode, each field zero-padded to at least two digits.
    """
    # divmod keeps the arithmetic exact for integer input instead of going
    # through a float number of seconds.
    whole_seconds, frames = divmod(frame, fps)
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}:{int(frames):02d}"

            
def export_to_xls(data, output_file):
    """Write ``data`` to an Excel file and print a confirmation.

    Args:
        data: Rows for a three-column table (Location, Frames, Timecode).
        output_file: Destination passed to ``DataFrame.to_excel``.
    """
    header = ["Location", "Frames", "Timecode"]
    # index=False keeps the DataFrame's row index out of the sheet.
    pd.DataFrame(data, columns=header).to_excel(output_file, index=False)
    print(f"Data exported to {output_file}")

# Paths to Baselight and Xytech files, the video to check and the report
baselight_file_path = "Baselight_export.txt"
xytech_file_path = "Xytech.txt"
video_file_path = "twitch_nft_demo.mp4"
output_file_path = "output.xlsx"

# NOTE(review): this handle is never read or closed in the code visible here;
# kept in case other cells rely on the BL_File name.
BL_File = open(baselight_file_path, "r")  # Open Baselight file

#Xytech
with open(xytech_file_path) as f:
    XY_File = f.read().splitlines()

# Open Baselight file
with open(baselight_file_path, "r") as baselight_file:
    populate_baselight_collection(baselight_file)

# Open Xytech file
with open(xytech_file_path, "r") as xytech_file:
    populate_xytech_collection(xytech_file)

# Process collections and check frames against video duration.  This runs
# only after the paths, XY_File and both collections exist; running it first
# raised NameError on a fresh kernel.  It runs once: the result is exported
# instead of being recomputed.
valid_frames = process_collectionsVideo(video_file_path)
export_to_xls(valid_frames, output_file_path)


Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 2 - 4
Timecode Range: 00:00:00:02 - 00:00:00:04

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 31 - 33
Timecode Range: 00:00:00:31 - 00:00:00:33

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 67 - 70
Timecode Range: 00:00:01:07 - 00:00:01:10

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 122 - 123
Timecode Range: 00:00:02:02 - 00:00:02:03

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1111 - 1112
Timecode Range: 00:00:18:31 - 00:00:18:32

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1201 - 1205
Timecode Range: 00:00:20:01 - 00:00:20:05

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1211 - 1215
Timecode Range: 00:00:20:11 - 00:00:20:15

Valid Frames to fix:
L

In [69]:
import csv
import pymongo
import subprocess
import pandas as pd

# MongoDB connection information: a client for the server at localhost:27017
# and the database handle `db`, which the functions below read as a global.
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
db = mongo_client["your_database_name"]

# Function to populate Baselight collection
def populate_baselight_collection(baselight_file):
    """Parse Baselight export lines and store them in the ``baselight`` collection.

    Each non-blank line is split on whitespace: the first token is stored as
    ``folder`` and every following token made only of digits is stored, as an
    ``int``, in ``frames``.  All other tokens are dropped.

    Args:
        baselight_file: Iterable of text lines (an open file works).

    Nothing is written when no line yields a document, because
    ``insert_many`` rejects an empty document list.
    """
    output_data = []

    for raw_line in baselight_file:
        parts = raw_line.split()
        if not parts:
            continue  # Skip empty / whitespace-only lines
        folder, *tokens = parts
        frames = [int(token) for token in tokens if token.isdigit()]
        output_data.append({"folder": folder, "frames": frames})

    if not output_data:
        return  # Nothing parsed: skip the insert instead of failing on an empty batch

    # Insert data into Baselight collection
    db["baselight"].insert_many(output_data)

def populate_xytech_collection(xytech_file):
    """Parse a Xytech work order and store it in the ``xytech`` collection.

    A line starting with ``Xytech Workorder`` opens a new entry whose
    ``workorder`` is the last whitespace-separated token of that line.
    ``Location:`` / ``Notes:`` header lines and blank lines are skipped.
    Every other line is filed by its first character: lines starting with
    ``/`` go to the ``location`` list, the rest are joined with newlines
    into ``notes``.  Lines seen before the first work order header are
    attached to that first entry.

    Args:
        xytech_file: Iterable of text lines (an open file works).

    Nothing is written when no work order was found, because
    ``insert_many`` rejects an empty document list.
    """
    output_data = []
    workorder = None
    location = []
    notes = []

    def current_entry():
        # Snapshot of the entry being accumulated right now.
        return {"workorder": workorder, "location": location, "notes": "\n".join(notes)}

    for raw_line in xytech_file:
        line = raw_line.strip()
        if not line or line.startswith(("Location:", "Notes:")):
            continue  # Skip empty lines and the section header lines
        if line.startswith("Xytech Workorder"):
            if workorder is not None:  # Save previous data if any
                output_data.append(current_entry())
                location, notes = [], []  # Fresh lists for the next entry
            workorder = line.split()[-1]  # Extract workorder number
        elif line.startswith("/"):
            location.append(line)  # Location path
        else:
            notes.append(line)  # Anything else is treated as a note

    # Save the last entry after loop ends
    if workorder is not None:
        output_data.append(current_entry())

    if not output_data:
        return  # No work order parsed: skip the insert instead of failing on an empty batch

    # Insert data into Xytech collection
    db["xytech"].insert_many(output_data)


#proj1 script
def process_collections():
    """Collapse the stored Baselight frames into per-folder frame ranges.

    Reads every document of the ``baselight`` collection.  The folder is
    split on "/", element 1 of the split is dropped, and every ``XY_File``
    line containing the re-joined path replaces the folder (the last
    matching line wins).  Consecutive frame numbers are then merged.

    Returns:
        A list of ``(folder, frames)`` tuples where ``frames`` is an ``int``
        for an isolated frame and a ``"start-end"`` string for a run.
    """
    collapsed = []

    def emit(location, first, last):
        # An isolated frame stays an int; a run becomes "first-last".
        collapsed.append((location, first if first == last else f"{first}-{last}"))

    projection = {"_id": 0, "folder": 1, "frames": 1}
    for doc in db["baselight"].find({}, projection):
        location = doc["folder"]
        tokens = [str(frame) for frame in doc["frames"]]

        # Rebuild the folder path without element 1 of its "/" split.
        segments = location.split("/")
        if len(segments) > 1:
            del segments[1]
        needle = "/".join(segments)

        for entry in XY_File:
            if needle in entry:
                location = entry.strip()

        run_start = run_end = None
        for token in tokens:
            if not token.isdigit():
                continue
            value = int(token)
            if run_start is not None and value == run_end + 1:
                run_end = value  # Extends the current run
                continue
            if run_start is not None:
                emit(location, run_start, run_end)  # Gap: close the previous run
            run_start = run_end = value

        if run_start is not None:
            emit(location, run_start, run_end)

    return collapsed

# Function to get video duration using ffmpeg
import subprocess
import re

def get_video_duration(video_file):
    """Return the length of ``video_file`` in frames, or ``None`` on failure.

    Asks ``ffprobe`` for the ``duration`` and ``r_frame_rate`` of the first
    video stream and returns their product.  Despite the name, the result is
    a frame count (seconds * frames per second), not a time.

    Args:
        video_file: Path of the video passed to ``ffprobe``.

    Returns:
        ``duration * frame_rate`` as a float, or ``None`` when ``ffprobe``
        cannot be started, exits with an error, or prints values that cannot
        be parsed.  Every failure is reported with ``print``.
    """
    command = ['ffprobe', '-v', 'error', '-select_streams', 'v:0', '-show_entries', 'stream=duration,r_frame_rate', '-of', 'csv=p=0', video_file]

    def report_failure(message):
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: {message}")

    def to_number(text):
        # Values may be plain numbers or "numerator/denominator" ratios.
        if '/' in text:
            numerator, denominator = map(float, text.split('/'))
            return numerator / denominator
        return float(text)

    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT).decode('utf-8').strip()
    except OSError as e:
        # Raised when the ffprobe executable is missing or cannot be run.
        report_failure(e)
        return None
    except subprocess.CalledProcessError as e:
        report_failure(e.output.decode('utf-8').strip())
        return None

    parts = output.split(',')
    try:
        if len(parts) > 1:
            # NOTE(review): ffprobe may print the two fields in its own order
            # rather than the requested one - confirm.  Only their product is
            # used, so the order does not change the result.
            duration = to_number(parts[0])
            frame_rate = to_number(parts[1])
        else:
            duration = float(parts[0])
            frame_rate = 24  # Assuming default frame rate as 24 fps
    except (ValueError, ZeroDivisionError):
        # e.g. "N/A" or a "0/0" ratio: report it like any other failure.
        report_failure(f"unexpected ffprobe output: {output!r}")
        return None

    return duration * frame_rate


def process_collectionsVideo(video_file):
    """Collect, print and return the frames to fix that lie inside ``video_file``.

    The video length in frames comes from ``get_video_duration`` and the
    candidate frames from ``process_collections``.  Isolated frames are
    collected silently; ranges are also printed through
    ``print_timecode_range``.

    Args:
        video_file: Path of the video the frames are checked against.

    Returns:
        A list of ``(folder, frames, timecode)`` tuples: ``frames`` is an
        ``int`` for an isolated frame and ``"start-end"`` for a range, with
        ``timecode`` formatted to match.  The list is empty when the video
        length is unavailable.
    """
    valid_frames_with_timecodes = []

    # Get video duration
    video_duration = get_video_duration(video_file)
    if video_duration is None:
        # get_video_duration already reported the failure; comparing frame
        # numbers with None below would raise TypeError.
        return valid_frames_with_timecodes

    # Filter frames based on video duration
    for folder, frames in process_collections():
        if isinstance(frames, int):
            if frames < video_duration:
                valid_frames_with_timecodes.append((folder, frames, frame_to_timecode(frames)))
            continue

        start_frame, end_frame = map(int, frames.split('-'))
        # NOTE(review): only the start of the range is checked, so a range
        # may run past the end of the video - confirm this is intended.
        if start_frame < video_duration:
            print("Valid Frames to fix:")
            print_timecode_range(folder, start_frame, end_frame)
            valid_frames_with_timecodes.append((folder, f"{start_frame}-{end_frame}", f"{frame_to_timecode(start_frame)}-{frame_to_timecode(end_frame)}"))

    return valid_frames_with_timecodes

def print_timecode_range(folder, start_frame, end_frame):
    """Print one frame range: its location, frame numbers and timecodes.

    Args:
        folder: Location shown on the first line.
        start_frame: First frame of the range.
        end_frame: Last frame of the range.
    """
    first_tc, last_tc = (frame_to_timecode(f) for f in (start_frame, end_frame))
    report = (
        f"Location: {folder}",
        f"Range: {start_frame} - {end_frame}",
        f"Timecode Range: {first_tc} - {last_tc}\n",  # blank line after each range
    )
    print("\n".join(report))

# Function to convert frame number to timecode
def frame_to_timecode(frame, fps=60):
    """Convert a frame number to an ``HH:MM:SS:FF`` timecode string.

    Args:
        frame: Frame number to convert.
        fps: Frames per second used for the conversion.  Defaults to 60,
            the rate this function previously had hard-coded.

    Returns:
        The timecode, each field zero-padded to at least two digits.
    """
    # divmod keeps the arithmetic exact for integer input instead of going
    # through a float number of seconds.
    whole_seconds, frames = divmod(frame, fps)
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}:{int(frames):02d}"

            
def export_to_xls(data, output_file):
    """Write ``data`` to an Excel file and print a confirmation.

    Args:
        data: Rows for a three-column table (Location, Frames, Timecode).
        output_file: Destination passed to ``DataFrame.to_excel``.
    """
    header = ["Location", "Frames", "Timecode"]
    # index=False keeps the DataFrame's row index out of the sheet.
    pd.DataFrame(data, columns=header).to_excel(output_file, index=False)
    print(f"Data exported to {output_file}")

# Paths to Baselight and Xytech files, the video to check and the report
baselight_file_path = "Baselight_export.txt"
xytech_file_path = "Xytech.txt"
video_file_path = "twitch_nft_demo.mp4"
output_file_path = "output.xlsx"

# NOTE(review): this handle is never read or closed in the code visible here;
# kept in case other cells rely on the BL_File name.
BL_File = open(baselight_file_path, "r")  # Open Baselight file

#Xytech
with open(xytech_file_path) as f:
    XY_File = f.read().splitlines()

# Open Baselight file
with open(baselight_file_path, "r") as baselight_file:
    populate_baselight_collection(baselight_file)

# Open Xytech file
with open(xytech_file_path, "r") as xytech_file:
    populate_xytech_collection(xytech_file)

# Process collections and check frames against video duration.  This runs
# only after the paths, XY_File and both collections exist; running it first
# raised NameError on a fresh kernel.  It runs once: the result is exported
# instead of being recomputed.
valid_frames = process_collectionsVideo(video_file_path)
export_to_xls(valid_frames, output_file_path)

# Last expression of the cell: the notebook displays the exported rows.
valid_frames


Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 2 - 4
Timecode Range: 00:00:00:02 - 00:00:00:04

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 31 - 33
Timecode Range: 00:00:00:31 - 00:00:00:33

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 67 - 70
Timecode Range: 00:00:01:07 - 00:00:01:10

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 122 - 123
Timecode Range: 00:00:02:02 - 00:00:02:03

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1111 - 1112
Timecode Range: 00:00:18:31 - 00:00:18:32

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1201 - 1205
Timecode Range: 00:00:20:01 - 00:00:20:05

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1211 - 1215
Timecode Range: 00:00:20:11 - 00:00:20:15

Valid Frames to fix:
L

[('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '2-4',
  '00:00:00:02-00:00:00:04'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '31-33',
  '00:00:00:31-00:00:00:33'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '67-70',
  '00:00:01:07-00:00:01:10'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '122-123',
  '00:00:02:02-00:00:02:03'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080', 155, '00:00:02:35'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080', 1023, '00:00:17:03'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '1111-1112',
  '00:00:18:31-00:00:18:32'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080', 1160, '00:00:19:20'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '1201-1205',
  '00:00:20:01-00:00:20:05'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '1211-1215',
  '00:00:20:11-00:00:20:15'),
 ('/hpsans12/production/Dune2/reel1/VFX/Hydraulx',
  '1251-1253',
  '00:00:20:51-00:00:20:53'),


In [70]:
import csv
import pymongo
import subprocess
import pandas as pd

# MongoDB connection information: a client for the server at localhost:27017
# and the database handle `db`, which the functions below read as a global.
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
db = mongo_client["your_database_name"]

# Function to populate Baselight collection
def populate_baselight_collection(baselight_file):
    """Parse Baselight export lines and store them in the ``baselight`` collection.

    Each non-blank line is split on whitespace: the first token is stored as
    ``folder`` and every following token made only of digits is stored, as an
    ``int``, in ``frames``.  All other tokens are dropped.

    Args:
        baselight_file: Iterable of text lines (an open file works).

    Nothing is written when no line yields a document, because
    ``insert_many`` rejects an empty document list.
    """
    output_data = []

    for raw_line in baselight_file:
        parts = raw_line.split()
        if not parts:
            continue  # Skip empty / whitespace-only lines
        folder, *tokens = parts
        frames = [int(token) for token in tokens if token.isdigit()]
        output_data.append({"folder": folder, "frames": frames})

    if not output_data:
        return  # Nothing parsed: skip the insert instead of failing on an empty batch

    # Insert data into Baselight collection
    db["baselight"].insert_many(output_data)

def populate_xytech_collection(xytech_file):
    """Parse a Xytech work order and store it in the ``xytech`` collection.

    A line starting with ``Xytech Workorder`` opens a new entry whose
    ``workorder`` is the last whitespace-separated token of that line.
    ``Location:`` / ``Notes:`` header lines and blank lines are skipped.
    Every other line is filed by its first character: lines starting with
    ``/`` go to the ``location`` list, the rest are joined with newlines
    into ``notes``.  Lines seen before the first work order header are
    attached to that first entry.

    Args:
        xytech_file: Iterable of text lines (an open file works).

    Nothing is written when no work order was found, because
    ``insert_many`` rejects an empty document list.
    """
    output_data = []
    workorder = None
    location = []
    notes = []

    def current_entry():
        # Snapshot of the entry being accumulated right now.
        return {"workorder": workorder, "location": location, "notes": "\n".join(notes)}

    for raw_line in xytech_file:
        line = raw_line.strip()
        if not line or line.startswith(("Location:", "Notes:")):
            continue  # Skip empty lines and the section header lines
        if line.startswith("Xytech Workorder"):
            if workorder is not None:  # Save previous data if any
                output_data.append(current_entry())
                location, notes = [], []  # Fresh lists for the next entry
            workorder = line.split()[-1]  # Extract workorder number
        elif line.startswith("/"):
            location.append(line)  # Location path
        else:
            notes.append(line)  # Anything else is treated as a note

    # Save the last entry after loop ends
    if workorder is not None:
        output_data.append(current_entry())

    if not output_data:
        return  # No work order parsed: skip the insert instead of failing on an empty batch

    # Insert data into Xytech collection
    db["xytech"].insert_many(output_data)


#proj1 script
def process_collections():
    """Collapse the stored Baselight frames into per-folder frame ranges.

    Reads every document of the ``baselight`` collection.  The folder is
    split on "/", element 1 of the split is dropped, and every ``XY_File``
    line containing the re-joined path replaces the folder (the last
    matching line wins).  Consecutive frame numbers are then merged.

    Returns:
        A list of ``(folder, frames)`` tuples where ``frames`` is an ``int``
        for an isolated frame and a ``"start-end"`` string for a run.
    """
    collapsed = []

    def emit(location, first, last):
        # An isolated frame stays an int; a run becomes "first-last".
        collapsed.append((location, first if first == last else f"{first}-{last}"))

    projection = {"_id": 0, "folder": 1, "frames": 1}
    for doc in db["baselight"].find({}, projection):
        location = doc["folder"]
        tokens = [str(frame) for frame in doc["frames"]]

        # Rebuild the folder path without element 1 of its "/" split.
        segments = location.split("/")
        if len(segments) > 1:
            del segments[1]
        needle = "/".join(segments)

        for entry in XY_File:
            if needle in entry:
                location = entry.strip()

        run_start = run_end = None
        for token in tokens:
            if not token.isdigit():
                continue
            value = int(token)
            if run_start is not None and value == run_end + 1:
                run_end = value  # Extends the current run
                continue
            if run_start is not None:
                emit(location, run_start, run_end)  # Gap: close the previous run
            run_start = run_end = value

        if run_start is not None:
            emit(location, run_start, run_end)

    return collapsed

# Function to get video duration using ffmpeg
import subprocess
import re

def _parse_ffprobe_number(text):
    """Parse an ffprobe numeric field: plain ("12.5") or rational ("60000/1001")."""
    if '/' in text:
        numerator, denominator = map(float, text.split('/'))
        return numerator / denominator
    return float(text)


def get_video_duration(video_file):
    """Return the length of *video_file* in frames, or None on failure.

    Runs ffprobe on the first video stream asking for ``duration`` and
    ``r_frame_rate`` and returns their product. The two comma-separated
    values are parsed the same way and multiplied, so the result does not
    depend on which of them ffprobe prints first. When ffprobe prints a
    single value, 24 is used as the other factor.

    Args:
        video_file: Path of the video file handed to ffprobe.

    Returns:
        ``duration * frame_rate`` as a float, or ``None`` when ffprobe cannot
        be started, exits with an error, or prints something that cannot be
        parsed (a diagnostic is printed in each case).
    """
    command = ['ffprobe', '-v', 'error', '-select_streams', 'v:0', '-show_entries', 'stream=duration,r_frame_rate', '-of', 'csv=p=0', video_file]
    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT).decode('utf-8').strip()
    except subprocess.CalledProcessError as e:
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: {e.output.decode('utf-8').strip()}")
        return None
    except OSError as e:
        # Raised when the ffprobe executable itself cannot be launched
        # (e.g. it is not installed or not on PATH).
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: {e}")
        return None

    parts = output.split(',')
    try:
        first_value = _parse_ffprobe_number(parts[0])
        # Assuming default frame rate as 24 fps when only one value is printed.
        second_value = _parse_ffprobe_number(parts[1]) if len(parts) > 1 else 24
    except (ValueError, ZeroDivisionError):
        # e.g. "N/A" for an unknown duration or a "0/0" frame rate.
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: unexpected ffprobe output {output!r}")
        return None
    return first_value * second_value


def process_collectionsVideo(video_file):
    """Collect the frame entries that start inside *video_file*.

    Compares every entry from ``process_collections()`` against the frame
    count returned by ``get_video_duration()``. Ranges that qualify are also
    printed via ``print_timecode_range()``.

    Args:
        video_file: Path of the video whose length bounds the frames.

    Returns:
        A list of ``(folder, frames, timecode, "Valid")`` tuples. ``frames``
        is an ``int`` for a single frame or a ``"start-end"`` string for a
        range. The list is empty when the video length cannot be determined.
    """
    video_duration = get_video_duration(video_file)
    if video_duration is None:
        # get_video_duration() has already printed the failure; comparing
        # frame numbers against None would raise TypeError.
        return []

    valid_frames_with_timecodes = []
    for folder, frames in process_collections():
        if isinstance(frames, int):
            if frames < video_duration:
                valid_frames_with_timecodes.append((folder, frames, frame_to_timecode(frames), "Valid"))
            continue

        start_frame, end_frame = map(int, frames.split('-'))
        # NOTE(review): only the start of the range is compared, so a range
        # that begins inside the video but ends past it is still kept.
        if start_frame < video_duration:
            print("Valid Frames to fix:")
            print_timecode_range(folder, start_frame, end_frame)
            valid_frames_with_timecodes.append((folder, f"{start_frame}-{end_frame}", f"{frame_to_timecode(start_frame)}-{frame_to_timecode(end_frame)}", "Valid"))

    return valid_frames_with_timecodes

def print_timecode_range(folder, start_frame, end_frame):
    """Print a location, its frame range and the equivalent timecode range."""
    first_tc, last_tc = [frame_to_timecode(value) for value in (start_frame, end_frame)]
    print(f"Location: {folder}")
    print(f"Range: {start_frame} - {end_frame}")
    print(f"Timecode Range: {first_tc} - {last_tc}\n")

# Function to convert frame number to timecode
def frame_to_timecode(frame, fps=60):
    """Convert a frame number to an ``HH:MM:SS:FF`` timecode string.

    Args:
        frame: Frame number counted from 0.
        fps: Frames per second used for the conversion (default 60).

    Returns:
        The timecode with every field zero-padded to at least two digits.

    Raises:
        ValueError: If *fps* is not positive.
    """
    if fps <= 0:
        raise ValueError("fps must be positive")
    # Integer arithmetic throughout, so large frame numbers cannot pick up
    # floating-point rounding errors.
    whole_seconds = int(frame // fps)
    frames = int(frame % fps)
    whole_minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}:{frames:02d}"

            
def export_to_xls(data, output_file):
    """Write *data* rows to an Excel file at *output_file* and report it.

    Each row supplies the Location, Frames, Timecode and Status columns, in
    that order.
    """
    column_names = ["Location", "Frames", "Timecode", "Status"]
    pd.DataFrame(data, columns=column_names).to_excel(output_file, index=False)
    print(f"Data exported to {output_file}")

# Paths to Baselight and Xytech files. They are defined before anything uses
# them, so the cell also runs top to bottom on a fresh kernel.
baselight_file_path = "Baselight_export.txt"
xytech_file_path = "Xytech.txt"
video_file_path = "twitch_nft_demo.mp4"
output_file_path = "output.xlsx"
# NOTE(review): this handle is never read or closed in the code shown here;
# it is kept only in case something else relies on the BL_File name.
BL_File = open("Baselight_export.txt", "r")  # Open Baselight file

# Xytech lines searched by process_collections() when remapping folders
with open("Xytech.txt") as f:
    XY_File = f.read().splitlines()

# Load the Baselight export into MongoDB
with open(baselight_file_path, "r") as baselight_file:
    populate_baselight_collection(baselight_file)

# Load the Xytech work order into MongoDB
with open(xytech_file_path, "r") as xytech_file:
    populate_xytech_collection(xytech_file)

# Process collections, check frames against video duration and export the
# result. This runs after the collections above have been populated.
valid_frames = process_collectionsVideo(video_file_path)
export_to_xls(valid_frames, output_file_path)

# Last expression of the cell: display the exported rows.
valid_frames


Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 2 - 4
Timecode Range: 00:00:00:02 - 00:00:00:04

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 31 - 33
Timecode Range: 00:00:00:31 - 00:00:00:33

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 67 - 70
Timecode Range: 00:00:01:07 - 00:00:01:10

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 122 - 123
Timecode Range: 00:00:02:02 - 00:00:02:03

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1111 - 1112
Timecode Range: 00:00:18:31 - 00:00:18:32

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1201 - 1205
Timecode Range: 00:00:20:01 - 00:00:20:05

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1211 - 1215
Timecode Range: 00:00:20:11 - 00:00:20:15

Valid Frames to fix:
L

[('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '2-4',
  '00:00:00:02-00:00:00:04',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '31-33',
  '00:00:00:31-00:00:00:33',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '67-70',
  '00:00:01:07-00:00:01:10',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '122-123',
  '00:00:02:02-00:00:02:03',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  155,
  '00:00:02:35',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  1023,
  '00:00:17:03',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '1111-1112',
  '00:00:18:31-00:00:18:32',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  1160,
  '00:00:19:20',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '1201-1205',
  '00:00:20:01-00:00:20:05',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '1211-1215',
  '00:00:20:11-

In [85]:
import csv
import pymongo
import subprocess
import pandas as pd

# MongoDB connection information: a client for the server at localhost:27017.
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
# Database handle used by the functions below as the module-level ``db``.
# NOTE(review): "your_database_name" looks like a placeholder — confirm.
db = mongo_client["your_database_name"]

# Function to populate Baselight collection
def populate_baselight_collection(baselight_file):
    """Parse a Baselight export and insert one document per line.

    Each non-blank line is split on whitespace: the first token is stored as
    ``folder`` and every following all-digit token as an int in ``frames``
    (other tokens are dropped). The documents go into ``db["baselight"]``.

    Args:
        baselight_file: Iterable of text lines, e.g. an open text file.
    """
    baselight_collection = db["baselight"]

    output_data = []
    for raw_line in baselight_file:
        tokens = raw_line.split()
        if not tokens:
            continue  # blank or whitespace-only line
        folder, *frame_tokens = tokens
        output_data.append({
            "folder": folder,
            "frames": [int(token) for token in frame_tokens if token.isdigit()],
        })

    # insert_many() rejects an empty list, so an empty export is a no-op
    # instead of an exception.
    if output_data:
        baselight_collection.insert_many(output_data)

def populate_xytech_collection(xytech_file):
    """Parse a Xytech work-order file and insert one document per work order.

    A line starting with "Xytech Workorder" opens a new entry whose number is
    the last whitespace-separated token of that line. "Location:" and
    "Notes:" header lines and blank lines are skipped. Every other line is
    stored as a location when it starts with "/" and as a note otherwise.
    Documents have the keys ``workorder``, ``location`` (list of paths) and
    ``notes`` (lines joined with newlines) and go into ``db["xytech"]``.

    Args:
        xytech_file: Iterable of text lines, e.g. an open text file.
    """
    xytech_collection = db["xytech"]
    output_data = []

    workorder = None
    location = []
    notes = []

    for raw_line in xytech_file:
        line = raw_line.strip()
        if not line or line.startswith(("Location:", "Notes:")):
            continue  # blank lines and section headers carry no data
        if line.startswith("Xytech Workorder"):
            if workorder is not None:
                # A new work order begins: store the one collected so far.
                output_data.append({"workorder": workorder, "location": location, "notes": "\n".join(notes)})
                location = []
                notes = []
            workorder = line.split()[-1]
        elif line.startswith("/"):
            location.append(line)
        else:
            notes.append(line)

    # Store the entry that was still open when the file ended.
    if workorder is not None:
        output_data.append({"workorder": workorder, "location": location, "notes": "\n".join(notes)})

    # insert_many() rejects an empty list, so a file without any work order
    # is a no-op instead of an exception.
    if output_data:
        xytech_collection.insert_many(output_data)


#proj1 script
def _collapse_frame_ranges(frames):
    """Collapse a sequence of frame numbers into runs of consecutive values.

    Args:
        frames: Iterable of ints, processed in the order given.

    Returns:
        A list with one item per run: the bare ``int`` for an isolated
        frame, or a ``"start-end"`` string for two or more consecutive
        frames.
    """
    collapsed = []
    run_start = run_end = None
    for frame in frames:
        if run_start is None:
            run_start = run_end = frame
        elif frame == run_end + 1:
            run_end = frame  # still consecutive: extend the current run
        else:
            collapsed.append(run_start if run_start == run_end else f"{run_start}-{run_end}")
            run_start = run_end = frame
    if run_start is not None:
        # Flush the run that was still open when the input ended.
        collapsed.append(run_start if run_start == run_end else f"{run_start}-{run_end}")
    return collapsed


def process_collections():
    """Build (location, frames) pairs from the Baselight collection.

    Reads every document of ``db["baselight"]``, replaces the document's
    folder with the matching line of the module-level ``XY_File`` list (when
    one exists) and collapses the document's frame numbers into runs.

    Returns:
        A list of ``(folder, frames)`` tuples where ``frames`` is an ``int``
        for an isolated frame or a ``"start-end"`` string for a run of
        consecutive frames.
    """
    baselight_collection = db["baselight"]
    cursor = baselight_collection.find({}, {"_id": 0, "folder": 1, "frames": 1})

    output_ranges = []
    for document in cursor:
        current_folder = document["folder"]

        # Drop the second "/"-separated component of the folder so the
        # remainder can be searched for inside the Xytech lines.
        folder_parts = current_folder.split("/")
        if len(folder_parts) > 1:
            folder_parts.pop(1)
        new_folder = "/".join(folder_parts)

        # A remainder made of nothing but slashes would be a substring of
        # (nearly) every line and remap the folder to an unrelated location,
        # so only search when something meaningful is left.
        if new_folder.strip("/"):
            for techfile in XY_File:
                if new_folder in techfile:
                    # No break on purpose: the last matching line wins.
                    current_folder = techfile.strip()

        # Keep only values whose text form is all digits, i.e. non-negative
        # integers; anything else stored in "frames" is ignored.
        frame_numbers = [int(text) for text in map(str, document["frames"]) if text.isdigit()]
        output_ranges.extend(
            (current_folder, frames) for frames in _collapse_frame_ranges(frame_numbers)
        )

    return output_ranges

# Function to get video duration using ffmpeg
import subprocess
import re

def _parse_ffprobe_number(text):
    """Parse an ffprobe numeric field: plain ("12.5") or rational ("60000/1001")."""
    if '/' in text:
        numerator, denominator = map(float, text.split('/'))
        return numerator / denominator
    return float(text)


def get_video_duration(video_file):
    """Return the length of *video_file* in frames, or None on failure.

    Runs ffprobe on the first video stream asking for ``duration`` and
    ``r_frame_rate`` and returns their product. The two comma-separated
    values are parsed the same way and multiplied, so the result does not
    depend on which of them ffprobe prints first. When ffprobe prints a
    single value, 24 is used as the other factor.

    Args:
        video_file: Path of the video file handed to ffprobe.

    Returns:
        ``duration * frame_rate`` as a float, or ``None`` when ffprobe cannot
        be started, exits with an error, or prints something that cannot be
        parsed (a diagnostic is printed in each case).
    """
    command = ['ffprobe', '-v', 'error', '-select_streams', 'v:0', '-show_entries', 'stream=duration,r_frame_rate', '-of', 'csv=p=0', video_file]
    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT).decode('utf-8').strip()
    except subprocess.CalledProcessError as e:
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: {e.output.decode('utf-8').strip()}")
        return None
    except OSError as e:
        # Raised when the ffprobe executable itself cannot be launched
        # (e.g. it is not installed or not on PATH).
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: {e}")
        return None

    parts = output.split(',')
    try:
        first_value = _parse_ffprobe_number(parts[0])
        # Assuming default frame rate as 24 fps when only one value is printed.
        second_value = _parse_ffprobe_number(parts[1]) if len(parts) > 1 else 24
    except (ValueError, ZeroDivisionError):
        # e.g. "N/A" for an unknown duration or a "0/0" frame rate.
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: unexpected ffprobe output {output!r}")
        return None
    return first_value * second_value


def process_collectionsVideo(video_file):
    """Collect the frame entries that start inside *video_file*.

    Compares every entry from ``process_collections()`` against the frame
    count returned by ``get_video_duration()``. Ranges that qualify are also
    printed via ``print_timecode_range()``.

    Args:
        video_file: Path of the video whose length bounds the frames.

    Returns:
        A list of ``(folder, frames, timecode, status)`` tuples. Single
        frames carry an ``int`` and the status ``""``; ranges carry a
        ``"start-end"`` string and the status ``"Valid"``. The list is empty
        when the video length cannot be determined.
    """
    video_duration = get_video_duration(video_file)
    if video_duration is None:
        # get_video_duration() has already printed the failure; comparing
        # frame numbers against None would raise TypeError.
        return []

    valid_frames_with_timecodes = []
    for folder, frames in process_collections():
        if isinstance(frames, int):
            if frames < video_duration:
                valid_frames_with_timecodes.append((folder, frames, frame_to_timecode(frames), ""))
            continue

        start_frame, end_frame = map(int, frames.split('-'))
        # NOTE(review): only the start of the range is compared, so a range
        # that begins inside the video but ends past it is still kept.
        if start_frame < video_duration:
            print("Valid Frames to fix:")
            print_timecode_range(folder, start_frame, end_frame)
            valid_frames_with_timecodes.append((folder, f"{start_frame}-{end_frame}", f"{frame_to_timecode(start_frame)}-{frame_to_timecode(end_frame)}", "Valid"))

    return valid_frames_with_timecodes

def print_timecode_range(folder, start_frame, end_frame):
    """Print a location, its frame range and the equivalent timecode range."""
    first_tc, last_tc = [frame_to_timecode(value) for value in (start_frame, end_frame)]
    print(f"Location: {folder}")
    print(f"Range: {start_frame} - {end_frame}")
    print(f"Timecode Range: {first_tc} - {last_tc}\n")

# Function to convert frame number to timecode
def frame_to_timecode(frame, fps=60):
    """Convert a frame number to an ``HH:MM:SS:FF`` timecode string.

    Args:
        frame: Frame number counted from 0.
        fps: Frames per second used for the conversion (default 60).

    Returns:
        The timecode with every field zero-padded to at least two digits.

    Raises:
        ValueError: If *fps* is not positive.
    """
    if fps <= 0:
        raise ValueError("fps must be positive")
    # Integer arithmetic throughout, so large frame numbers cannot pick up
    # floating-point rounding errors.
    whole_seconds = int(frame // fps)
    frames = int(frame % fps)
    whole_minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}:{frames:02d}"

            
def export_to_xls(data, output_file):
    """Write *data* rows to an Excel file at *output_file* and report it.

    Each row supplies the Location, Frames, Timecode and Status columns, in
    that order.
    """
    column_names = ["Location", "Frames", "Timecode", "Status"]
    pd.DataFrame(data, columns=column_names).to_excel(output_file, index=False)
    print(f"Data exported to {output_file}")

# Paths to Baselight and Xytech files. They are defined before anything uses
# them, so the cell also runs top to bottom on a fresh kernel.
baselight_file_path = "Baselight_export.txt"
xytech_file_path = "Xytech.txt"
video_file_path = "twitch_nft_demo.mp4"
output_file_path = "output.xlsx"
# NOTE(review): this handle is never read or closed in the code shown here;
# it is kept only in case something else relies on the BL_File name.
BL_File = open("Baselight_export.txt", "r")  # Open Baselight file

# Xytech lines searched by process_collections() when remapping folders
with open("Xytech.txt") as f:
    XY_File = f.read().splitlines()

# Load the Baselight export into MongoDB
with open(baselight_file_path, "r") as baselight_file:
    populate_baselight_collection(baselight_file)

# Load the Xytech work order into MongoDB
with open(xytech_file_path, "r") as xytech_file:
    populate_xytech_collection(xytech_file)

# Process collections, check frames against video duration and export the
# result. This runs after the collections above have been populated.
valid_frames = process_collectionsVideo(video_file_path)
export_to_xls(valid_frames, output_file_path)

# Last expression of the cell: display the exported rows.
valid_frames


Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 2 - 4
Timecode Range: 00:00:00:02 - 00:00:00:04

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 31 - 33
Timecode Range: 00:00:00:31 - 00:00:00:33

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 67 - 70
Timecode Range: 00:00:01:07 - 00:00:01:10

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 122 - 123
Timecode Range: 00:00:02:02 - 00:00:02:03

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1111 - 1112
Timecode Range: 00:00:18:31 - 00:00:18:32

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1201 - 1205
Timecode Range: 00:00:20:01 - 00:00:20:05

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1211 - 1215
Timecode Range: 00:00:20:11 - 00:00:20:15

Valid Frames to fix:
L

[('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '2-4',
  '00:00:00:02-00:00:00:04',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '31-33',
  '00:00:00:31-00:00:00:33',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '67-70',
  '00:00:01:07-00:00:01:10',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '122-123',
  '00:00:02:02-00:00:02:03',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080', 155, '00:00:02:35', ''),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080', 1023, '00:00:17:03', ''),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '1111-1112',
  '00:00:18:31-00:00:18:32',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080', 1160, '00:00:19:20', ''),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '1201-1205',
  '00:00:20:01-00:00:20:05',
  'Valid'),
 ('/hpsans13/production/Dune2/reel1/partA/1920x1080',
  '1211-1215',
  '00:00:20:11-00:00:20:15',
  'Valid'),
 ('/hps

In [88]:
import csv
import pymongo
import subprocess
import pandas as pd

# MongoDB connection information: a client for the server at localhost:27017.
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
# Database handle used by the functions below as the module-level ``db``.
# NOTE(review): "your_database_name" looks like a placeholder — confirm.
db = mongo_client["your_database_name"]

# Function to populate Baselight collection
def populate_baselight_collection(baselight_file):
    """Parse a Baselight export and insert one document per line.

    Each non-blank line is split on whitespace: the first token is stored as
    ``folder`` and every following all-digit token as an int in ``frames``
    (other tokens are dropped). The documents go into ``db["baselight"]``.

    Args:
        baselight_file: Iterable of text lines, e.g. an open text file.
    """
    baselight_collection = db["baselight"]

    output_data = []
    for raw_line in baselight_file:
        tokens = raw_line.split()
        if not tokens:
            continue  # blank or whitespace-only line
        folder, *frame_tokens = tokens
        output_data.append({
            "folder": folder,
            "frames": [int(token) for token in frame_tokens if token.isdigit()],
        })

    # insert_many() rejects an empty list, so an empty export is a no-op
    # instead of an exception.
    if output_data:
        baselight_collection.insert_many(output_data)

def populate_xytech_collection(xytech_file):
    """Parse a Xytech work-order file and insert one document per work order.

    A line starting with "Xytech Workorder" opens a new entry whose number is
    the last whitespace-separated token of that line. "Location:" and
    "Notes:" header lines and blank lines are skipped. Every other line is
    stored as a location when it starts with "/" and as a note otherwise.
    Documents have the keys ``workorder``, ``location`` (list of paths) and
    ``notes`` (lines joined with newlines) and go into ``db["xytech"]``.

    Args:
        xytech_file: Iterable of text lines, e.g. an open text file.
    """
    xytech_collection = db["xytech"]
    output_data = []

    workorder = None
    location = []
    notes = []

    for raw_line in xytech_file:
        line = raw_line.strip()
        if not line or line.startswith(("Location:", "Notes:")):
            continue  # blank lines and section headers carry no data
        if line.startswith("Xytech Workorder"):
            if workorder is not None:
                # A new work order begins: store the one collected so far.
                output_data.append({"workorder": workorder, "location": location, "notes": "\n".join(notes)})
                location = []
                notes = []
            workorder = line.split()[-1]
        elif line.startswith("/"):
            location.append(line)
        else:
            notes.append(line)

    # Store the entry that was still open when the file ended.
    if workorder is not None:
        output_data.append({"workorder": workorder, "location": location, "notes": "\n".join(notes)})

    # insert_many() rejects an empty list, so a file without any work order
    # is a no-op instead of an exception.
    if output_data:
        xytech_collection.insert_many(output_data)


#proj1 script
def _collapse_frame_ranges(frames):
    """Collapse a sequence of frame numbers into runs of consecutive values.

    Args:
        frames: Iterable of ints, processed in the order given.

    Returns:
        A list with one item per run: the bare ``int`` for an isolated
        frame, or a ``"start-end"`` string for two or more consecutive
        frames.
    """
    collapsed = []
    run_start = run_end = None
    for frame in frames:
        if run_start is None:
            run_start = run_end = frame
        elif frame == run_end + 1:
            run_end = frame  # still consecutive: extend the current run
        else:
            collapsed.append(run_start if run_start == run_end else f"{run_start}-{run_end}")
            run_start = run_end = frame
    if run_start is not None:
        # Flush the run that was still open when the input ended.
        collapsed.append(run_start if run_start == run_end else f"{run_start}-{run_end}")
    return collapsed


def process_collections():
    """Build (location, frames) pairs from the Baselight collection.

    Reads every document of ``db["baselight"]``, replaces the document's
    folder with the matching line of the module-level ``XY_File`` list (when
    one exists) and collapses the document's frame numbers into runs.

    Returns:
        A list of ``(folder, frames)`` tuples where ``frames`` is an ``int``
        for an isolated frame or a ``"start-end"`` string for a run of
        consecutive frames.
    """
    baselight_collection = db["baselight"]
    cursor = baselight_collection.find({}, {"_id": 0, "folder": 1, "frames": 1})

    output_ranges = []
    for document in cursor:
        current_folder = document["folder"]

        # Drop the second "/"-separated component of the folder so the
        # remainder can be searched for inside the Xytech lines.
        folder_parts = current_folder.split("/")
        if len(folder_parts) > 1:
            folder_parts.pop(1)
        new_folder = "/".join(folder_parts)

        # A remainder made of nothing but slashes would be a substring of
        # (nearly) every line and remap the folder to an unrelated location,
        # so only search when something meaningful is left.
        if new_folder.strip("/"):
            for techfile in XY_File:
                if new_folder in techfile:
                    # No break on purpose: the last matching line wins.
                    current_folder = techfile.strip()

        # Keep only values whose text form is all digits, i.e. non-negative
        # integers; anything else stored in "frames" is ignored.
        frame_numbers = [int(text) for text in map(str, document["frames"]) if text.isdigit()]
        output_ranges.extend(
            (current_folder, frames) for frames in _collapse_frame_ranges(frame_numbers)
        )

    return output_ranges

# Function to get video duration using ffmpeg
import subprocess
import re

def _parse_ffprobe_number(text):
    """Parse an ffprobe numeric field: plain ("12.5") or rational ("60000/1001")."""
    if '/' in text:
        numerator, denominator = map(float, text.split('/'))
        return numerator / denominator
    return float(text)


def get_video_duration(video_file):
    """Return the length of *video_file* in frames, or None on failure.

    Runs ffprobe on the first video stream asking for ``duration`` and
    ``r_frame_rate`` and returns their product. The two comma-separated
    values are parsed the same way and multiplied, so the result does not
    depend on which of them ffprobe prints first. When ffprobe prints a
    single value, 24 is used as the other factor.

    Args:
        video_file: Path of the video file handed to ffprobe.

    Returns:
        ``duration * frame_rate`` as a float, or ``None`` when ffprobe cannot
        be started, exits with an error, or prints something that cannot be
        parsed (a diagnostic is printed in each case).
    """
    command = ['ffprobe', '-v', 'error', '-select_streams', 'v:0', '-show_entries', 'stream=duration,r_frame_rate', '-of', 'csv=p=0', video_file]
    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT).decode('utf-8').strip()
    except subprocess.CalledProcessError as e:
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: {e.output.decode('utf-8').strip()}")
        return None
    except OSError as e:
        # Raised when the ffprobe executable itself cannot be launched
        # (e.g. it is not installed or not on PATH).
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: {e}")
        return None

    parts = output.split(',')
    try:
        first_value = _parse_ffprobe_number(parts[0])
        # Assuming default frame rate as 24 fps when only one value is printed.
        second_value = _parse_ffprobe_number(parts[1]) if len(parts) > 1 else 24
    except (ValueError, ZeroDivisionError):
        # e.g. "N/A" for an unknown duration or a "0/0" frame rate.
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: unexpected ffprobe output {output!r}")
        return None
    return first_value * second_value


def process_collectionsVideo(video_file):
    """Collect and print the frame ranges that start inside *video_file*.

    Compares every range from ``process_collections()`` against the frame
    count returned by ``get_video_duration()``. Qualifying ranges are printed
    via ``print_timecode_range()`` and returned. Single frames are not part
    of this report.

    Args:
        video_file: Path of the video whose length bounds the frames.

    Returns:
        A list of ``(folder, "start-end", "tc_start-tc_end", "Valid")``
        tuples. The list is empty when the video length cannot be determined.
    """
    video_duration = get_video_duration(video_file)
    if video_duration is None:
        # get_video_duration() has already printed the failure; comparing
        # frame numbers against None would raise TypeError.
        return []

    valid_frames_with_timecodes = []
    for folder, frames in process_collections():
        if isinstance(frames, int):
            continue  # isolated frames are skipped; only ranges are reported

        start_frame, end_frame = map(int, frames.split('-'))
        # NOTE(review): only the start of the range is compared, so a range
        # that begins inside the video but ends past it is still kept.
        if start_frame < video_duration:
            print_timecode_range(folder, start_frame, end_frame)
            valid_frames_with_timecodes.append((folder, f"{start_frame}-{end_frame}", f"{frame_to_timecode(start_frame)}-{frame_to_timecode(end_frame)}", "Valid"))
    return valid_frames_with_timecodes
            

def print_timecode_range(folder, start_frame, end_frame):
    """Print a location, its frame range and the equivalent timecode range."""
    first_tc, last_tc = [frame_to_timecode(value) for value in (start_frame, end_frame)]
    print(f"Location: {folder}")
    print(f"Range: {start_frame} - {end_frame}")
    print(f"Timecode Range: {first_tc} - {last_tc}\n")



# Function to convert frame number to timecode
def frame_to_timecode(frame, fps=60):
    """Convert a frame number to an ``HH:MM:SS:FF`` timecode string.

    Args:
        frame: Frame number counted from 0.
        fps: Frames per second used for the conversion (default 60).

    Returns:
        The timecode with every field zero-padded to at least two digits.

    Raises:
        ValueError: If *fps* is not positive.
    """
    if fps <= 0:
        raise ValueError("fps must be positive")
    # Integer arithmetic throughout, so large frame numbers cannot pick up
    # floating-point rounding errors.
    whole_seconds = int(frame // fps)
    frames = int(frame % fps)
    whole_minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}:{frames:02d}"

            
def export_to_xls(data, output_file):
    """Write *data* rows to an Excel file at *output_file* and report it.

    Each row supplies the Location, Frames, Timecode and Status columns, in
    that order.
    """
    column_names = ["Location", "Frames", "Timecode", "Status"]
    pd.DataFrame(data, columns=column_names).to_excel(output_file, index=False)
    print(f"Data exported to {output_file}")

# Paths to the Baselight export, the Xytech work order and the video file
baselight_file_path = "Baselight_export.txt"
xytech_file_path = "Xytech.txt"
video_file_path = "twitch_nft_demo.mp4"
# NOTE(review): BL_File is not read or closed anywhere in this cell — confirm
# whether anything still needs this handle.
BL_File = open("Baselight_export.txt", "r")  # Open Baselight file

# Xytech lines, read into the module-level list that process_collections()
# searches when remapping folders
with open("Xytech.txt") as f:
    XY_File = f.read().splitlines()

# Load the Baselight export into MongoDB.
# NOTE(review): nothing here clears the collections first, so re-running the
# cell appears to insert the same documents again — confirm this is intended.
with open(baselight_file_path, "r") as baselight_file:
    populate_baselight_collection(baselight_file)

# Load the Xytech work order into MongoDB
with open(xytech_file_path, "r") as xytech_file:
    populate_xytech_collection(xytech_file)

# Process collections and check frames against video duration
thing = process_collectionsVideo(video_file_path)

output_file_path = "output.xlsx"
# valid_frames is computed but not used below; the export writes `thing`.
valid_frames = process_collections()
export_to_xls(thing, output_file_path)


Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 2 - 4
Timecode Range: 00:00:00:02 - 00:00:00:04

Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 31 - 33
Timecode Range: 00:00:00:31 - 00:00:00:33

Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 67 - 70
Timecode Range: 00:00:01:07 - 00:00:01:10

Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 122 - 123
Timecode Range: 00:00:02:02 - 00:00:02:03

Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1111 - 1112
Timecode Range: 00:00:18:31 - 00:00:18:32

Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1201 - 1205
Timecode Range: 00:00:20:01 - 00:00:20:05

Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1211 - 1215
Timecode Range: 00:00:20:11 - 00:00:20:15

Location: /hpsans12/production/Dune2/reel1/VFX/Hydraulx
Range: 1251 - 1253
Timecode Range: 00:00:20:51 - 00:00:20:53

Location: /hpsans12/production/Dune2/reel1/VFX/Hydr

In [83]:
import csv
import pymongo
import subprocess
import pandas as pd

# MongoDB connection information: a client for the server at localhost:27017.
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
# Database handle used by the functions below as the module-level ``db``.
# NOTE(review): "your_database_name" looks like a placeholder — confirm.
db = mongo_client["your_database_name"]

# Function to populate Baselight collection
def populate_baselight_collection(baselight_file):
    """Parse a Baselight export and insert one document per line.

    Each non-blank line is split on whitespace: the first token is stored as
    ``folder`` and every following all-digit token as an int in ``frames``
    (other tokens are dropped). The documents go into ``db["baselight"]``.

    Args:
        baselight_file: Iterable of text lines, e.g. an open text file.
    """
    baselight_collection = db["baselight"]

    output_data = []
    for raw_line in baselight_file:
        tokens = raw_line.split()
        if not tokens:
            continue  # blank or whitespace-only line
        folder, *frame_tokens = tokens
        output_data.append({
            "folder": folder,
            "frames": [int(token) for token in frame_tokens if token.isdigit()],
        })

    # insert_many() rejects an empty list, so an empty export is a no-op
    # instead of an exception.
    if output_data:
        baselight_collection.insert_many(output_data)

def populate_xytech_collection(xytech_file):
    """Parse a Xytech work-order file and insert one document per work order.

    A line starting with "Xytech Workorder" opens a new entry whose number is
    the last whitespace-separated token of that line. "Location:" and
    "Notes:" header lines and blank lines are skipped. Every other line is
    stored as a location when it starts with "/" and as a note otherwise.
    Documents have the keys ``workorder``, ``location`` (list of paths) and
    ``notes`` (lines joined with newlines) and go into ``db["xytech"]``.

    Args:
        xytech_file: Iterable of text lines, e.g. an open text file.
    """
    xytech_collection = db["xytech"]
    output_data = []

    workorder = None
    location = []
    notes = []

    for raw_line in xytech_file:
        line = raw_line.strip()
        if not line or line.startswith(("Location:", "Notes:")):
            continue  # blank lines and section headers carry no data
        if line.startswith("Xytech Workorder"):
            if workorder is not None:
                # A new work order begins: store the one collected so far.
                output_data.append({"workorder": workorder, "location": location, "notes": "\n".join(notes)})
                location = []
                notes = []
            workorder = line.split()[-1]
        elif line.startswith("/"):
            location.append(line)
        else:
            notes.append(line)

    # Store the entry that was still open when the file ended.
    if workorder is not None:
        output_data.append({"workorder": workorder, "location": location, "notes": "\n".join(notes)})

    # insert_many() rejects an empty list, so a file without any work order
    # is a no-op instead of an exception.
    if output_data:
        xytech_collection.insert_many(output_data)


#proj1 script
def _collapse_frame_ranges(frames):
    """Collapse a sequence of frame numbers into runs of consecutive values.

    Args:
        frames: Iterable of ints, processed in the order given.

    Returns:
        A list with one item per run: the bare ``int`` for an isolated
        frame, or a ``"start-end"`` string for two or more consecutive
        frames.
    """
    collapsed = []
    run_start = run_end = None
    for frame in frames:
        if run_start is None:
            run_start = run_end = frame
        elif frame == run_end + 1:
            run_end = frame  # still consecutive: extend the current run
        else:
            collapsed.append(run_start if run_start == run_end else f"{run_start}-{run_end}")
            run_start = run_end = frame
    if run_start is not None:
        # Flush the run that was still open when the input ended.
        collapsed.append(run_start if run_start == run_end else f"{run_start}-{run_end}")
    return collapsed


def process_collections():
    """Build (location, frames) pairs from the Baselight collection.

    Reads every document of ``db["baselight"]``, replaces the document's
    folder with the matching line of the module-level ``XY_File`` list (when
    one exists) and collapses the document's frame numbers into runs.

    Returns:
        A list of ``(folder, frames)`` tuples where ``frames`` is an ``int``
        for an isolated frame or a ``"start-end"`` string for a run of
        consecutive frames.
    """
    baselight_collection = db["baselight"]
    cursor = baselight_collection.find({}, {"_id": 0, "folder": 1, "frames": 1})

    output_ranges = []
    for document in cursor:
        current_folder = document["folder"]

        # Drop the second "/"-separated component of the folder so the
        # remainder can be searched for inside the Xytech lines.
        folder_parts = current_folder.split("/")
        if len(folder_parts) > 1:
            folder_parts.pop(1)
        new_folder = "/".join(folder_parts)

        # A remainder made of nothing but slashes would be a substring of
        # (nearly) every line and remap the folder to an unrelated location,
        # so only search when something meaningful is left.
        if new_folder.strip("/"):
            for techfile in XY_File:
                if new_folder in techfile:
                    # No break on purpose: the last matching line wins.
                    current_folder = techfile.strip()

        # Keep only values whose text form is all digits, i.e. non-negative
        # integers; anything else stored in "frames" is ignored.
        frame_numbers = [int(text) for text in map(str, document["frames"]) if text.isdigit()]
        output_ranges.extend(
            (current_folder, frames) for frames in _collapse_frame_ranges(frame_numbers)
        )

    return output_ranges

# Function to get video duration using ffmpeg
import subprocess
import re

def _parse_ffprobe_number(text):
    """Parse an ffprobe numeric field: plain ("12.5") or rational ("60000/1001")."""
    if '/' in text:
        numerator, denominator = map(float, text.split('/'))
        return numerator / denominator
    return float(text)


def get_video_duration(video_file):
    """Return the length of *video_file* in frames, or None on failure.

    Runs ffprobe on the first video stream asking for ``duration`` and
    ``r_frame_rate`` and returns their product. The two comma-separated
    values are parsed the same way and multiplied, so the result does not
    depend on which of them ffprobe prints first. When ffprobe prints a
    single value, 24 is used as the other factor.

    Args:
        video_file: Path of the video file handed to ffprobe.

    Returns:
        ``duration * frame_rate`` as a float, or ``None`` when ffprobe cannot
        be started, exits with an error, or prints something that cannot be
        parsed (a diagnostic is printed in each case).
    """
    command = ['ffprobe', '-v', 'error', '-select_streams', 'v:0', '-show_entries', 'stream=duration,r_frame_rate', '-of', 'csv=p=0', video_file]
    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT).decode('utf-8').strip()
    except subprocess.CalledProcessError as e:
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: {e.output.decode('utf-8').strip()}")
        return None
    except OSError as e:
        # Raised when the ffprobe executable itself cannot be launched
        # (e.g. it is not installed or not on PATH).
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: {e}")
        return None

    parts = output.split(',')
    try:
        first_value = _parse_ffprobe_number(parts[0])
        # Assuming default frame rate as 24 fps when only one value is printed.
        second_value = _parse_ffprobe_number(parts[1]) if len(parts) > 1 else 24
    except (ValueError, ZeroDivisionError):
        # e.g. "N/A" for an unknown duration or a "0/0" frame rate.
        print(f"Error: Failed to get video duration for {video_file}.")
        print(f"Command: {' '.join(command)}")
        print(f"Error message: unexpected ffprobe output {output!r}")
        return None
    return first_value * second_value


def process_collectionsVideo(video_file):
    """Report the frame entries that start inside *video_file*.

    Compares every entry from ``process_collections()`` against the frame
    count returned by ``get_video_duration()``. Qualifying ranges are printed
    via ``print_timecode_range()``; qualifying single frames are collected
    and returned instead of being discarded.

    Args:
        video_file: Path of the video whose length bounds the frames.

    Returns:
        A list of ``(folder, frame, timecode)`` tuples for the single frames
        inside the video. The list is empty when the video length cannot be
        determined.
    """
    video_duration = get_video_duration(video_file)
    if video_duration is None:
        # get_video_duration() has already printed the failure; comparing
        # frame numbers against None would raise TypeError.
        return []

    valid_frames_with_timecodes = []
    for folder, frames in process_collections():
        if isinstance(frames, int):
            if frames < video_duration:
                valid_frames_with_timecodes.append((folder, frames, frame_to_timecode(frames)))
            continue

        start_frame, end_frame = map(int, frames.split('-'))
        # NOTE(review): only the start of the range is compared, so a range
        # that begins inside the video but ends past it is still printed.
        if start_frame < video_duration:
            print("Valid Frames to fix:")
            print_timecode_range(folder, start_frame, end_frame)

    return valid_frames_with_timecodes
            

def print_timecode_range(folder, start_frame, end_frame):
    """Print a location, its frame range and the equivalent timecode range."""
    first_tc, last_tc = [frame_to_timecode(value) for value in (start_frame, end_frame)]
    print(f"Location: {folder}")
    print(f"Range: {start_frame} - {end_frame}")
    print(f"Timecode Range: {first_tc} - {last_tc}\n")



# Function to convert frame number to timecode
def frame_to_timecode(frame, fps=60):
    """Convert a frame number to an ``HH:MM:SS:FF`` timecode string.

    Args:
        frame: Frame number to convert.
        fps: Frames per second used for the conversion. Defaults to 60.
            NOTE(review): get_video_duration falls back to 24 fps when no
            rate is reported - confirm 60 is the intended rate here.

    Returns:
        The timecode as a string with each field zero-padded to at least two
        digits. Hours are not wrapped at 24.
    """
    # Integer divmod avoids the float rounding of ``frame / fps``.
    whole_seconds, frames = divmod(frame, fps)
    whole_minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return (
        f"{int(hours):02d}:{int(minutes):02d}:"
        f"{int(seconds):02d}:{int(frames):02d}"
    )

            
def export_to_xls(data, output_file):
    """Write location/frame rows to an Excel file and report the path.

    Args:
        data: Rows with two fields each, loaded under the column names
            "Location" and "Frames".
        output_file: Destination path handed to ``DataFrame.to_excel``.
    """
    table = (
        pd.DataFrame(data, columns=["Location", "Frames"])
        .assign(Valid="No")  # every row starts out flagged "No"
        .drop_duplicates()
    )

    # index=False keeps the row index out of the spreadsheet.
    table.to_excel(output_file, index=False)

    print(f"Data exported to {output_file}")

# Paths to the Baselight export, the Xytech work order, and the video to check
baselight_file_path = "Baselight_export.txt"
xytech_file_path = "Xytech.txt"
video_file_path = "twitch_nft_demo.mp4"

# Keep the raw lines of both inputs available at module level. Reading inside
# `with` blocks closes each handle right away; BL_File used to be an open file
# object that was never closed, and is now a list of lines like XY_File.
with open(baselight_file_path, "r") as f:
    BL_File = f.read().splitlines()

with open(xytech_file_path) as f:
    XY_File = f.read().splitlines()

# Load the Baselight export into its collection
with open(baselight_file_path, "r") as baselight_file:
    populate_baselight_collection(baselight_file)

# Load the Xytech work order into its collection
with open(xytech_file_path, "r") as xytech_file:
    populate_xytech_collection(xytech_file)

# Process collections and check frames against video duration
process_collectionsVideo(video_file_path)

# Export every (location, frames) pair from the collections to a spreadsheet
output_file_path = "output.xlsx"
valid_frames = process_collections()
export_to_xls(valid_frames, output_file_path)


Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 2 - 4
Timecode Range: 00:00:00:02 - 00:00:00:04

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 31 - 33
Timecode Range: 00:00:00:31 - 00:00:00:33

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 67 - 70
Timecode Range: 00:00:01:07 - 00:00:01:10

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 122 - 123
Timecode Range: 00:00:02:02 - 00:00:02:03

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1111 - 1112
Timecode Range: 00:00:18:31 - 00:00:18:32

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1201 - 1205
Timecode Range: 00:00:20:01 - 00:00:20:05

Valid Frames to fix:
Location: /hpsans13/production/Dune2/reel1/partA/1920x1080
Range: 1211 - 1215
Timecode Range: 00:00:20:11 - 00:00:20:15

Valid Frames to fix:
L