In [2]:
import os
import pandas as pd
import re
from datetime import datetime, timedelta

def clean_text(text):
    """Return *text* with every character that is neither a word character nor whitespace removed."""
    # \w keeps letters, digits and underscores; \s keeps spaces, tabs and newlines.
    return re.sub(r"[^\w\s]", "", text)

def match_line(line, df):
    """Return the rows of *df* whose transcription or translation contains *line*.

    The comparison is case-insensitive and ignores punctuation: both the
    line and the two text columns are lower-cased and passed through
    ``clean_text`` before the substring test.

    Args:
        line: One line of the script.
        df: DataFrame with 'TRANSCRIPTION (SESOTHO)' and
            'TRANSLATION (ENGLISH)' columns.

    Returns:
        A DataFrame holding the matching rows, or an empty DataFrame when
        nothing matches or the line has no searchable text.
    """
    cleaned_line = clean_text(line.lower())

    # An empty (or whitespace-only) needle is a substring of practically every
    # cell, so a punctuation-only script line would otherwise match all rows.
    if not cleaned_line.strip():
        return pd.DataFrame()

    matched_rows = []
    for _, row in df.iterrows():
        transcription = clean_text(str(row['TRANSCRIPTION (SESOTHO)']).lower())
        translation = clean_text(str(row['TRANSLATION (ENGLISH)']).lower())
        if cleaned_line in transcription or cleaned_line in translation:
            matched_rows.append(row)

    if not matched_rows:
        return pd.DataFrame()
    # Each matched row is a Series; stack them as columns and flip back to rows.
    return pd.concat(matched_rows, axis=1).transpose()

def get_word_indices(full_string, substring):
    """Locate *substring* inside *full_string* as a run of whole words.

    Both strings are split on whitespace. Returns ``(start, stop)`` word
    indices (stop exclusive) of the first position where the words of
    *substring* appear consecutively, or ``(None, None)`` when they do not.
    """
    words = full_string.split()
    wanted = substring.split()
    span = len(wanted)

    hits = (
        (position, position + span)
        for position, _ in enumerate(words)
        if words[position:position + span] == wanted
    )
    return next(hits, (None, None))

# Folder holding the interview workbooks and the extension that identifies them.
folder_path = "../Interview XLSX/"
file_extension = ".xlsx"

# Read every workbook into its own frame and concatenate once at the end;
# growing a DataFrame with pd.concat inside the loop copies all earlier rows
# on every iteration.
interview_frames = []
# sorted() makes the row order (and therefore the match order) reproducible,
# since os.listdir returns entries in arbitrary order.
for filename in sorted(os.listdir(folder_path)):
    # "~$name.xlsx" entries are the lock files Excel leaves next to an open
    # workbook; they are not readable workbooks.
    if not filename.endswith(file_extension) or filename.startswith("~$"):
        continue
    file_path = os.path.join(folder_path, filename)
    df = pd.read_excel(file_path)
    # Remember which workbook each row came from.
    df['filepath'] = file_path
    interview_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
combined_df = (
    pd.concat(interview_frames, ignore_index=True)
    if interview_frames
    else pd.DataFrame()
)

# One output record per (script line, matching interview row); script lines
# without any match get a single record with empty timecode and file path.
matched_rows = []

with open('../Script/test script.txt', 'r', encoding='utf-8') as file:
    lines = file.readlines()

# '***<name>' switches to a Sesotho narrator, '###<name>' to an English one;
# every following non-empty line is attributed to that narrator.
narrator = None
language = None
for line in lines:
    line = line.strip().lower()
    if line.startswith('***'):
        narrator = line[3:].strip()
        language = 'SESOTHO'
        continue
    if line.startswith('###'):
        narrator = line[3:].strip()
        language = 'ENGLISH'
        continue
    if not (line and narrator and language):
        continue

    matched_rows_df = match_line(line, combined_df)
    cleaned_line = clean_text(line)

    if matched_rows_df.empty:
        matched_rows.append({
            'Text': line,
            'Narrator': narrator,
            'Language': language,
            'Timecode Range': None,
            'FilePath': None
        })
        continue

    for _, row in matched_rows_df.iterrows():
        time_range = row['TIME'].split(" - ")
        start_time = datetime.strptime(time_range[0], '%H:%M:%S')
        end_time = datetime.strptime(time_range[1], '%H:%M:%S')
        total_duration = (end_time - start_time).total_seconds()

        transcription = clean_text(str(row['TRANSCRIPTION (SESOTHO)']).lower())
        translation = clean_text(str(row['TRANSLATION (ENGLISH)']).lower())
        original_text = transcription if cleaned_line in transcription else translation

        # Default: the line is the whole row, so the row's own range applies.
        timecode_range = row['TIME']
        if cleaned_line != original_text:
            start_index, end_index = get_word_indices(original_text, cleaned_line)
            # match_line matches on characters, get_word_indices on whole
            # words; a hit that does not start/end on a word boundary gives
            # (None, None). Keep the full row range in that case instead of
            # failing on None arithmetic.
            if start_index is not None:
                # Estimate the sub-range by assuming every word takes the
                # same share of the row's duration.
                total_words = len(original_text.split())
                start_time = start_time + timedelta(seconds=total_duration*start_index/total_words)
                end_time = start_time + timedelta(seconds=total_duration*(end_index-start_index)/total_words)
                timecode_range = f'{start_time.time()} - {end_time.time()}'

        matched_rows.append({
            'Text': line,
            'Narrator': narrator,
            'Language': language,
            'Timecode Range': timecode_range,
            'FilePath': row['filepath']
        })

matched_df = pd.DataFrame(matched_rows)
matched_df.to_csv('../intermediate.csv', index=False)


In [None]:
# Note that two of the interviewees don't have xlsx files, only txt files.
# An additional simple Python script will be needed to make this conversion.
# In case the Python script can identify all of these lines correctly, I recommend you add an additional
# incorrect line to the script, to simulate the case where it doesn't identify a line correctly, to make sure we are handling that.

import csv
import os
import xml.etree.ElementTree as ET
import copy
import random

def _timecode_endpoint_to_frames_string(endpoint, fps):
    """Format one 'HH:MM[:SS[.fff]]' endpoint as 'HH:MM:SS:FF'."""
    parts = endpoint.strip().split(':')
    if len(parts) not in (2, 3):
        raise ValueError(f"Unsupported timecode endpoint: {endpoint!r}")

    hours = int(parts[0])
    minutes = int(parts[1])
    # A missing seconds field counts as zero seconds.
    seconds = float(parts[2]) if len(parts) == 3 else 0.0
    whole_seconds = int(seconds)
    # Fractional seconds become a frame number; clamp so it never reaches fps.
    frames = min(int((seconds - whole_seconds) * fps), fps - 1)

    # Normalise through total seconds so out-of-range fields carry over.
    total_seconds = hours * 3600 + minutes * 60 + whole_seconds
    total_minutes, out_seconds = divmod(total_seconds, 60)
    out_hours, out_minutes = divmod(total_minutes, 60)
    return '{:02d}:{:02d}:{:02d}:{:02d}'.format(out_hours, out_minutes, out_seconds, frames)


def convert_timecode(timecode, fps=24):
    """Convert a 'start - end' time range into two 'HH:MM:SS:FF' strings.

    Each endpoint may be 'HH:MM:SS', 'HH:MM:SS.ffffff' or 'HH:MM'; spaces
    around the dash are ignored. The previous implementation dropped the
    seconds field and always emitted ':00:00' for seconds and frames.

    Args:
        timecode: Range such as '00:00:14 - 00:00:25'.
        fps: Frames per second used to turn fractional seconds into the
            frame field. Defaults to 24.

    Returns:
        Tuple ``(start, end)`` of 'HH:MM:SS:FF' strings.

    Raises:
        ValueError: If the range does not have exactly two endpoints or an
            endpoint is not in a supported format.
    """
    endpoints = timecode.split('-')
    if len(endpoints) != 2:
        raise ValueError(f"Expected 'start - end', got {timecode!r}")
    start_time, end_time = endpoints

    converted_start_time = _timecode_endpoint_to_frames_string(start_time, fps)
    converted_end_time = _timecode_endpoint_to_frames_string(end_time, fps)
    return converted_start_time, converted_end_time

def is_time_within_range(time, start_time, end_time):
    """Tell whether *time* lies between *start_time* and *end_time*, both bounds included."""
    not_before_start = start_time <= time
    return not_before_start and time <= end_time

def get_parent_map(root):
    """Build a child -> parent lookup for every element below *root*.

    The root element itself has no parent and therefore never appears as
    a key.
    """
    child_to_parent = {}
    for parent in root.iter():
        for child in parent:
            child_to_parent[child] = parent
    return child_to_parent

def get_parent_element(parent_map, element):
    """Look up the parent of *element* in *parent_map*; None when it has no entry."""
    return parent_map[element] if element in parent_map else None

def get_clip_type(parent_map, element):
    """Return 'video' or 'audio' according to the nearest such ancestor of *element*.

    Walks upwards through *parent_map* starting at the element's parent and
    returns 'unknown' when no <video> or <audio> ancestor exists.
    """
    media_tags = ('video', 'audio')
    ancestor = get_parent_element(parent_map, element)
    while ancestor is not None and ancestor.tag not in media_tags:
        ancestor = get_parent_element(parent_map, ancestor)
    return 'unknown' if ancestor is None else ancestor.tag


def create_xml_structure(project_name, matches):
    """Build an xmeml (version 4) project tree containing the matched clips.

    The tree is xmeml > project > children > sequence, with fixed sequence
    settings (24 fps, non-NTSC, name "output"). Previously *matches* was
    accepted but ignored, so the exported sequence never contained a clip;
    the clips are now added as tracks under <media>/<video> and
    <media>/<audio>.

    Args:
        project_name: Text for the project's <name> element.
        matches: Mapping of source track element -> list of
            ``(clip_item, clip_type, track_properties)`` tuples, where
            ``clip_type`` is 'video', 'audio' or 'unknown' and
            ``track_properties`` is a dict of attributes for the new track.
            May be empty or None.

    Returns:
        xml.etree.ElementTree.ElementTree rooted at the <xmeml> element.
    """
    def add_text(parent, tag, text):
        # Append <tag>text</tag> under parent.
        node = ET.SubElement(parent, tag)
        node.text = text
        return node

    def add_rate(parent):
        # <rate> block shared by the sequence and its timecode.
        rate = ET.SubElement(parent, "rate")
        add_text(rate, "timebase", "24")
        add_text(rate, "ntsc", "FALSE")

    # Root, project and the single sequence it contains.
    root = ET.Element("xmeml")
    root.set("version", "4")
    project = ET.SubElement(root, "project")
    add_text(project, "name", project_name)
    children = ET.SubElement(project, "children")

    sequence = ET.SubElement(children, "sequence")
    sequence_attributes = (
        ("id", "sequence-1"),
        ("TL.SQAudioVisibleBase", "0"),
        ("TL.SQVideoVisibleBase", "0"),
        ("TL.SQVisibleBaseTime", "1954072810692088"),
        ("TL.SQAVDividerPosition", "0.556213021278"),
        ("MZ.Sequence.PreviewUseMaxRenderQuality", "false"),
        ("MZ.Sequence.PreviewUseMaxBitDepth", "false"),
        ("MZ.Sequence.VideoTimeDisplayFormat", "998"),
        ("MZ.WorkOutPoint", "8014566084840000"),
        ("MZ.WorkInPoint", "0"),
        ("explodedTracks", "true"),
    )
    for attribute, value in sequence_attributes:
        sequence.set(attribute, value)

    add_text(sequence, "duration", "424764")
    add_rate(sequence)
    add_text(sequence, "name", "output")

    # Media containers that receive the matched tracks.
    media = ET.SubElement(sequence, "media")
    media_containers = {
        "video": ET.SubElement(media, "video"),
        "audio": ET.SubElement(media, "audio"),
    }

    # One output track per source track and media type. Clip items are deep
    # copied so the parsed source trees are neither shared nor modified.
    for clip_entries in (matches or {}).values():
        tracks_by_type = {}
        for clip_item, clip_type, track_properties in clip_entries:
            container = media_containers.get(clip_type)
            if container is None:
                # 'unknown' clips have no <video>/<audio> ancestor to go under.
                continue
            track = tracks_by_type.get(clip_type)
            if track is None:
                track = ET.SubElement(container, "track", dict(track_properties))
                tracks_by_type[clip_type] = track
            track.append(copy.deepcopy(clip_item))

    # Sequence start timecode.
    timecode = ET.SubElement(sequence, "timecode")
    add_rate(timecode)
    add_text(timecode, "string", "00:00:00:00")
    add_text(timecode, "frame", "0")
    add_text(timecode, "displayformat", "NDF")

    # Label colour.
    labels = ET.SubElement(sequence, "labels")
    add_text(labels, "label2", "Forest")

    # Logging info children are created but intentionally left empty.
    logging_info = ET.SubElement(sequence, "logginginfo")
    for tag in ("description", "scene", "shottake", "lognote", "good",
                "originalvideofilename", "originalaudiofilename"):
        ET.SubElement(logging_info, tag)

    return ET.ElementTree(root)


def extract_clips(csv_file, xml_folder, output_file):
    """Collect clip items whose timecode falls in a CSV range and export them.

    For every CSV row with a 'Timecode Range', each <clipitem> in the XML
    files of *xml_folder* is tested: its ``timecode/string`` text must lie
    between the converted start and end of the range. Matches are grouped by
    the clip's parent element and passed to ``create_xml_structure``; the
    result is written to *output_file* only when at least one clip matched.

    Args:
        csv_file: Path of a UTF-8 CSV with a 'Timecode Range' column.
        xml_folder: Folder containing the XML files to search.
        output_file: Path of the XML file to write.
    """
    # Parse every XML once instead of once per CSV row. Besides the saved
    # work, this keeps the Element objects (used as grouping keys below)
    # identical across rows, so clips of one source track stay together.
    parsed_sources = []
    for xml_file in sorted(os.listdir(xml_folder)):
        if not xml_file.lower().endswith('.xml'):
            continue  # anything else in the folder would fail to parse
        root = ET.parse(os.path.join(xml_folder, xml_file)).getroot()
        parsed_sources.append((xml_file, get_parent_map(root), root.findall(".//clipitem")))

    matches = {}
    with open(csv_file, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            timecode_range = row['Timecode Range']
            # A missing range is an empty cell when the CSV was written by
            # pandas; the literal 'None' is still accepted as "no range".
            if not timecode_range or timecode_range == 'None':
                continue
            start_time, end_time = convert_timecode(timecode_range)

            for xml_file, parent_map, clip_items in parsed_sources:
                for clip_item in clip_items:
                    timecode_element = clip_item.find(".//timecode/string")
                    if timecode_element is None or timecode_element.text is None:
                        continue
                    xml_timecode = timecode_element.text

                    # Plain string comparison of the timecode strings.
                    if not is_time_within_range(xml_timecode, start_time, end_time):
                        continue

                    clip_type = get_clip_type(parent_map, clip_item)

                    # Copy the attributes of the enclosing <track>, if any.
                    track_properties = {}
                    track_element = get_parent_element(parent_map, clip_item)
                    if track_element is not None and track_element.tag == 'track':
                        track_properties = dict(track_element.attrib)

                    print('Match found in {}'.format(xml_file))
                    print('Timecode: {}'.format(xml_timecode))
                    print('Start time: {}'.format(start_time))
                    print('End time: {}'.format(end_time))
                    print('Clip Type: {}'.format(clip_type))
                    print('Track Properties: {}'.format(track_properties))

                    matches.setdefault(track_element, []).append(
                        (clip_item, clip_type, track_properties)
                    )

    if matches:
        output_tree = create_xml_structure('test', matches)
        output_tree.write(output_file, encoding='utf-8', xml_declaration=True)

# Usage example
# CSV to read (extract_clips uses its 'Timecode Range' column), folder of XML
# files to search, and the path of the XML file to write.
csv_file = '../output.csv'
xml_folder = '../interview xmls'
output_file = '../xml exports//matched_clips.xml'

# Runs immediately when this cell is executed.
extract_clips(csv_file, xml_folder, output_file)


In [26]:
import csv
import os
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
import copy
import random

# Module-level accumulator: process_xml_files appends the result of
# extract_sequence_info for every XML file it handles (entries may be None).
final_clip_list = []

def _xml_text_child(parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = ET.SubElement(parent, tag)
    element.text = text
    return element


def _xml_ntsc_rate(parent):
    """Append the 24 fps / NTSC <rate> block used throughout the sequence."""
    rate = ET.SubElement(parent, "rate")
    _xml_text_child(rate, "timebase", "24")
    _xml_text_child(rate, "ntsc", "TRUE")
    return rate


def create_xml(xml_name, xml_json_data, output_dir="../xml exports"):
    """Write an xmeml sequence containing the given clips and return its path.

    Changes compared with the earlier version:
      * Sequence and track attributes use dotted names ("TL.SQ...",
        "MZ....", "Monitor....") like the other XML builder in this file;
        keyword arguments cannot contain dots and produced "TL_SQ..." names.
      * Each linked audio clip is placed only on the audio track named by
        its link's ``trackindex``; before, every audio clip was copied onto
        every audio track.
      * Audio tracks are emitted in ascending track-index order.
      * The output folder is a parameter, is created when missing, and the
        written path is returned.

    Args:
        xml_name: Sequence name; also the stem of the output file name
            (spaces replaced by underscores).
        xml_json_data: List of dicts as returned by extract_sequence_info.
            Only ``entry["video_clips"][0]`` is read, using its keys
            "video_clip_element", "links" and
            "linked_audio_clip_elements_list".
        output_dir: Folder for the XML file. Defaults to "../xml exports".

    Returns:
        Path of the written XML file.
    """
    root = ET.Element("xmeml", version="4")

    # Attribute names contain dots, so they are passed as a dict.
    sequence = ET.SubElement(root, "sequence", {
        "id": "sequence-2",
        "TL.SQAudioVisibleBase": "0",
        "TL.SQVideoVisibleBase": "0",
        "TL.SQVisibleBaseTime": "0",
        "TL.SQAVDividerPosition": "0.5",
        "TL.SQHideShyTracks": "0",
        "TL.SQHeaderWidth": "236",
        "TL.SQDataTrackViewControlState": "0",
        "Monitor.ProgramZoomOut": "340011984312000",
        "Monitor.ProgramZoomIn": "0",
        "TL.SQTimePerPixel": "1.6034289012958367",
        "MZ.EditLine": "333083126376000",
        "MZ.Sequence.PreviewFrameSizeHeight": "1080",
        "MZ.Sequence.PreviewFrameSizeWidth": "1920",
        "MZ.Sequence.AudioTimeDisplayFormat": "200",
        "MZ.Sequence.PreviewUseMaxRenderQuality": "false",
        "MZ.Sequence.PreviewUseMaxBitDepth": "false",
        "MZ.Sequence.VideoTimeDisplayFormat": "110",
        "MZ.WorkOutPoint": "15235011792000",
        "MZ.WorkInPoint": "0",
        "MZ.ZeroPoint": "0",
        "explodedTracks": "true",
    })

    _xml_text_child(sequence, "uuid", "50e61931-251f-4069-8193-a3fbad7f93ff")
    _xml_text_child(sequence, "duration", "31533")
    _xml_ntsc_rate(sequence)
    _xml_text_child(sequence, "name", xml_name)

    media = ET.SubElement(sequence, "media")
    video = ET.SubElement(media, "video")

    # --- video format -----------------------------------------------------
    format_ = ET.SubElement(video, "format")
    samplecharacteristics = ET.SubElement(format_, "samplecharacteristics")
    _xml_ntsc_rate(samplecharacteristics)

    codec = ET.SubElement(samplecharacteristics, "codec")
    _xml_text_child(codec, "name", "Apple ProRes 422")
    appspecificdata = ET.SubElement(codec, "appspecificdata")
    _xml_text_child(appspecificdata, "appname", "Final Cut Pro")
    _xml_text_child(appspecificdata, "appmanufacturer", "Apple Inc.")
    _xml_text_child(appspecificdata, "appversion", "7.0")

    data = ET.SubElement(appspecificdata, "data")
    qtcodec = ET.SubElement(data, "qtcodec")
    for tag, text in (
        ("codecname", "Apple ProRes 422"),
        ("codectypename", "Apple ProRes 422"),
        ("codectypecode", "apcn"),
        ("codecvendorcode", "appl"),
        ("spatialquality", "1024"),
        ("temporalquality", "0"),
        ("keyframerate", "0"),
        ("datarate", "0"),
    ):
        _xml_text_child(qtcodec, tag, text)

    for tag, text in (
        ("width", "1920"),
        ("height", "1080"),
        ("anamorphic", "FALSE"),
        ("pixelaspectratio", "square"),
        ("fielddominance", "none"),
        ("colordepth", "24"),
    ):
        _xml_text_child(samplecharacteristics, tag, text)

    # NOTE(review): only the first matched video clip of each entry is used,
    # as before - confirm whether further matches should be exported too.
    first_clips = [entry["video_clips"][0] for entry in xml_json_data]

    # Audio track indexes referenced by the clips' links, in ascending order
    # so the n-th <track> really is audio track n.
    audio_tracks = sorted({
        link["trackindex"]
        for clip in first_clips
        for link in clip["links"]
        if link["mediatype"] == "audio"
    })

    # --- video track --------------------------------------------------------
    # A single video track receives every video clip. Elements are deep
    # copied so the source trees are not shared with the output tree.
    video_track = ET.SubElement(video, "track", {
        "TL.SQTrackShy": "0",
        "TL.SQTrackExpandedHeight": "25",
        "TL.SQTrackExpanded": "0",
        "MZ.TrackTargeted": "0",
    })
    for clip in first_clips:
        video_track.append(copy.deepcopy(clip["video_clip_element"]))

    # --- audio format and outputs -------------------------------------------
    audio = ET.SubElement(media, "audio")
    _xml_text_child(audio, 'numOutputChannels', '2')

    format_element = ET.SubElement(audio, 'format')
    sample_characteristics = ET.SubElement(format_element, 'samplecharacteristics')
    _xml_text_child(sample_characteristics, 'depth', '16')
    _xml_text_child(sample_characteristics, 'samplerate', '48000')

    outputs = ET.SubElement(audio, 'outputs')
    for channel_number in ('1', '2'):
        group = ET.SubElement(outputs, 'group')
        _xml_text_child(group, 'index', channel_number)
        _xml_text_child(group, 'numchannels', '1')
        _xml_text_child(group, 'downmix', '0')
        channel = ET.SubElement(group, 'channel')
        _xml_text_child(channel, 'index', channel_number)

    # --- audio tracks -------------------------------------------------------
    for audio_track_index in audio_tracks:
        audio_track = ET.SubElement(audio, "track", {
            "TL.SQTrackAudioKeyframeStyle": "0",
            "TL.SQTrackShy": "0",
            "TL.SQTrackExpandedHeight": "25",
            "TL.SQTrackExpanded": "0",
            "MZ.TrackTargeted": "1",
            "PannerCurrentValue": "0.5",
            "PannerIsInverted": "true",
            "PannerStartKeyframe": "-91445760000000000,0.5,0,0,0,0,0,0",
            "PannerName": "Balance",
            "currentExplodedTrackIndex": f"{audio_track_index-1}",
            "totalExplodedTrackCount": "2",
            "premiereTrackType": "Stereo",
        })

        for clip in first_clips:
            # Ids of the audio clips this video clip links to on this track.
            wanted_ids = {
                str(link["linkclipref"])
                for link in clip["links"]
                if link["mediatype"] == "audio" and link["trackindex"] == audio_track_index
            }
            for audio_clip in clip["linked_audio_clip_elements_list"]:
                if str(audio_clip.get("id")) in wanted_ids:
                    audio_track.append(copy.deepcopy(audio_clip))

    # --- sequence timecode and label ----------------------------------------
    timecode = ET.SubElement(sequence, "timecode")
    _xml_ntsc_rate(timecode)
    _xml_text_child(timecode, "string", "00:00:00:00")
    _xml_text_child(timecode, "frame", "0")
    _xml_text_child(timecode, "displayformat", "NDF")

    labels = ET.SubElement(sequence, "labels")
    _xml_text_child(labels, "label2", "Forest")

    tree = ET.ElementTree(root)

    # Random suffix so repeated exports with the same name do not normally
    # overwrite each other (collisions remain possible).
    idd = str(random.randint(1, 1000))

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    filename = os.path.join(output_dir, f"{xml_name.replace(' ', '_')}-{idd}.xml")
    tree.write(filename, encoding="utf-8", xml_declaration=True)
    print(f"XML saved to {filename}")
    return filename


def extract_timecode(timecode_range):
    """Split a 'start - end' range and return both ends with all spaces removed.

    The two values are also printed.
    """
    start_time, end_time = (
        endpoint.strip().replace(" ", "")
        for endpoint in timecode_range.split(" - ")
    )
    print(start_time, end_time)
    return start_time, end_time


def process_csv_file(csv_file, xml_folder):
    """Run process_xml_files for every CSV row that has a timecode range.

    Args:
        csv_file: Path of a CSV with 'Timecode Range' and 'Narrator' columns.
        xml_folder: Folder passed through to process_xml_files.
    """
    # The CSV is opened as UTF-8 (not the platform default) because the
    # transcript text can contain non-ASCII characters; newline='' is what
    # the csv module asks for so quoted line breaks are read correctly.
    with open(csv_file, "r", encoding="utf-8", newline="") as file:
        csv_reader = csv.DictReader(file)
        for row in csv_reader:
            timecode_range = row["Timecode Range"]
            if not timecode_range:
                continue  # line had no match, nothing to extract
            start_time, end_time = extract_timecode(timecode_range)
            narrator_name = row["Narrator"]
            print(start_time, end_time, narrator_name)
            process_xml_files(xml_folder, start_time, end_time, narrator_name)


def process_xml_files(xml_folder, start_time, end_time, narrator_name):
    """Extract matching clips from every XML file that belongs to a narrator.

    A file belongs to the narrator when its name ends in '.xml' and contains
    *narrator_name*. The comparison ignores case, so a lower-cased narrator
    name still finds a file such as 'Vid2.xml'. The result of
    extract_sequence_info (possibly None) is appended to the module-level
    ``final_clip_list``.

    Args:
        xml_folder: Folder containing the XML files.
        start_time: Range start, passed to extract_sequence_info.
        end_time: Range end, passed to extract_sequence_info.
        narrator_name: Name to look for inside the file names.
    """
    wanted = narrator_name.lower()
    for filename in os.listdir(xml_folder):
        lowered = filename.lower()
        if lowered.endswith(".xml") and wanted in lowered:
            xml_file = os.path.join(xml_folder, filename)
            matched_clips = extract_sequence_info(xml_file, start_time, end_time)
            final_clip_list.append(matched_clips)


def convert_time_to_frames(time, rate):
    """Convert an 'HH:MM:SS' or 'HH:MM:SS.ffffff' string to a frame count.

    The elapsed seconds are multiplied by *rate* and truncated to an int.
    """
    has_fraction = "." in time
    parsed = datetime.strptime(time, "%H:%M:%S.%f" if has_fraction else "%H:%M:%S")
    # Elapsed time since midnight.
    elapsed = timedelta(
        hours=parsed.hour,
        minutes=parsed.minute,
        seconds=parsed.second,
        microseconds=parsed.microsecond,
    )
    return int(elapsed.total_seconds() * rate)


def extract_sequence_info(xml_file, start_time, end_time):
    """Find the video clips of *xml_file* that contain the given start time.

    The times are converted to frames with the sequence timebase. A video
    clip matches when the start frame lies between its <in> and <out>
    values; its <in>/<out> are then overwritten with the start/end frames
    (only in the tree parsed by this call).

    Args:
        xml_file: Path of the XML file to parse.
        start_time: 'HH:MM:SS' or 'HH:MM:SS.ffffff' range start.
        end_time: Range end in the same format.

    Returns:
        ``{'sequence_info': ..., 'video_clips': [...]}`` or None when no clip
        matches. Each clip dict carries the clip's numbers, its links, the
        clip element itself and the audio clip elements it links to.

    Raises:
        ValueError: If the file contains no <sequence> element.
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # The sequence is normally a direct child of the root, but project-wrapped
    # files nest it deeper (xmeml/project/children/sequence), so fall back to
    # a search of the whole tree.
    sequence = root.find('sequence')
    if sequence is None:
        sequence = root.find('.//sequence')
    if sequence is None:
        raise ValueError(f"No <sequence> element found in {xml_file}")

    sequence_info = {
        'duration': int(sequence.find('duration').text),
        'rate': {
            'timebase': int(sequence.find('rate/timebase').text),
            'ntsc': sequence.find('rate/ntsc').text == 'TRUE'
        }
    }
    sequence_rate = sequence_info['rate']['timebase']

    # Convert start and end times to frames
    start_frame = convert_time_to_frames(start_time, sequence_rate)
    end_frame = convert_time_to_frames(end_time, sequence_rate)
    print(start_frame, end_frame)
    print(sequence_rate,'sequence_rate')

    # Index the audio clip items by id once, instead of rescanning the whole
    # tree for every audio link of every video clip.
    audio_clips_by_id = {}
    for audio_clip_item in root.findall('.//audio//clipitem'):
        audio_clip_id = audio_clip_item.get('id')
        if audio_clip_id is not None:
            audio_clips_by_id.setdefault(audio_clip_id, []).append(audio_clip_item)

    video_clips = []
    for clip_item in root.findall('.//video//clipitem'):  # only clips below <video>
        clip_info = {
            'id': clip_item.attrib['id'],
            'name': clip_item.find('name').text,
            'duration': int(clip_item.find('duration').text),
            'rate': {
                'timebase': int(clip_item.find('rate/timebase').text),
                'ntsc': clip_item.find('rate/ntsc').text == 'TRUE'
            },
            'in': int(clip_item.find('in').text),
            'out': int(clip_item.find('out').text),
            'start': int(clip_item.find('start').text),
            'end': int(clip_item.find('end').text),
            'links': [],
            'video_clip_element': None,
            'linked_audio_clip_elements_list': []
        }

        for link in clip_item.findall('link'):
            link_clip_ref = link.find('linkclipref').text
            media_type = link.find('mediatype').text
            link_info = {
                'linkclipref': link_clip_ref,
                'mediatype': media_type,
                'trackindex': int(link.find('trackindex').text),
                'clipindex': int(link.find('clipindex').text)
            }
            group_index = link.find('groupindex')
            if group_index is not None:
                link_info['groupindex'] = int(group_index.text)
            clip_info['links'].append(link_info)

            if media_type == 'audio':
                # NOTE(review): the linked audio clips keep their original
                # <in>/<out>; only the video clip is trimmed below - confirm
                # whether audio should follow.
                clip_info['linked_audio_clip_elements_list'].extend(
                    audio_clips_by_id.get(link_clip_ref, [])
                )

        # A clip matches when the start frame lies inside its in..out span.
        print(clip_info['in'], start_frame, clip_info['out'])
        if clip_info['in'] <= start_frame <= clip_info['out']:
            clip_item.find('in').text = str(start_frame)
            clip_item.find('out').text = str(end_frame)
            clip_info['video_clip_element'] = clip_item
            video_clips.append(clip_info)

    if not video_clips:
        return None

    return {
        'sequence_info': sequence_info,
        'video_clips': video_clips
    }


# Example usage
# CSV to read and the folder whose XML files are searched per narrator.
csv_file = "../intermediate_test.csv"
xml_folder = "../Interview XML/"

# Fills the module-level final_clip_list as a side effect.
process_csv_file(csv_file, xml_folder)
# extract_sequence_info returns None for files without a matching clip; drop those.
final_clip_list = [item for item in final_clip_list if item is not None]
print(final_clip_list)
# Writes the sequence XML even when final_clip_list is empty.
create_xml('testy',final_clip_list)
# print(final_clip_list)


00:00:14 00:00:25
00:00:14 00:00:25 vid2
336 600
24 sequence_rate
0 336 313
[]
XML saved to ../xml exports/testy-760.xml
