In [1]:
import re
from datetime import timedelta

class TranscriptHelper:
    def __init__(self, filepath):
        """
        Initialize the TranscriptHelper by reading and parsing the transcript file.

        :param filepath: Path to the transcript.txt file.
        """
        self.filepath = filepath
        self.segments = []  # List to hold each speaking segment
        self._parse_transcript()

    def _parse_transcript(self):
        """
        Parse the transcript file and populate the segments list with dictionaries
        containing speaker, start_time, end_time, and text.
        """
        # Adjusted regex pattern to match the timestamp format accurately
        speaker_pattern = re.compile(r'^SPEAKER (\d+) (\d+):(\d+):(\d+):(\d+)$')
        current_segment = None

        with open(self.filepath, 'r') as file:
            for line in file:
                line = line.strip()
                if not line:
                    continue  # Skip empty lines

                speaker_match = speaker_pattern.match(line)
                if speaker_match:
                    # If there's an ongoing segment, set its end_time to the current speaker's start_time
                    if current_segment:
                        current_segment['end_time'] = self._format_time(
                            speaker_match.group(2),
                            speaker_match.group(3),
                            speaker_match.group(4),
                            speaker_match.group(5)
                        )
                        self.segments.append(current_segment)

                    speaker_id = f"Speaker {speaker_match.group(1)}"
                    start_time = self._format_time(
                        speaker_match.group(2),
                        speaker_match.group(3),
                        speaker_match.group(4),
                        speaker_match.group(5)
                    )
                    current_segment = {
                        'speaker': speaker_id,
                        'start_time': start_time,
                        'end_time': None,
                        'text': ""
                    }
                else:
                    if current_segment:
                        current_segment['text'] += line + " "

            # After the loop, add the last segment with end_time same as start_time (or handle differently)
            if current_segment:
                # Assuming the last segment ends at its start time; adjust if you have a known end time
                current_segment['end_time'] = current_segment['start_time']
                self.segments.append(current_segment)

        # Now, update end_time for each segment based on the next segment's start_time
        for i in range(len(self.segments) - 1):
            self.segments[i]['end_time'] = self.segments[i + 1]['start_time']
        # For the last segment, end_time remains as start_time (zero duration)

    def _format_time(self, hours, minutes, seconds, milliseconds):
        """
        Convert time components to total milliseconds.

        :return: Total milliseconds as an integer.
        """
        td = timedelta(
            hours=int(hours),
            minutes=int(minutes),
            seconds=int(seconds),
            milliseconds=int(milliseconds)
        )
        return int(td.total_seconds() * 1000)  # Convert to milliseconds

    def talking_speaker(self):
        """
        Returns a list of all unique speakers in the transcript.

        :return: List of unique speakers.
        """
        return list(set(segment['speaker'] for segment in self.segments))

    def duration_of_speaking_ms(self, speaker=None):
        """
        Calculate the total duration of speaking for each speaker or a specific speaker
        in milliseconds.

        :param speaker: (Optional) Specific speaker to calculate duration for.
        :return: Total duration in milliseconds or a dictionary of durations per speaker.
        """
        durations = {}
        for segment in self.segments:
            current_speaker = segment['speaker']
            start = segment['start_time']
            end = segment['end_time'] if segment['end_time'] else start
            duration = end - start  # Duration in milliseconds
            if current_speaker in durations:
                durations[current_speaker].append(duration)
            else:
                durations[current_speaker] = [duration]

        if speaker:
            return durations.get(speaker, [])
        return durations

    def total_duration_ms(self):
        """
        Calculate the total duration of the transcript in milliseconds.

        :return: Total duration in milliseconds.
        """
        if not self.segments:
            return 0
        start = self.segments[0]['start_time']
        end = self.segments[-1]['end_time'] if self.segments[-1]['end_time'] else self.segments[-1]['start_time']
        return end - start

    def get_conversation(self):
        """
        Retrieve the entire conversation as a list of dictionaries.

        :return: List of segments with speaker, start_time, end_time, and text.
        """
        return self.segments

    def get_speech_at_time_ms(self, query_time_ms):
        """
        Retrieve the speech segment at a specific time in milliseconds.

        :param query_time_ms: Time in milliseconds.
        :return: The segment dictionary if found, else None.
        """
        for segment in self.segments:
            start = segment['start_time']
            end = segment['end_time']
            if end is None:
                end = start
            if start <= query_time_ms < end:
                return segment
        return None

    def speaker_durations_ms(self):
        """
        Retrieve a dictionary where each key is a speaker and the value is a list of
        durations (in milliseconds) for each time the speaker spoke.

        :return: Dictionary with speakers as keys and lists of durations in milliseconds as values.
        """
        return self.duration_of_speaking_ms()




In [2]:
# Example Usage
if __name__ == "__main__":
    # Initialize the helper with the path to transcript.txt
    helper = TranscriptHelper('transcript.txt')

    # Get all unique speakers
    speakers = helper.talking_speaker()
    print("Speakers:", speakers)
    # Expected Output: Speakers: ['Speaker 1', 'Speaker 2']

    # Get total duration of the transcript
    total = helper.total_duration_ms()
    print(f"Total Duration: {total} milliseconds")
    # Expected Output: Total Duration: XXXX milliseconds

    # Get durations of each speaker in milliseconds
    durations = helper.speaker_durations_ms()
    for speaker, duration_list in durations.items():
        print(f"{speaker} durations (ms): {duration_list}")
    # Expected Output:
    # Speaker 1 durations (ms): [16160, 1000]
    # Speaker 2 durations (ms): [some_number]

    # Get durations for a specific speaker
    speaker1_durations = helper.duration_of_speaking_ms('Speaker 1')
    print(f"Speaker 1 Durations: {speaker1_durations} milliseconds")
    # Expected Output: Speaker 1 Durations: [16160, 1000]

    

    # Retrieve speech at a specific time (e.g., 17000 milliseconds)
    time_query_ms = 17000  # 17 seconds
    speech = helper.get_speech_at_time_ms(time_query_ms)
    if speech:
        print(f"At {time_query_ms} milliseconds, {speech['speaker']} was speaking: {speech['text']}")
    else:
        print(f"No speech found at {time_query_ms} milliseconds.")

Speakers: ['Speaker 2', 'Speaker 1']
Total Duration: 872039 milliseconds
Speaker 1 durations (ms): [16160, 16061, 1120, 1081, 3121, 9561, 2120, 1479, 880, 4561, 1719, 1959, 20601, 840, 840, 1920, 2440, 840, 5359, 1199, 10241, 3081, 1199, 840, 1400, 1241, 840, 5080, 9841, 1160, 11120, 840, 840, 1201, 1881, 840, 840, 20120, 1120, 1361, 841, 1920, 5919, 840, 841, 841, 1560, 7800, 1000, 4561, 3120, 8481, 10320, 2399, 5799, 880, 6159, 3359, 6080, 7921, 9159, 2000, 6121, 959, 2440, 2799, 8019, 1120, 1240, 840, 1959, 3961, 2921, 9120, 920, 2500, 7040, 8440, 7600, 839, 3679, 7439, 1121, 1600, 7319, 3680, 5721, 1120, 6681, 3361, 1601, 1601, 1400, 9241, 7159, 840, 11680, 5079, 2160, 1040, 1000, 1360, 8319, 1439, 3601, 840, 1059, 1559, 1881, 3400, 839, 2441, 4801, 38820, 3140, 4160, 8399, 9699, 2321, 841, 1959, 6159, 1960, 841, 1679, 1600, 840, 1439, 1679, 2481, 1959, 2361, 1480, 3599, 6400, 2681, 2921, 0]
Speaker 2 durations (ms): [3019, 2780, 2519, 8199, 3799, 1000, 9720, 4521, 1799, 3640, 841,

In [3]:
helper = TranscriptHelper('transcript.txt')
durations = helper.speaker_durations_ms()
durations['Speaker 1'][0]

16160

In [4]:
speech = helper.get_speech_at_time_ms(10159)
speech

{'speaker': 'Speaker 1',
 'start_time': 0,
 'end_time': 16160,
 'text': "Welcome to the deep dive. We're doing something a little different with this deep dive. We're gonna be looking at a whole bunch of fascinating facts and trivia that came from a conversation we had earlier. That's right. And we thought it would be fun to share some of those nuggets of knowledge with you. "}

In [5]:
import os
from moviepy.editor import VideoFileClip

def trim_video(input_path, start_ms, end_ms, output_folder='test'):
    """
    Trims a segment from the input video based on start and end times in milliseconds
    and saves the trimmed video in the specified output folder.

    :param input_path: Path to the input video file.
    :param start_ms: Start time in milliseconds.
    :param end_ms: End time in milliseconds.
    :param output_folder: Folder to save the trimmed video. Defaults to 'test'.
    """
    # Validate input file existence
    if not os.path.isfile(input_path):
        raise FileNotFoundError(f"The input video file '{input_path}' does not exist.")

    # Validate time inputs
    if start_ms < 0 or end_ms < 0:
        raise ValueError("Start and end times must be non-negative integers representing milliseconds.")
    if start_ms >= end_ms:
        raise ValueError("Start time must be less than end time.")

    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Convert milliseconds to seconds for moviepy
    start_time = start_ms / 1000.0
    end_time = end_ms / 1000.0

    try:
        # Load the video clip
        with VideoFileClip(input_path) as video:
            video_duration = video.duration  # in seconds

            # Validate end_time against video duration
            if start_time > video_duration:
                raise ValueError("Start time exceeds video duration.")
            if end_time > video_duration:
                print("End time exceeds video duration. Trimming up to the video's end.")
                end_time = video_duration

            # Trim the video
            trimmed_clip = video.subclip(start_time, end_time)

            # Prepare output file name
            base_name = os.path.basename(input_path)
            name, ext = os.path.splitext(base_name)
            output_filename = f"{name}_trimmed_{start_ms}ms-{end_ms}ms{ext}"
            output_path = os.path.join(output_folder, output_filename)

            # Write the trimmed clip to the output path
            trimmed_clip.write_videofile(
                output_path,
                codec="libx264",          # Video codec
                audio_codec="aac",        # Audio codec
                temp_audiofile="temp-audio.m4a",
                remove_temp=True,
                verbose=False,
                logger=None
            )

        print(f"Trimmed video saved to '{output_path}'")

    except Exception as e:
        print(f"An error occurred while trimming the video: {e}")


In [6]:
input_video = "/home/codered/Downloads/aipod/scripy_main/animations/man/sip_coffee/_with_lip_move.mp4"  # Replace with your video file path
start_time_ms = 0  # Start at 5,000 milliseconds (5 seconds)
end_time_ms = 3100   # End at 15,000 milliseconds (15 seconds)

trim_video(input_video, start_time_ms, end_time_ms)


Trimmed video saved to 'test/_with_lip_move_trimmed_0ms-3100ms.mp4'


In [7]:
speaker_1 = durations['Speaker 1']
speaker_2 = durations['Speaker 2']

In [8]:
import os
from moviepy.editor import VideoFileClip

def scan_animations_directory_with_duration_ms(base_path='animations'):
    """
    Scans the 'animations' directory and creates a dictionary of paths and video durations (in milliseconds)
    for male and female animations.

    Directory Structure Assumption:
    ├── animations
    │   ├── girl
    │   │   ├── fill
    │   │   │   ├── _with_lip_move.mp4
    │   │   │   └── _without_lip_move.mp4
    │   │   ├── nod
    │   │   │   ├── _with_lip_move.mp4
    │   │   │   └── _without_lip_move.mp4
    │   │   └── yes_long
    │   │       ├── _with_lip_move.mp4
    │   │       └── _without_lip_move.mp4
    │   └── man
    │       ├── fill
    │       │   ├── _with_lip_move.mp4
    │       │   └── _without_lip_move.mp4
    │       ├── nod
    │       │   ├── _with_lip_move.mp4
    │       │   └── _without_lip_move.mp4
    │       ├── sip_coffee
    │       │   ├── _with_lip_move.mp4
    │       │   └── _without_lip_move.mp4
    │       └── yes_long
    │           ├── _with_lip_move.mp4
    │           └── _without_lip_move.mp4

    :param base_path: Path to the 'animations' directory.
    :return: Dictionary with 'male' and 'female' as keys, each mapping to dictionaries of animation types,
             their corresponding video paths, and video durations in milliseconds.
    """
    # Mapping from directory names to desired dictionary keys
    gender_map = {
        'man': 'male',
        'girl': 'female'
    }

    # Initialize the result dictionary
    animations_dict = {
        'male': {},
        'female': {}
    }

    # Verify that the base_path exists and is a directory
    if not os.path.isdir(base_path):
        raise ValueError(f"The directory '{base_path}' does not exist.")

    # Iterate over each gender directory (e.g., 'man', 'girl')
    for gender_dir in os.listdir(base_path):
        gender_path = os.path.join(base_path, gender_dir)

        # Skip if not a directory
        if not os.path.isdir(gender_path):
            continue

        # Map the directory name to 'male' or 'female'
        gender_key = gender_map.get(gender_dir.lower())
        if not gender_key:
            print(f"Warning: Unrecognized gender directory '{gender_dir}'. Skipping.")
            continue

        # Iterate over each animation type within the gender directory
        for animation_type in os.listdir(gender_path):
            animation_path = os.path.join(gender_path, animation_type)

            # Skip if not a directory
            if not os.path.isdir(animation_path):
                continue

            # Initialize dictionary to hold paths and duration
            animation_info = {
                'paths': [],
                'duration_ms': None
            }

            # Define expected filenames
            with_lip_filename = '_with_lip_move.mp4'
            without_lip_filename = '_without_lip_move.mp4'

            # Construct full paths
            with_lip_path = os.path.join(animation_path, with_lip_filename)
            without_lip_path = os.path.join(animation_path, without_lip_filename)

            # Check if files exist and add to the list
            if os.path.isfile(with_lip_path):
                animation_info['paths'].append(os.path.abspath(with_lip_path))
            else:
                print(f"Warning: '{with_lip_filename}' not found in '{animation_path}'.")

            if os.path.isfile(without_lip_path):
                animation_info['paths'].append(os.path.abspath(without_lip_path))
            else:
                print(f"Warning: '{without_lip_filename}' not found in '{animation_path}'.")

            # Only proceed if both videos are found
            if len(animation_info['paths']) == 2:
                # Retrieve duration from the first video
                try:
                    with VideoFileClip(animation_info['paths'][0]) as clip:
                        duration = clip.duration  # Duration in seconds (float)
                        animation_info['duration_ms'] = int(duration * 1000)  # Convert to milliseconds
                except Exception as e:
                    print(f"Error: Could not retrieve duration for '{animation_path}': {e}")
                    animation_info['duration_ms'] = None  # Or handle as needed

                # Add to the dictionary
                animations_dict[gender_key][animation_type] = animation_info
            else:
                print(f"Info: Incomplete videos for '{animation_type}' in '{gender_key}'. Skipping.")

    return animations_dict


In [9]:
animations_dict = scan_animations_directory_with_duration_ms('animations')
animations_dict

{'male': {'yes_long': {'paths': ['/home/codered/Downloads/aipod/scripy_main/animations/man/yes_long/_with_lip_move.mp4',
    '/home/codered/Downloads/aipod/scripy_main/animations/man/yes_long/_without_lip_move.mp4'],
   'duration_ms': 1750},
  'fill': {'paths': ['/home/codered/Downloads/aipod/scripy_main/animations/man/fill/_with_lip_move.mp4',
    '/home/codered/Downloads/aipod/scripy_main/animations/man/fill/_without_lip_move.mp4'],
   'duration_ms': 1000},
  'sip_coffee': {'paths': ['/home/codered/Downloads/aipod/scripy_main/animations/man/sip_coffee/_with_lip_move.mp4',
    '/home/codered/Downloads/aipod/scripy_main/animations/man/sip_coffee/_without_lip_move.mp4'],
   'duration_ms': 10030},
  'nod': {'paths': ['/home/codered/Downloads/aipod/scripy_main/animations/man/nod/_with_lip_move.mp4',
    '/home/codered/Downloads/aipod/scripy_main/animations/man/nod/_without_lip_move.mp4'],
   'duration_ms': 1750}},
 'female': {'yes_long': {'paths': ['/home/codered/Downloads/aipod/scripy_ma

In [10]:
animations_dict['male']['sip_coffee']['duration_ms']

10030

In [11]:
import random
choice = random.choice(['yes_long', 'fill', 'sip_coffee', 'nod'])

animations_dict['male'][choice]['paths'][0]

'/home/codered/Downloads/aipod/scripy_main/animations/man/sip_coffee/_with_lip_move.mp4'

In [21]:
animations_dict['male'][choice]['paths'][0]

'/home/codered/Downloads/aipod/scripy_main/animations/man/sip_coffee/_with_lip_move.mp4'

In [12]:
a = speaker1_durations = helper.duration_of_speaking_ms('Speaker 1')
b = speaker2_durations = helper.duration_of_speaking_ms('Speaker 2')
c = []
def concat_turn_by_turn(a, b):
    for i in range(len(b)):
        c.append(a[i])
        c.append(b[i])
    return c

turn_time = concat_turn_by_turn(a,b)

In [13]:
print(turn_time)

[16160, 3019, 16061, 2780, 1120, 2519, 1081, 8199, 3121, 3799, 9561, 1000, 2120, 9720, 1479, 4521, 880, 1799, 4561, 3640, 1719, 841, 1959, 1000, 20601, 2199, 840, 2201, 840, 1520, 1920, 1039, 2440, 4201, 840, 4000, 5359, 841, 1199, 840, 10241, 1039, 3081, 14720, 1199, 1880, 840, 1801, 1400, 5519, 1241, 919, 840, 880, 5080, 1000, 9841, 960, 1160, 2359, 11120, 2241, 840, 2239, 840, 3720, 1201, 2959, 1881, 1440, 840, 3760, 840, 1040, 20120, 839, 1120, 1240, 1361, 3519, 841, 1479, 1920, 1921, 5919, 1160, 840, 4560, 841, 7159, 841, 3480, 1560, 919, 7800, 1241, 1000, 2759, 4561, 839, 3120, 2360, 8481, 839, 10320, 841, 2399, 841, 5799, 2961, 880, 2680, 6159, 961, 3359, 1321, 6080, 839, 7921, 1120, 9159, 841, 2000, 1319, 6121, 3120, 959, 6560, 2440, 841, 2799, 1641, 8019, 3900, 1120, 1401, 1240, 839, 840, 841, 1959, 880, 3961, 839, 2921, 1639, 9120, 1121, 920, 820, 2500, 1880, 7040, 1479, 8440, 841, 7600, 1360, 839, 3721, 3679, 1441, 7439, 3320, 1121, 840, 1600, 1160, 7319, 960, 3680, 1320, 57

In [14]:
#creating the timeline of the first 60 seconds or 60000 miliseconds

current_timings=0
timeline = []
command = ''
i = 0
extra_time = 0

helper.get_speech_at_time_ms(current_timings)
#while the durations of the timeline is 60000 miliseconds 
while current_timings<60000:
    #pick a random video
    choice = random.choice(['yes_long', 'fill', 'sip_coffee', 'nod'])
    animation_duration = animations_dict['male'][choice]['duration_ms']
    
    command += f"picked {choice} of {animation_duration} \n"
        

    if current_timings + animation_duration < turn_time[i]:
        current_timings += animation_duration
        command += f'with lip move {current_timings} full \n'
        print(command)
    else:
        extra_time = turn_time[i] - current_timings
        print(f' extra time {extra_time} ')
        current_timings += animation_duration
        command += f'without lip move {current_timings} \n '
        print(command)
        i+=1
    # print(helper.get_speech_at_time_ms(current_timings)['speaker'])

picked nod of 1750 
with lip move 1750 full 

picked nod of 1750 
with lip move 1750 full 
picked nod of 1750 
with lip move 3500 full 

picked nod of 1750 
with lip move 1750 full 
picked nod of 1750 
with lip move 3500 full 
picked sip_coffee of 10030 
with lip move 13530 full 

picked nod of 1750 
with lip move 1750 full 
picked nod of 1750 
with lip move 3500 full 
picked sip_coffee of 10030 
with lip move 13530 full 
picked yes_long of 1750 
with lip move 15280 full 

 extra time 880 
picked nod of 1750 
with lip move 1750 full 
picked nod of 1750 
with lip move 3500 full 
picked sip_coffee of 10030 
with lip move 13530 full 
picked yes_long of 1750 
with lip move 15280 full 
picked fill of 1000 
without lip move 16280 
 
 extra time -13261 
picked nod of 1750 
with lip move 1750 full 
picked nod of 1750 
with lip move 3500 full 
picked sip_coffee of 10030 
with lip move 13530 full 
picked yes_long of 1750 
with lip move 15280 full 
picked fill of 1000 
without lip move 16280 
 pi

In [15]:
def aggregate_numbers(lst):
    result = []
    current_sum = 0
    for num in lst:
        current_sum += num
        result.append(current_sum)
    return result
turn_time = aggregate_numbers(turn_time)

In [None]:
#runnig it without loop
#opening up the loop
current_timings=0
timeline = []
command = ''
i = 0
extra_time = 0
# loop 1
print('-'*20)
trim_trigger = False
speaker_timing = 0
remaning_clip = 0
#pick a random video


if remaning_clip!=0:
    animation_duration = remaning_clip 
    print(f"picked {choice} of {animation_duration} the last remaining video\n")
else:
    choice = ['yes_long', 'fill', 'sip_coffee', 'nod'][2]
    animation_duration = animations_dict['male'][choice]['duration_ms']
    animation_duration_fixed = animation_duration
    print(f"picked {choice} of {animation_duration} through random choice\n")

print(f"remaining speaking time of speaker1 to switch actions {turn_time[i]} - {current_timings} = {turn_time[i]-current_timings} \n")
print('now going inside the condition \n')


if current_timings + animation_duration < turn_time[i]:
    current_timings += animation_duration
    print(f'added with lip move, current time is now {current_timings}\n')
    print(f'and remaining time is {turn_time[i]} - {current_timings} = {turn_time[i]-current_timings}\n')
    remaning_clip = 0
else:
    extra_time = turn_time[i] - current_timings
    print(f' extra time {extra_time} ')
    current_timings += animation_duration
    print(f'without lip move {current_timings} \n ')
    i+=1


print(current_timings)
# loop 2
print('-'*20)

if remaning_clip!=0:
    animation_duration = remaning_clip 
    print(f"picked {choice} of {animation_duration} the last remaining video\n")
else:
    choice = ['yes_long', 'fill', 'sip_coffee', 'nod'][2]
    animation_duration = animations_dict['male'][choice]['duration_ms']
    print(f"picked {choice} of {animation_duration} through random choice\n")

print(f"remaining speaking time of speaker1 to switch actions {turn_time[i]} - {current_timings} = {turn_time[i]-current_timings} \n")
print('now going inside the contition\n')


if current_timings + animation_duration < turn_time[i]:
    current_timings += animation_duration

    # adding the trimmed video based on condition if speaker 1 is speaking or is not
    if i%2 == 0:
        print(f'adding {choice} with lip move, current time is now {current_timings} \n ')
    else:
        print(f'adding {choice} without lip move, current time is now {current_timings} \n ')
    
    print(f'and remaining time is  {turn_time[i]-current_timings}\n')
    remaning_clip = 0

else:
    # switching state when the duration is longer than dialogue of speaker
    print(f'trimming the video from {remaning_clip} to {(turn_time[i]-current_timings)+remaning_clip} \n and setting the trim trigger to true \n  and increment turn_time \n')
    
    remaning_clip  = animation_duration-(turn_time[i]-current_timings)
    trim_trigger = True

    # adding the trimmed video based on condition if speaker 1 is speaking or is not
    current_timings += turn_time[i]-current_timings
    if i%2 == 0:
        print(f'adding {choice} with lip move, current time is now {current_timings} \n ')
    else:
        print(f'adding {choice} without lip move, current time is now {current_timings} \n ')

    
    # setting the choice and animation duration of the remaining clip
    print(f'settign the choice and animation duration = {remaning_clip} of remaining clip of {choice}\n')
    i+=1








# loop 3
print('-'*20)

if remaning_clip!=0:
    animation_duration = remaning_clip 
    print(f"picked {choice} of {animation_duration} the last remaining video\n")
else:
    choice = ['yes_long', 'fill', 'sip_coffee', 'nod'][2]
    animation_duration = animations_dict['male'][choice]['duration_ms']
    print(f"picked {choice} of {animation_duration} through random choice\n")


print(f"remaining speaking time of speaker1 to switch actions {turn_time[i]} - {current_timings} = {turn_time[i]-current_timings} \n")
print('now going inside the contition\n')


if current_timings + animation_duration < turn_time[i]:
    current_timings += animation_duration

    # adding the trimmed video based on condition if speaker 1 is speaking or is not
    if i%2 == 0:
        print(f'adding {choice} with lip move, current time is now {current_timings} \n ')
    else:
        print(f'adding {choice} without lip move, current time is now {current_timings} \n ')
    
    print(f'and remaining time is  {turn_time[i]-current_timings}\n')
    remaning_clip = 0


# switching state when the duration is longer than dialogue of speaker
else:
    # checking  if the video is trimmed before
    if trim_trigger == False:
        print(f'trimming the video from {remaning_clip} to {(turn_time[i]-current_timings)+remaning_clip} {remaning_clip} \n and setting the trim trigger to true \n  and increment turn_time \n')
        
        remaning_clip  = animation_duration-(turn_time[i]-current_timings)
        trim_trigger = True
    else:
        print(f'trimming the video from {remaning_clip} to {(turn_time[i]-current_timings)+remaning_clip} \n and setting the trim trigger to False \n  and increment turn_time \n')
        
        remaning_clip  = animation_duration-(turn_time[i]-current_timings)
        trim_trigger = False

    # adding the trimmed video based on condition if speaker 1 is speaking or is not
    current_timings += turn_time[i] - current_timings
    if i%2 == 0:
        print(f'adding {choice} with lip move, current time is now {current_timings} \n ')
    else:
        print(f'adding {choice} without lip move, current time is now {current_timings} \n ')

    
    # setting the choice and animation duration of the remaining clip
    print(f'settign the choice and animation duration = {remaning_clip} of remaining clip of {choice}\n')
    i+=1










# loop 4
print('-'*20)

if remaning_clip!=0:
    animation_duration = remaning_clip 
    print(f"picked {choice} of {animation_duration} the last remaining video\n")
else:
    choice = ['yes_long', 'fill', 'sip_coffee', 'nod'][2]
    animation_duration = animations_dict['male'][choice]['duration_ms']
    print(f"picked {choice} of {animation_duration} through random choice\n")


print(f"remaining speaking time of speaker1 to switch actions {turn_time[i]} - {current_timings} = {turn_time[i]-current_timings} \n")
print('now going inside the contition\n')


if current_timings + animation_duration < turn_time[i]:
    current_timings += animation_duration

    # adding the trimmed video based on condition if speaker 1 is speaking or is not
    if i%2 == 0:
        print(f'adding {choice} with lip move, current time is now {current_timings} \n ')
    else:
        print(f'adding {choice} without lip move, current time is now {current_timings} \n ')
    
    print(f'and remaining time is  {turn_time[i]-current_timings}\n')
    remaning_clip = 0

# switching state when the duration is longer than dialogue of speaker
else:
    # checking  if the video is trimmed before
    if trim_trigger == False:
        print(f'trimming the video from {remaning_clip} to {(turn_time[i]-current_timings)+remaning_clip} \n and setting the trim trigger to true \n  and increment turn_time \n')
        
        remaning_clip  = animation_duration-(turn_time[i]-current_timings)
        trim_trigger = True
    else:
        print(f'trimming the video from {remaning_clip} to {(turn_time[i]-current_timings)+remaning_clip} \n and setting the trim trigger to False \n  and increment turn_time \n')
        
        remaning_clip  = animation_duration-(turn_time[i]-current_timings)
        trim_trigger = False

    # adding the trimmed video based on condition if speaker 1 is speaking or is not
    current_timings += turn_time[i] - current_timings
    if i%2 == 0:
        print(f'adding {choice} with lip move, current time is now {current_timings} \n ')
    else:
        print(f'adding {choice} without lip move, current time is now {current_timings} \n ')

    
    # setting the choice and animation duration of the remaining clip
    print(f'settign the choice and animation duration = {remaning_clip} of remaining clip of {choice}\n')
    i+=1





# loop 5
print('-'*20)

if remaning_clip!=0:
    animation_duration = remaning_clip 
    print(f"picked {choice} of {animation_duration} the last remaining video\n")
else:
    choice = ['yes_long', 'fill', 'sip_coffee', 'nod'][2]
    animation_duration = animations_dict['male'][choice]['duration_ms']
    print(f"picked {choice} of {animation_duration} through random choice\n")


print(f"remaining speaking time of speaker1 to switch actions {turn_time[i]} - {current_timings} = {turn_time[i]-current_timings} \n")
print('now going inside the contition\n')


if current_timings + animation_duration < turn_time[i]:
    current_timings += animation_duration

    # adding the trimmed video based on condition if speaker 1 is speaking or is not
    if i%2 == 0:
        print(f'adding {choice} with lip move, current time is now {current_timings} \n ')
    else:
        print(f'adding {choice} without lip move, current time is now {current_timings} \n ')
    
    print(f'and remaining time is  {turn_time[i]-current_timings}\n')
    remaning_clip = 0

# switching state when the duration is longer than dialogue of speaker
else:
    # checking  if the video is trimmed before
    if trim_trigger == False:
        print(f'trimming the video from {remaning_clip} to {(turn_time[i]-current_timings)+remaning_clip} \n and setting the trim trigger to true \n  and increment turn_time \n')
        
        remaning_clip  = animation_duration-(turn_time[i]-current_timings)
        trim_trigger = True
    else:
        print(f'trimming the video from {remaning_clip} to {(turn_time[i]-current_timings)+remaning_clip} \n and setting the trim trigger to False \n  and increment turn_time \n')
        
        remaning_clip  = animation_duration-(turn_time[i]-current_timings)
        trim_trigger = False

    # adding the trimmed video based on condition if speaker 1 is speaking or is not
    current_timings += turn_time[i] - current_timings
    if i%2 == 0:
        print(f'adding {choice} with lip move, current time is now {current_timings} \n ')
    else:
        print(f'adding {choice} without lip move, current time is now {current_timings} \n ')

    
    # setting the choice and animation duration of the remaining clip
    print(f'settign the choice and animation duration = {remaning_clip} of remaining clip of {choice}\n')
    i+=1

--------------------
picked sip_coffee of 10030 through random choice

remaining speaking time of speaker1 to switch actions 16160 - 0 = 16160 

now going inside the condition 

added with lip move, current time is now 10030

and remaining time is 16160 - 10030 = 6130

10030
--------------------
picked sip_coffee of 10030 through random choice

remaining speaking time of speaker1 to switch actions 16160 - 10030 = 6130 

now going inside the contition

trimming the video from 0 to 6130 
 and setting the trim trigger to true 
  and increment turn_time 

adding sip_coffee with lip move, current time is now 16160 
 
settign the choice and animation duration = 3900 of remaining clip of sip_coffee

--------------------
picked sip_coffee of 3900 the last remaining video

remaining speaking time of speaker1 to switch actions 19179 - 16160 = 3019 

now going inside the contition

trimming the video from 3900 to 6919 
 and setting the trim trigger to False 
  and increment turn_time 

adding sip

In [17]:
# Initializations
current_timings = 0
timeline = []
command = ''
i = 0
extra_time = 0

trim_trigger = False
speaker_timing = 0
remaning_clip = 0

# We’ll keep picking choice from these 4 options:
possible_choices = ['yes_long', 'fill', 'sip_coffee', 'nod']

# --- Start the main loop ---
# We'll loop until we exhaust turn_time (i >= 5)).
while i < 5:
    print('-' * 20)

    # 1) Pick an animation or use remaining clipped video:
    if remaning_clip != 0:
        # Continue using the last remaining video
        animation_duration = remaning_clip
        print(f"picked {choice} of {animation_duration} ms (the last remaining video)\n")
    else:
        # Otherwise pick a new random (or fixed index) choice
        choice = possible_choices[2]  # e.g. always 'sip_coffee'
        animation_duration = animations_dict['male'][choice]['duration_ms']
        print(f"picked {choice} of {animation_duration} ms through random choice\n")

    # 2) Print the remaining speaking time
    print(f"remaining speaking time of speaker1 to switch actions "
          f"{turn_time[i]} - {current_timings} = {turn_time[i] - current_timings}\n")
    print("now going inside the condition...\n")

    # 3) If the entire animation fits before the speaker switches
    if current_timings + animation_duration < turn_time[i]:
        current_timings += animation_duration

        # Determine if speaker i is “with lip move” or “without lip move”
        if i % 2 == 0:
            print(f"adding {choice} with lip move, current time is now {current_timings}\n")
            
            # Now adding the the actual video to workspace folder
            animations_dict['male'][choice]['paths'][0]
        else:
            print(f"adding {choice} without lip move, current time is now {current_timings}\n")

        print(f"and remaining time is {turn_time[i] - current_timings}\n")
        
        # Since we used the full clip, reset remaning_clip
        remaning_clip = 0

    else:
        # 4) The animation does NOT fit fully => we do “trimming” logic

        # We will trim the part that fits until speaker switch
        # leftover clip is what remains after the speaker switches
        leftover_time = turn_time[i] - current_timings
        new_remaning_clip = animation_duration - leftover_time

        # Print out the trimming info
        print(f"trimming the video from {remaning_clip} to {leftover_time + remaning_clip} "
              f"\n and {'setting' if not trim_trigger else 'unsetting'} the trim trigger "
              f"\n and increment turn_time (i += 1)\n")

        remaning_clip = new_remaning_clip
        
        # Toggle trim_trigger based on your logic
        if not trim_trigger:
            trim_trigger = True
        else:
            trim_trigger = False
        
        # Add the trimmed portion up to the speaker switch
        current_timings += leftover_time

        # “with lip move” or “without lip move”
        if i % 2 == 0:
            print(f"adding {choice} with lip move, current time is now {current_timings}\n")
        else:
            print(f"adding {choice} without lip move, current time is now {current_timings}\n")

        # Print the leftover
        print(f"setting the choice and animation duration = {remaning_clip} "
              f"of remaining clip of {choice}\n")

        # Move to the next turn
        i += 1
        # Continue to the next iteration of the while loop
        # (We do not want to re-check the if/else with the same i)
        continue

    # 5) If we finished a turn exactly, you may want to increment i
    #    (depends on how you want to handle partial overlaps).
    if current_timings == turn_time[i]:
        i += 1

    # Move to the next iteration
    # If there's more turn_time left, the loop continues


--------------------
picked sip_coffee of 10030 ms through random choice

remaining speaking time of speaker1 to switch actions 16160 - 0 = 16160

now going inside the condition...

adding sip_coffee with lip move, current time is now 10030

and remaining time is 6130

--------------------
picked sip_coffee of 10030 ms through random choice

remaining speaking time of speaker1 to switch actions 16160 - 10030 = 6130

now going inside the condition...

trimming the video from 0 to 6130 
 and setting the trim trigger 
 and increment turn_time (i += 1)

adding sip_coffee with lip move, current time is now 16160

setting the choice and animation duration = 3900 of remaining clip of sip_coffee

--------------------
picked sip_coffee of 3900 ms (the last remaining video)

remaining speaking time of speaker1 to switch actions 19179 - 16160 = 3019

now going inside the condition...

trimming the video from 3900 to 6919 
 and unsetting the trim trigger 
 and increment turn_time (i += 1)

adding 

5