# AutoSFX

In [9]:
import importlib
import classify
import scene_understanding
import sync
from PIL import Image
import openai
import os
from openai import OpenAI
import cv2

## Classify

In [25]:
# Re-import the scene_understanding module so that edits made to its source file
# take effect without restarting the kernel. importlib.reload needs the name to
# still refer to the module object when this cell runs.
importlib.reload(scene_understanding)

<module 'scene_understanding' from '/home/s5614279/Master Project/AutoSFX/scene_understanding.py'>

In [4]:
# Re-import the classify module so that edits made to its source file take effect
# without restarting the kernel. importlib.reload needs the name to still refer
# to the module object when this cell runs.
importlib.reload(classify)

<module 'classify' from '/home/s5614279/Master Project/AutoSFX/classify.py'>

In [2]:
# Absolute paths to the sample clips; edit these to match your own machine.
running_video = "/home/s5614279/Master Project/AutoSFX/data/video/running.mp4"  # input to the Scene Understanding and Classify cells
firework_video = "/home/s5614279/Master Project/AutoSFX/data/video/firework.mp4"  # same file the Retrieve cell points at via its own video_path
train_video = "/home/s5614279/Master Project/AutoSFX/data/video/train.mp4"  # input to the Sync cell

### Scene Understanding

In [None]:
# Scene understanding: analyse sampled frames of the running clip, then ask the
# chat model which sounds belong to the first analysed frame.

# Bind the instance to its own name. Assigning it to `scene_understanding` would
# replace the imported module, so re-running this cell (or calling
# importlib.reload(scene_understanding) afterwards) would fail.
scene_analyzer = scene_understanding.SceneUnderstanding()

resized_frame = scene_analyzer.process_video(running_video)

frame_values = []
frame_objects = []

# Analyse every 5th frame only.
for frame in resized_frame[::5]:
    # Convert from BGR channel order to RGB before handing the frame to PIL.
    pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    values, objects = scene_analyzer.analyze_image(pil_image)
    frame_values.append(values)
    frame_objects.append(objects)

if not frame_objects:
    raise ValueError(f"No frames were analysed for video: {running_video}")

# The prompt previously read `frame_context[0]`, but `frame_context` was never
# defined (the code that filled it is commented out), so the cell raised
# NameError. Use the second value returned by analyze_image for the first
# sampled frame instead.
# NOTE(review): an earlier revision of this cell named that second value
# `context` -- confirm it is still the scene description the prompt expects.
scene_description = frame_objects[0]

# Read the key first and hand it to the client explicitly. The module-level
# assignment is kept for any code that relies on `openai.api_key`, but it now
# happens before the client is constructed rather than after.
api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = api_key
client = OpenAI(api_key=api_key)

completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You will be provided with a context describing a scene, and your task is to give the sound suggestions that collectively form the audio landscape of the described scene"},
        {"role": "user", "content": f"What do I hear in the following video description, give me a list: {scene_description}"},
    ],
)

print(completion.choices[0].message)


### Classify

In [27]:
# Object classification: run the recogniser on sampled frames of the running clip
# and collect the per-frame results.

# Bind the instance to its own name. Assigning it to `classify` would replace the
# imported module, so re-running this cell (or calling importlib.reload(classify)
# afterwards) would fail.
classifier = classify.Classify()

resized_frame = classifier.process_video(running_video)

frame_values = []   # first value returned by recognize_objects, one entry per sampled frame
frame_objects = []  # second value returned by recognize_objects, one entry per sampled frame
frame_scores = []   # the same two values paired in a dict per sampled frame

# Run recognition on every 2nd frame only.
for frame in resized_frame[::2]:
    # Convert from BGR channel order to RGB before handing the frame to PIL.
    pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    values, objects = classifier.recognize_objects(pil_image)
    frame_values.append(values)
    frame_objects.append(objects)
    frame_scores.append({
        'object': objects,  # presumably the top 5 labels, per the original note -- TODO confirm
        'score': values,
    })


Now you can inspect the inference result for every sampled frame in `frame_scores`.

## Sync

In [3]:
# Pick up any local edits to the sync module before using it.
importlib.reload(sync)

# Build the synchroniser for the train clip and run frame analysis, then
# interval calculation.
syncer = sync.ObjectIntervalSync(train_video)
syncer.analyze_frames()
syncer.calculate_intervals()

# Fetch both results, then print them on separate lines.
intervals, ambience = syncer.get_intervals(), syncer.get_ambience()
print(intervals, f"Selected ambience: {ambience}", sep="\n")


{'train': [(1, 74, 2.433333333333333)]}
Selected ambience: rainy


Now that we have the intervals, we can retrieve matching sound-effect audio for each one.

## Retrieve

In [15]:
import retrieve_simplify
import IPython.display as ipd
import merge
import importlib

# Pick up any local edits to the helper modules before using them.
importlib.reload(retrieve_simplify)
importlib.reload(merge)


def _prompt_choice(prompt, option_count):
    """Ask until the user enters an integer in 1..option_count.

    Returns the selection as a 0-based index. Non-numeric and out-of-range
    answers are rejected and the prompt is shown again, so an entry such as 0
    can no longer silently select the last option through negative indexing.
    """
    while True:
        raw = input(prompt).strip()
        try:
            number = int(raw)
        except ValueError:
            print(f"'{raw}' is not a number, please try again.")
            continue
        if 1 <= number <= option_count:
            return number - 1
        print(f"Please enter a number between 1 and {option_count}.")


video_path = "/home/s5614279/Master Project/AutoSFX/data/video/firework.mp4"
csv_path = "/home/s5614279/Master Project/AutoSFX/data/meta/esc50.csv"
audio_folder = "/home/s5614279/Master Project/AutoSFX/data/audio/"
ambience_folder = "/home/s5614279/Master Project/AutoSFX/data/ambience/"
output_folder = "/home/s5614279/Master Project/AutoSFX/output_video/"

matched_effect_audios, ambience_audio, ambience_type = retrieve_simplify.retrieve_audio(video_path, csv_path, audio_folder, ambience_folder)

print(f"Selected ambience: {ambience_type}")
if ambience_audio:
    print(f"Ambience audio file: {ambience_audio['ambience_file']}")
    ipd.display(ipd.Audio(ambience_audio['ambience_file']))

# exist_ok avoids the separate existence check.
os.makedirs(output_folder, exist_ok=True)

# For each detected object and each of its intervals: play the candidate effects,
# let the user pick one, merge it into the video and play the result.
for obj, audio_lists in matched_effect_audios.items():
    for i, audio_options in enumerate(audio_lists):
        print(f"\nObject: {obj}, Interval {i+1}")

        if not audio_options:
            # Nothing to choose from; a "(1-0)" prompt could never be answered.
            print(f"No candidate audio found for {obj}, Interval {i+1}; skipping.")
            continue

        for j, audio in enumerate(audio_options, 1):
            print(f"{j}. {audio['effect_file']} (duration: {audio['effect_duration']:.2f}s)")
            ipd.display(ipd.Audio(audio['effect_file']))

        choice = _prompt_choice(
            f"Choose the best audio for {obj}, Interval {i+1} (1-{len(audio_options)}): ",
            len(audio_options),
        )
        chosen_audio = audio_options[choice]

        output_path = os.path.join(output_folder, f"{obj}_interval_{i+1}_merged.mp4")
        print(f"Merging audio for object: {obj}, Interval {i+1}")
        success = merge.merge_audio_video(video_path, [chosen_audio], ambience_audio, output_path)

        if success:
            print(f"Playing merged video: {output_path}")
            ipd.display(ipd.Video(output_path))
        else:
            print(f"Failed to merge audio and video for object: {obj}, Interval {i+1}")

Object: fireworks, Interval duration: 2.47 seconds
effect file: /home/s5614279/Master Project/AutoSFX/data/audio/1-115545-A-48.wav, Valid duration: 2.45 seconds
effect file: /home/s5614279/Master Project/AutoSFX/data/audio/1-115545-B-48.wav, Valid duration: 0.73 seconds
effect file: /home/s5614279/Master Project/AutoSFX/data/audio/1-115545-C-48.wav, Valid duration: 0.82 seconds
effect file: /home/s5614279/Master Project/AutoSFX/data/audio/1-115546-A-48.wav, Valid duration: 0.31 seconds
effect file: /home/s5614279/Master Project/AutoSFX/data/audio/1-160563-A-48.wav, Valid duration: 3.15 seconds
effect file: /home/s5614279/Master Project/AutoSFX/data/audio/1-160563-B-48.wav, Valid duration: 4.41 seconds
effect file: /home/s5614279/Master Project/AutoSFX/data/audio/1-25777-A-48.wav, Valid duration: 3.74 seconds
effect file: /home/s5614279/Master Project/AutoSFX/data/audio/1-25781-A-48.wav, Valid duration: 3.80 seconds
effect file: /home/s5614279/Master Project/AutoSFX/data/audio/2-117615-

2. /home/s5614279/Master Project/AutoSFX/data/audio/5-160614-B-48.wav (duration: 5.00s)


3. /home/s5614279/Master Project/AutoSFX/data/audio/5-160614-E-48.wav (duration: 5.00s)


4. /home/s5614279/Master Project/AutoSFX/data/audio/5-160614-F-48.wav (duration: 5.00s)


5. /home/s5614279/Master Project/AutoSFX/data/audio/5-160614-H-48.wav (duration: 5.00s)


Merging audio for object: fireworks, Interval 1
Moviepy - Building video /home/s5614279/Master Project/AutoSFX/output_video/fireworks_interval_1_merged.mp4.
MoviePy - Writing audio in fireworks_interval_1_mergedTEMP_MPY_wvf_snd.mp4


                                                        

MoviePy - Done.
Moviepy - Writing video /home/s5614279/Master Project/AutoSFX/output_video/fireworks_interval_1_merged.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /home/s5614279/Master Project/AutoSFX/output_video/fireworks_interval_1_merged.mp4
Playing merged video: /home/s5614279/Master Project/AutoSFX/output_video/fireworks_interval_1_merged.mp4
