In [1]:
from IPython.display import Video
import pandas as pd
import os

In [2]:
!pip install ipywidgets



In [3]:
# Folder holding the video clips, folder holding the CSV manifests,
# and the manifest ("part") to annotate in this session
video_folder = './clips'
name_folder = "./parts"
part = "video_files_part_1.csv"

# The 'File Name' column of the selected manifest lists the clip paths
clip_paths = pd.read_csv(os.path.join(name_folder, part)).loc[:, 'File Name']

### Play the clip & start annotation

In [4]:
# Load the per-clip annotation dict (looked up by clip name further down).
# A context manager closes the file handle deterministically, and the explicit
# encoding avoids depending on the platform's default text encoding.
import json

with open('text_data.json', 'r', encoding='utf-8') as transcript_file:
    transcripts = json.load(transcript_file)


In [5]:
# Sanity check: number of entries loaded from text_data.json
len(transcripts)

15887

In [7]:
# all_cnt = 0
# name_set = set()
# for i in range(1, 8):
#     part = f'video_files_part_{i}.csv'
#     clip_paths = pd.read_csv(os.path.join(name_folder, part))['File Name']
#     cnt = 0
#     for path in clip_paths:
#         name = path.replace('./clips/', '')
#         name = name.replace('.mp4', '')
#         # for alignment
#         name = name.replace('_split_', '_clip_')
# #         print(name)

#         if name in transcripts.keys():
#     #         display(Video(path, width=600, height=400))
#             cnt += 1
#             name_set.add(name)
# #         else:
# #             print(name)
#     all_cnt += cnt
    
# intersection_set = name_set.intersection(transcripts)
# print(len(intersection_set) == len(transcripts) == len(name_set))

### Data Format

violin_annotation.json contains clip info, subtitles, and human-written real/fake statements.

violin_annotation.json is a dict with file_id as keys, and each element describes a clip. Here is an example:
'friends_s07e07_clip_1056_1080_039':{
		'file':'friends_s07e07_clip_1056_1080_039',
		'source':'friends_s07e07',
		'split':'train',
		'span':[1055.9888, 1080.1232], 
		'statement':[[real1, fake1], [real2, fake2], [real3, fake3]],
		'sub':[['Good morning!', [110, 2134]],['xxx', [xxx, xxx]], ...]
	}


'source' is the source of the video clip. If the clip comes from YouTube, 'source' is its YouTube video id.

'span' indicates the time interval that the clip is cut from the original video (in seconds).

'statement' is the annotation collected from AMT (Amazon Mechanical Turk); it contains three pairs of real and fake statements.

'sub' is the subtitles associated with the clip. Each line is paired with its time interval (**within** the clip, in milliseconds).

In [6]:
from IPython.display import Video, display, clear_output
import ipywidgets as widgets
import json

# Position in clip_paths of the clip currently being annotated
current_video_index: int = 0
# Accumulated annotation records (one dict per saved clip), including the
# user-entered strengtheners and weakeners
results: list = []

def save_results_to_json(file_name='results.json'):
    """Write the module-level ``results`` list to ``file_name`` as JSON.

    The output is indented by four spaces. A confirmation line is printed
    once the file has been written.
    """
    with open(file_name, 'w') as out_handle:
        out_handle.write(json.dumps(results, indent=4))
    print(f"Results successfully saved to {file_name}")

def save_data_to_dict(original_dict, all_statements):
    """Record one annotated clip in ``results`` and move on to the next clip.

    Args:
        original_dict: The clip's existing annotation dict. It is shallow-copied,
            so the caller's dict does not gain the extra key.
        all_statements: Per-statement annotation data, stored in the copy under
            the ``'statements'`` key.
    """
    # Build the merged record in one step: a shallow copy plus the new key
    results.append({**original_dict, 'statements': all_statements})

    print("Data saved!")
    print(f"Current data: {results}")

    # Continue with the following clip
    show_next()

def _clip_name_from_path(path):
    """Derive the ``transcripts`` key for a clip from its file path."""
    name = path.replace('./clips/', '')
    name = name.replace('.mp4', '')
    # File names contain "_split_" where the transcript keys contain "_clip_"
    return name.replace('_split_', '_clip_')


def _make_span_inputs(label, start_placeholder='Start time...', end_placeholder='End time...'):
    """Create start/end ``FloatText`` inputs for ``label`` and the ``HBox`` row holding them.

    Returns:
        A ``(start_input, end_input, row)`` tuple.
    """
    start_input = widgets.FloatText(
        placeholder=start_placeholder,
        description=f'{label} Start:',
        layout=widgets.Layout(width='45%')
    )
    end_input = widgets.FloatText(
        placeholder=end_placeholder,
        description=f'{label} End:',
        layout=widgets.Layout(width='45%')
    )
    return start_input, end_input, widgets.HBox([start_input, end_input])


def _make_text_input(label, placeholder):
    """Create a full-width ``Textarea`` described as ``'<label>:'``."""
    return widgets.Textarea(
        placeholder=placeholder,
        description=f'{label}:',
        layout=widgets.Layout(width='100%')
    )


def _read_span(span_inputs):
    """Read the current values out of a ``{'start': widget, 'end': widget}`` dict."""
    return {
        'start': span_inputs['start'].value,
        'end': span_inputs['end'].value
    }


def show_next():
    """Display the next clip that has a transcript, with its annotation form.

    Starting at ``current_video_index``, clips whose derived name is not a key
    of ``transcripts`` are skipped (a message is printed for each). For the
    first clip that has a transcript, the video, its subtitles and, for every
    real/fake statement pair, the span / strengthener / weakener inputs are
    displayed, followed by a "Save All" button. Clicking the button reads the
    widget values, advances ``current_video_index`` and passes the data to
    ``save_data_to_dict``.

    Prints a completion message and returns when no clips are left.
    """
    global current_video_index

    # Skip clips without a transcript in a loop rather than by recursion, so a
    # long run of unmatched clips cannot exhaust the recursion limit.
    while current_video_index < len(clip_paths):
        path = clip_paths[current_video_index]
        name = _clip_name_from_path(path)
        if name in transcripts:
            break
        print(f"No transcript found for: {name}")  # Debugging print
        current_video_index += 1
    else:
        # The loop ran off the end without finding an annotatable clip
        print("All videos have been annotated.")
        return

    # Clear previous displays first, so the progress line below stays visible
    clear_output(wait=True)
    print(f"Processing video: {name} (index {current_video_index})")

    # Display the video
    display(Video(path, width=600, height=400))

    # Display subtitles
    print("Subtitles:")
    for sub in transcripts[name]['sub']:
        subtitle_text, time_span = sub[0], sub[1]
        print(f"{time_span[0]}ms - {time_span[1]}ms: {subtitle_text}")

    # One entry per statement pair, holding the statement texts and the widgets
    statement_inputs = []

    for statement in transcripts[name]['statement']:
        real_state, fake_state = statement[0], statement[1]

        print(f"Real Statement: {real_state}")
        real_start, real_end, real_row = _make_span_inputs(
            'Real', 'Real statement start...', 'Real statement end...'
        )
        strengthener_input = _make_text_input(
            'Strengthener', 'Type your strengthener here...'
        )
        strengthener_start, strengthener_end, strengthener_row = _make_span_inputs(
            'Strengthener'
        )

        print(f"Fake Statement: {fake_state}")
        fake_start, fake_end, fake_row = _make_span_inputs(
            'Fake', 'Fake statement start...', 'Fake statement end...'
        )
        weakener_input = _make_text_input('Weakener', 'Type your weakener here...')
        weakener_start, weakener_end, weakener_row = _make_span_inputs('Weakener')

        statement_inputs.append({
            'real_statement': real_state,
            'real_statement_span': {'start': real_start, 'end': real_end},
            'strengthener': strengthener_input,
            'strengthener_span': {'start': strengthener_start, 'end': strengthener_end},
            'fake_statement': fake_state,
            'fake_statement_span': {'start': fake_start, 'end': fake_end},
            'weakener': weakener_input,
            'weakener_span': {'start': weakener_start, 'end': weakener_end}
        })

        # Display the input fields
        for form_widget in (real_row, strengthener_input, strengthener_row,
                            fake_row, weakener_input, weakener_row):
            display(form_widget)

    # Create a save button
    save_button = widgets.Button(description="Save All")

    def on_save_button_clicked(b):
        """Collect the widget values for this clip and hand them over for saving."""
        global current_video_index  # Mark current_video_index as global

        # Prevent a second click from saving the same clip again
        b.disabled = True

        # Build a fresh list of plain values; the widget-holding dicts are left
        # untouched so reading them never depends on an earlier click.
        annotated = [
            {
                'real_statement': row['real_statement'],
                'real_statement_span': _read_span(row['real_statement_span']),
                'strengthener': row['strengthener'].value,
                'strengthener_span': _read_span(row['strengthener_span']),
                'fake_statement': row['fake_statement'],
                'fake_statement_span': _read_span(row['fake_statement_span']),
                'weakener': row['weakener'].value,
                'weakener_span': _read_span(row['weakener_span'])
            }
            for row in statement_inputs
        ]

        # Advance BEFORE saving: save_data_to_dict() calls show_next(), which
        # must already see the index of the following clip. Incrementing
        # afterwards made the first clip appear (and get recorded) twice.
        current_video_index += 1
        save_data_to_dict(transcripts[name], annotated)

    save_button.on_click(on_save_button_clicked)

    # Display the save button
    display(save_button)

# Start the annotation process: show the first clip that has a transcript,
# together with its input widgets and the "Save All" button
show_next()

Subtitles:
29ms - 2159ms: ROSS :
Hey, Ross, it's you.
2323ms - 4403ms: I just want you
to remember this feeling.
5034ms - 7654ms: You are lucky to be alive.
7870ms - 11120ms: So live every day to the fullest.
11373ms - 14283ms: Love yourself, okay?
14835ms - 16835ms: And also get stamps. Bye.
18547ms - 22047ms: Play that message for Emily,
and this problem goes away.
22301ms - 23351ms: Right?
24970ms - 26770ms: Anybody want to meet a hero?
27014ms - 27610ms: John Glenn is here?
Real Statement: The two women are standing there to the man talk on the answering machine .
Fake Statement: The two women are standing there listening to the woman talk on the answering machine .


HBox(children=(FloatText(value=0.0, description='Real Start:', layout=Layout(width='45%')), FloatText(value=0.…

Textarea(value='', description='Strengthener:', layout=Layout(width='100%'), placeholder='Type your strengthen…

HBox(children=(FloatText(value=0.0, description='Strengthener Start:', layout=Layout(width='45%')), FloatText(…

HBox(children=(FloatText(value=0.0, description='Fake Start:', layout=Layout(width='45%')), FloatText(value=0.…

Textarea(value='', description='Weakener:', layout=Layout(width='100%'), placeholder='Type your weakener here.…

HBox(children=(FloatText(value=0.0, description='Weakener Start:', layout=Layout(width='45%')), FloatText(valu…

Real Statement: The woman in the brown shirt is holding her face  while the woman   in the black outfit   is holding a purple blender .
Fake Statement: The woman in the black brown shirt it's a holding a purple blender while the woman in the black outfit is holding her face .


HBox(children=(FloatText(value=0.0, description='Real Start:', layout=Layout(width='45%')), FloatText(value=0.…

Textarea(value='', description='Strengthener:', layout=Layout(width='100%'), placeholder='Type your strengthen…

HBox(children=(FloatText(value=0.0, description='Strengthener Start:', layout=Layout(width='45%')), FloatText(…

HBox(children=(FloatText(value=0.0, description='Fake Start:', layout=Layout(width='45%')), FloatText(value=0.…

Textarea(value='', description='Weakener:', layout=Layout(width='100%'), placeholder='Type your weakener here.…

HBox(children=(FloatText(value=0.0, description='Weakener Start:', layout=Layout(width='45%')), FloatText(valu…

Real Statement: The woman in the black outfit    said that the message on the answering machine will make the problem go away .
Fake Statement: The woman in the green shirt is reading a book while sitting at a table.


HBox(children=(FloatText(value=0.0, description='Real Start:', layout=Layout(width='45%')), FloatText(value=0.…

Textarea(value='', description='Strengthener:', layout=Layout(width='100%'), placeholder='Type your strengthen…

HBox(children=(FloatText(value=0.0, description='Strengthener Start:', layout=Layout(width='45%')), FloatText(…

HBox(children=(FloatText(value=0.0, description='Fake Start:', layout=Layout(width='45%')), FloatText(value=0.…

Textarea(value='', description='Weakener:', layout=Layout(width='100%'), placeholder='Type your weakener here.…

HBox(children=(FloatText(value=0.0, description='Weakener Start:', layout=Layout(width='45%')), FloatText(valu…

Button(description='Save All', style=ButtonStyle())

In [23]:
# Function to export results to a CSV file using pandas
def export_results_to_csv(file_name='results.csv'):
    """Write the collected annotations to a CSV file, one row per statement pair.

    Reads the module-level ``results`` list. Each entry contributes one row
    per element of its ``'statements'`` list; the entry's ``'file'`` value is
    written to the ``video_file`` column. Span values are stored as they are
    held in ``results`` (dicts with ``'start'`` / ``'end'``).

    Args:
        file_name: Path of the CSV file to write.
    """
    # Columns that were always exported; these keys are required on each statement
    required_fields = [
        'real_statement',
        'strengthener',
        'strengthener_span',
        'fake_statement',
        'weakener',
        'weakener_span',
    ]
    # Spans of the real/fake statements are collected by the annotation form as
    # well but were previously dropped from the export. They are appended after
    # the existing columns so the original column order is unchanged, and read
    # with .get() so records lacking them still export.
    added_fields = ['real_statement_span', 'fake_statement_span']

    # Prepare the data for the DataFrame
    rows = []
    for entry in results:
        for statement in entry['statements']:
            row = {'video_file': entry['file']}
            for field in required_fields:
                row[field] = statement[field]
            for field in added_fields:
                row[field] = statement.get(field)
            rows.append(row)

    # Passing the columns explicitly keeps the header row even when there are
    # no results yet
    df = pd.DataFrame(rows, columns=['video_file'] + required_fields + added_fields)

    # Export the DataFrame to a CSV file
    df.to_csv(file_name, index=False)

    print(f"Results successfully exported to {file_name}")

# Example usage: write the annotations currently held in `results` to a CSV file
export_results_to_csv('annotated_results.csv')


2309