In [76]:
import pandas as pd
import os
import re
import random
import shutil

In [57]:
# Read the CharadesEgo training annotations into a DataFrame.
annotations_csv = "CharadesEgo_v1_train.csv"
complete_images_dataset = pd.read_csv(annotations_csv)

In [59]:
# Show the (rows, columns) size of the loaded annotation table.
complete_images_dataset.shape

(6167, 13)

In [60]:
# Peek at the first annotation row to see what each column looks like.
complete_images_dataset.head(1)

Unnamed: 0,id,subject,scene,quality,relevance,verified,script,objects,descriptions,actions,length,egocentric,charades_video
0,D3TR8EGO,2Q9D,Closet / Walk-in closet / Spear closet,7.0,7.0,Yes,A person walks into the closet and turns on th...,cup/glass/bottle;door;food;glass of water;ligh...,The person in the video walks into a room and ...,c156 3.90 12.00;c061 8.20 12.50;c106 9.90 18.4...,31.79,Yes,1K0SU


In [61]:
# List the entries of the RGB frame folder; they are matched against the
# annotation table's "id" column in a later cell.
rgb_frames_root = "data_images/CharadesEgo_v1_rgb"
image_ids_available = os.listdir(rgb_frames_root)

In [62]:
# Keep only the annotation rows whose id appears among the entries found on disk.
has_frames_on_disk = complete_images_dataset["id"].isin(image_ids_available)
curated_data = complete_images_dataset[has_frames_on_disk]

In [63]:
# List the columns of the filtered table (described in the section below).
curated_data.columns

Index(['id', 'subject', 'scene', 'quality', 'relevance', 'verified', 'script',
       'objects', 'descriptions', 'actions', 'length', 'egocentric',
       'charades_video'],
      dtype='object')

### Column Definitions

- id:
Unique identifier for each video. This id contains 'EGO' at the end for first person videos. That is, if a third-person video has the id ABCDE then the corresponding egocentric video is ABCDEEGO
- subject:
Unique identifier for each subject in the dataset
- scene:
One of 15 indoor scenes in the dataset, such as Kitchen
- quality:
The quality of the video judged by an annotator (7-point scale, 7=high quality)
- relevance: 
The relevance of the video to the script judged by an annotator (7-point scale, 7=very relevant)
- verified:
'Yes' if an annotator successfully verified that the video matches the script, else 'No'
- script:
The human-generated script used to generate the video
- descriptions:
Semicolon-separated list of descriptions by annotators watching the video
- actions:  
Semicolon-separated list of "class start end" triplets for each action in the video, such as c092 11.90 21.20;c147 0.00 12.60
- length:
The length of the video in seconds
- egocentric:
'Yes' if the video is in first-person, else 'No'
- charades_video:
Identifier of the video in the Charades training set that has the same script as this video.

In [64]:
# Restrict to rows whose "verified" column is exactly "Yes".
is_verified = curated_data["verified"] == "Yes"
considered_data = curated_data[is_verified]

### What I am trying to do:

- collect number of available images from the given id
- then get the 3 quartile image ranges
- then get one image from each quartile.
- copy image, create a text file
- the text file should contain the script, the description, and the fragments obtained by splitting both on `,` and `.`
- each of these entries should be written on its own line

In [127]:

def get_three_random_images_filepath(id, num_to_select=30):
    """Randomly sample frame images that belong to one video.

    Despite the historical name, this returns up to ``num_to_select`` frames
    (30 by default), not three.

    Parameters
    ----------
    id
        Video identifier; it is also the name of the sub-directory of
        ``data_images/CharadesEgo_v1_rgb`` that is listed for frames.
    num_to_select : int, default 30
        Maximum number of frames to sample. If the directory holds fewer
        entries, all of them are returned (in random order).

    Returns
    -------
    tuple[list[str], list[str]]
        ``(file_paths, file_names)`` in matching order, where each path is
        ``data_images/CharadesEgo_v1_rgb/<id>/<file_name>``.

    Raises
    ------
    FileNotFoundError
        If the directory for ``id`` does not exist (raised by ``os.listdir``).
    """
    frames_dir = "data_images/CharadesEgo_v1_rgb/" + str(id)
    available_images = os.listdir(frames_dir)

    # random.sample raises ValueError when asked for more items than exist,
    # so clamp the request to what is actually available.
    sample_size = min(num_to_select, len(available_images))
    random_names = random.sample(available_images, sample_size)

    file_paths = [frames_dir + "/" + name for name in random_names]
    return file_paths, random_names

In [128]:
regex_pattern = r"[,.]"


def _as_text(value):
    """Return ``value`` stripped if it is a string, else ``""`` (None/NaN cells)."""
    return value.strip() if isinstance(value, str) else ""


def get_text_content(script, desc):
    """Build the caption lines for one video from its script and description.

    The result starts with the full (stripped) script and, when it differs
    from the script, the full description. These are followed by every
    non-empty fragment obtained by splitting those texts on ``,`` and ``.``.

    Parameters
    ----------
    script : str
        The script text. Non-string values (e.g. ``None`` or the NaN that
        pandas produces for an empty CSV cell) are treated as missing.
    desc : str
        The description text; same handling of missing values as ``script``.

    Returns
    -------
    list[str]
        Caption lines, whole texts first and fragments after. Empty texts
        and whitespace-only fragments are omitted.
    """
    script_text = _as_text(script)
    desc_text = _as_text(desc)

    # Avoid duplicating every line when the description merely repeats the script.
    if script_text == desc_text:
        sources = [script_text]
    else:
        sources = [script_text, desc_text]
    sources = [text for text in sources if text]

    text_content = list(sources)
    for text in sources:
        for fragment in re.split(regex_pattern, text):
            # Strip before testing so " " between separators is not kept as "".
            fragment = fragment.strip()
            if fragment:
                text_content.append(fragment)

    return text_content

In [None]:
DESTINATION_FOLDER_NAME = "dataset_for_dalle_2nd_training"

# shutil.copy treats a non-existent destination as a *file* name, so the
# folder must exist before the first copy.
os.makedirs(DESTINATION_FOLDER_NAME, exist_ok=True)

# For every considered video: copy the sampled frames into the destination
# folder and write a same-named .txt caption file next to each frame.
for row in considered_data.itertuples(index=False):

    # Named fields instead of positional indices, so a column reorder cannot
    # silently feed the wrong column into the captions.
    image_file_path, image_file_name = get_three_random_images_filepath(row.id)
    text_content = get_text_content(row.script, row.descriptions)

    # One caption entry per line, with no trailing newline after the last one.
    caption = "\n".join(text_content)

    for _img_name, _img_path in zip(image_file_name, image_file_path):
        # splitext drops only the final extension, so names containing
        # additional dots still pair up with their image.
        text_file_name = os.path.splitext(_img_name)[0]
        shutil.copy(_img_path, DESTINATION_FOLDER_NAME)
        caption_path = os.path.join(DESTINATION_FOLDER_NAME, text_file_name + ".txt")
        with open(caption_path, "w", encoding="utf-8") as file:
            file.write(caption)