In [36]:
import os
import json
import pandas as pd

In [40]:
class Annotator:
    def __init__(self, database: str = None, config: str = None):
        self.database_path = database
        if os.path.exists(database):
            self.database = pd.read_csv(database)
        else:
            self.database = pd.DataFrame(columns=['video_name', 'label'])
        self.config_path = config
        if os.path.exists(config):
            with open(config, 'r') as f:
                self.config = json.load(f)
        else:
            self.config = {
                'current_task': None,
                'saved_tasks': [],
            }
            
    def save(self):
        self.database.to_csv(self.database_path, index=False)
        with open(self.config_path, 'w') as f:
            json.dump(self.config, f)
        
    def load(self):
        self.database = pd.read_csv(self.database)
        with open(self.config_path, 'r') as f:
            self.config = json.load(f)
        
    def add_tasks(self, tasks: str | list):
        # tasks is a list of video names that need to be annotated. Set the label to None
        if type(tasks) == str:
            tasks = [tasks]
        for task in tasks:
            if task not in self.database['video_name'].tolist():
                self.database.loc[len(self.database)] = [task, None]
            
    def annotate(self, video_name: str, label: str):
        self.database.loc[self.database['video_name'] == video_name, 'label'] = label
        self.config['saved_tasks'].append(video_name)
        # if memory is more than 10, remove the oldest task
        if len(self.config['saved_tasks']) > 10:
            self.config['saved_tasks'] = self.config['saved_tasks'][1:]
        
    def get_unannotated(self):
        return self.database[self.database['label'].isnull()]
    
    def get_new_task(self):
        return self.get_unannotated().iloc[0]['video_name']
    
    def go_back(self):
        if len(self.config['saved_tasks']) > 0:
            self.database.loc[self.database['video_name'] == self.config['saved_tasks'][-1], 'label'] = None
            self.config['saved_tasks'] = self.config['saved_tasks'][:-1]

In [45]:
# Demo / smoke test of the Annotator workflow.
# NOTE: state is read from and written to 'database.csv' and 'config.json' in
# the working directory, so a re-run starts from whatever the previous run
# saved. The outputs recorded for this cell correspond to a run that started
# without existing files.
annotator = Annotator('database.csv', 'config.json')
# Register three videos; names already in the table are skipped.
annotator.add_tasks(['video1', 'video2', 'video3'])
annotator.annotate('video1', 'dog')
# Persist the table and the undo history to disk.
annotator.save()
# Rows still lacking a label (video2, video3 in the recorded run).
display(annotator.get_unannotated())
# A single name may be passed as a plain string.
annotator.add_tasks('video4')
display(annotator.get_unannotated())
annotator.annotate('video4', 'cat')
display(annotator.get_unannotated())
# First video that still lacks a label.
display(annotator.get_new_task())
annotator.annotate('video2', 'cat')
display(annotator.get_unannotated())
# Undo the most recent annotation (video2), making it unannotated again.
annotator.go_back()
display(annotator.get_unannotated())
annotator.save()

Unnamed: 0,video_name,label
1,video2,
2,video3,


Unnamed: 0,video_name,label
1,video2,
2,video3,
3,video4,


Unnamed: 0,video_name,label
1,video2,
2,video3,


'video2'

Unnamed: 0,video_name,label
2,video3,


Unnamed: 0,video_name,label
1,video2,
2,video3,
