In [1]:
import numpy as np
import cv2
from copy import deepcopy
import os
from PIL import Image
import matplotlib.pyplot as plt
import math
from num2words import num2words
import random

In [2]:
class ObjVar:
    """Record describing one detected region of a label image.

    Attributes (as populated by ``ObjDetector.get_localization``):
        position: ``[row, col]`` centre of the bounding box.
        bbox: ``[y, x, y + h, x + w]`` bounding box corners.
        grids: list of grid-cell numbers the bounding box overlaps.
        class_name: human-readable label name (e.g. ``'car'``).
        area: contour area of the region.
    """

    def __init__(self, position, bbox, grids, class_name, area):
        self.position = position
        self.bbox = bbox
        self.grids = grids
        self.class_name = class_name
        self.area = area

    def __repr__(self):
        # A readable repr makes notebook output and debugging useful
        # instead of showing bare object addresses.
        return (
            f"{type(self).__name__}("
            f"position={self.position!r}, "
            f"bbox={self.bbox!r}, "
            f"grids={self.grids!r}, "
            f"class_name={self.class_name!r}, "
            f"area={self.area!r})"
        )
        

class ObjDetector(object):
    """Locate labelled regions in a segmentation label image.

    ``labels_dict`` maps an integer key to the metadata of one class
    (``name``, ``color``, ``ct_scape_id`` and ``train_id``).  The
    ``train_id`` is the pixel value that is searched for in the label
    image passed to :meth:`get_localization`.
    """

    def __init__(self):
        self.labels_dict = {
            0:  {'name': 'unlabeled',      'color': (0,   0,   0),   'ct_scape_id': 0,  'train_id': 255},
            1:  {'name': 'road',           'color': (128, 64,  128), 'ct_scape_id': 7,  'train_id': 1},
            2:  {'name': 'sidewalk',       'color': (244, 35,  232), 'ct_scape_id': 8,  'train_id': 2},
            3:  {'name': 'building',       'color': (70,  70,  70),  'ct_scape_id': 11, 'train_id': 3},
            4:  {'name': 'wall',           'color': (102, 102, 156), 'ct_scape_id': 12, 'train_id': 4},
            5:  {'name': 'fence',          'color': (190, 153, 153), 'ct_scape_id': 13, 'train_id': 5},
            6:  {'name': 'pole',           'color': (153, 153, 153), 'ct_scape_id': 17, 'train_id': 6},
            7:  {'name': 'traffic light',  'color': (250, 170, 30),  'ct_scape_id': 19, 'train_id': 7},
            8:  {'name': 'traffic sign',   'color': (220, 220, 0),   'ct_scape_id': 20, 'train_id': 8},
            9:  {'name': 'vegetation',     'color': (107, 142, 35),  'ct_scape_id': 21, 'train_id': 9},
            10: {'name': 'terrain',        'color': (152, 251, 152), 'ct_scape_id': 22, 'train_id': 10},
            11: {'name': 'sky',            'color': (70,  130, 180), 'ct_scape_id': 23, 'train_id': 11},
            12: {'name': 'person',         'color': (220, 20,  60),  'ct_scape_id': 24, 'train_id': 12},
            13: {'name': 'rider',          'color': (255, 0,   0),   'ct_scape_id': 25, 'train_id': 13},
            14: {'name': 'car',            'color': (0,   0,   142), 'ct_scape_id': 26, 'train_id': 14},
            15: {'name': 'truck',          'color': (0,   0,   70),  'ct_scape_id': 27, 'train_id': 15},
            16: {'name': 'bus',            'color': (0,   60,  100), 'ct_scape_id': 28, 'train_id': 16},
            17: {'name': 'train',          'color': (0,   80,  100), 'ct_scape_id': 31, 'train_id': 17},
            18: {'name': 'motorcycle',     'color': (0,   0,   230), 'ct_scape_id': 32, 'train_id': 18},
            19: {'name': 'bicycle',        'color': (119, 11,  32),  'ct_scape_id': 33, 'train_id': 19},
            20: {'name': 'dynamic',        'color': (111, 74,  0),   'ct_scape_id': 5,  'train_id': 20},
            21: {'name': 'ground',         'color': (81,  0,   81),  'ct_scape_id': 6,  'train_id': 21},
            22: {'name': 'parking',        'color': (250, 170, 160), 'ct_scape_id': 9,  'train_id': 22},
            23: {'name': 'rail track',     'color': (230, 150, 140), 'ct_scape_id': 10, 'train_id': 23},
            24: {'name': 'guard rail',     'color': (180, 165, 180), 'ct_scape_id': 14, 'train_id': 24},
            25: {'name': 'bridge',         'color': (150, 100, 100), 'ct_scape_id': 15, 'train_id': 25},
            26: {'name': 'tunnel',         'color': (150, 120, 90),  'ct_scape_id': 16, 'train_id': 26},
            27: {'name': 'polegroup',      'color': (153, 153, 153), 'ct_scape_id': 18, 'train_id': 27},
            28: {'name': 'caravan',        'color': (0,   0,   90),  'ct_scape_id': 29, 'train_id': 28},
            29: {'name': 'trailer',        'color': (0,   0,   110), 'ct_scape_id': 30, 'train_id': 29}
        }

    @staticmethod
    def _cell_index(value, grid_lines):
        """Return the 1-based index of the grid cell that contains ``value``.

        Cell ``i`` covers ``grid_lines[i - 1] <= value < grid_lines[i]``.
        Values that fall outside the grid are clamped to the first or last
        cell instead of running off the end of ``grid_lines``.
        """
        last_cell = len(grid_lines) - 1
        for cell in range(1, last_cell + 1):
            if value < grid_lines[cell]:
                return cell
        return last_cell

    def get_grid_span(self, x, y, w, h, h_g_l, v_g_l):
        """Return the grid-cell numbers overlapped by a bounding box.

        Args:
            x, y: left and top coordinate of the box.
            w, h: width and height of the box.
            h_g_l: ascending grid-line positions along the x axis
                (number of columns + 1 entries).
            v_g_l: ascending grid-line positions along the y axis
                (number of rows + 1 entries).

        Returns:
            List of 1-based cell numbers, numbered row by row from the
            top-left cell.  The list is ordered column by column (all rows
            of the first covered column, then the next column, ...), e.g.
            ``[1, 4, 2, 5, 3, 6]`` for the two upper rows of a 3x3 grid.
        """
        first_col = self._cell_index(x, h_g_l)
        last_col = max(first_col, self._cell_index(x + w, h_g_l))
        first_row = self._cell_index(y, v_g_l)
        last_row = max(first_row, self._cell_index(y + h, v_g_l))

        # Derive the row stride from the grid itself rather than assuming
        # three columns.
        n_cols = len(h_g_l) - 1

        return [
            (row - 1) * n_cols + col
            for col in range(first_col, last_col + 1)
            for row in range(first_row, last_row + 1)
        ]

    def get_localization(self, img_lbl, detect_obj=None, grid_count=3):
        """Find the regions of every known class in a label image.

        Args:
            img_lbl: 2-D label image whose pixel values are ``train_id``s.
                # NOTE(review): assumed to be a uint8 array, as required by
                # the OpenCV filters used below - confirm against callers.
            detect_obj: unused; kept only for backward compatibility.
            grid_count: number of grid rows and columns the image is
                divided into.

        Returns:
            Dict mapping every key of ``labels_dict`` to a (possibly empty)
            list of ``ObjVar`` records.  Regions with a contour area below
            500, regions that are not larger than 20 px in both directions,
            and regions that touch the bottom grid row are dropped.
        """
        img_h, img_w = img_lbl.shape[0], img_lbl.shape[1]

        # The grid is identical for every class, so build it once.
        horizontal_grid_lines = [i * (img_w // grid_count) for i in range(grid_count + 1)]
        vertical_grid_lines = [i * (img_h // grid_count) for i in range(grid_count + 1)]

        # Let the last cell absorb the pixels left over by the integer
        # division so that every in-image coordinate belongs to a cell.
        horizontal_grid_lines[-1] = img_w + 1
        vertical_grid_lines[-1] = img_h + 1

        # Cells are numbered row by row, so anything from this number on
        # lies in the bottom row (7, 8, 9 for the default 3x3 grid).
        first_bottom_cell = grid_count * (grid_count - 1) + 1

        obj_dict = {}
        for key, label in self.labels_dict.items():
            obj_dict[key] = []
            obj_id = label['train_id']

            # Binary mask of the current class: 255 where the class is, else 0.
            mask = np.where(img_lbl == obj_id, 255, 0).astype(img_lbl.dtype)

            # Blur + dilate + erode merges nearby fragments into one blob.
            mask = cv2.GaussianBlur(mask, (25, 25), 0)
            mask = cv2.dilate(mask, np.ones((20, 20), np.uint8), iterations=1)
            mask = cv2.erode(mask, np.ones((10, 10), np.uint8), iterations=1)

            # Re-binarise at 15 % of the strongest response.
            th = int(np.max(mask) * 0.15)
            mask = np.where(mask > th, 255, 0).astype(np.uint8)

            # [-2] selects the contour list on both the 2-value and the
            # 3-value return signature of cv2.findContours.
            contours = cv2.findContours(
                mask,
                cv2.RETR_EXTERNAL,
                cv2.CHAIN_APPROX_SIMPLE
            )[-2]

            for cnt in contours:
                area = cv2.contourArea(cnt)
                if area < 500:
                    continue

                x, y, w, h = cv2.boundingRect(cnt)

                # Keep the box inside the image.
                if x >= img_w:
                    x = x - 1

                if x + w >= img_w:
                    w = w - 1

                if y >= img_h:
                    y = y - 1

                if y + h >= img_h:
                    h = h - 1

                if not (h > 20 and w > 20):
                    continue

                grids = self.get_grid_span(x, y, w, h, horizontal_grid_lines, vertical_grid_lines)
                if any(cell >= first_bottom_cell for cell in grids):
                    continue

                obj_dict[key].append(
                    ObjVar(
                        position=[(y + (h // 2)), (x + (w // 2))],
                        bbox=[y, x, y + h, x + w],
                        grids=grids,
                        class_name=label['name'],
                        area=area
                    )
                )

        return obj_dict

In [3]:
def box_iou(a, b):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        a, b: boxes given as 4-element sequences
            ``[min_0, min_1, max_0, max_1]`` - the minimum corner followed
            by the maximum corner (the layout of ``ObjVar.bbox``,
            ``[y, x, y + h, x + w]``).

    Returns:
        The IoU as a float.  ``0.0`` is returned when the union of the two
        boxes is empty (both boxes degenerate) instead of dividing by zero.
    """
    overlap_0 = max(0, min(a[2], b[2]) - max(a[0], b[0]))
    overlap_1 = max(0, min(a[3], b[3]) - max(a[1], b[1]))
    intersection = overlap_0 * overlap_1

    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    union = area_a + area_b - intersection

    # Two zero-area boxes have no union; treat them as not overlapping.
    if union <= 0:
        return 0.0

    return float(intersection) / float(union)

In [4]:
# Directory that holds the input images, resolved relative to the current
# working directory.
image_dir = os.path.join(os.getcwd(), '../des_gen_img')

# Full paths of all PNG files in that directory, in filename order.
# `endswith` is used so that names that merely contain ".png"
# (e.g. "frame.png.bak") are not picked up.
source_images = sorted(
    os.path.join(image_dir, img)
    for img in os.listdir(image_dir)
    if img.endswith('.png')
)


In [5]:
# Label keys that are skipped entirely.
ignores = []
# Label keys that are compared between consecutive frames.  Both lists are
# handled identically below.
# NOTE(review): key 1 ('road') is in neither list, so after the first frame
# it is never compared - confirm this is intended.
movings = [12, 13, 14, 15, 16, 17, 18, 19, 28, 29]
statics = [2, 3, 4, 5, 6, 7, 8, 9, 22, 23, 24, 25, 26, 27, 0, 10, 11, 20, 21]
# An old and a new object are considered the same object when their centres
# are closer than `dist_th` and their boxes overlap with IoU above `iou_th`.
dist_th = 50
iou_th = 0.25


def _empty_event_record():
    """Return a fresh, empty per-class event record for one frame."""
    return {
        "appear": 0,
        "appear_grids": [],
        "disappear": 0,
        "disappear_grids": [],
        "move": 0,
        "move_from": [],
        "move_from_grids": [],
        "move_to": [],
        "move_to_grids": []
    }


# description_dictionary[frame_index][label_key] -> event record
description_dictionary = {}
old_obj_dict = None

# The detector holds no per-image state, so one instance serves all frames.
obj_detector = ObjDetector()
class_keys = list(obj_detector.labels_dict)

for i, image_path in enumerate(source_images):
    # Close the file handle as soon as the pixels have been copied out.
    with Image.open(image_path) as image_file:
        img = np.asarray(image_file)

    new_obj_dict = obj_detector.get_localization(img_lbl=img)

    description_dictionary[i] = {k: _empty_event_record() for k in class_keys}

    for k in class_keys:
        if k in ignores:
            continue

        record = description_dictionary[i][k]
        new_objs = new_obj_dict[k]

        if old_obj_dict is None:
            # First frame: there is nothing to compare against, so every
            # object counts as newly appeared.
            for new_obj in new_objs:
                record["appear"] += 1
                record["appear_grids"].append(new_obj.grids)
            continue

        if k not in movings and k not in statics:
            continue

        old_objs = old_obj_dict[k]

        # first_match[old_index] is the first new object matching that old
        # object; matched_new collects every new object that matched any
        # old object.
        first_match = {}
        matched_new = set()
        for no, new_obj in enumerate(new_objs):
            for oo, old_obj in enumerate(old_objs):
                distance = math.dist(new_obj.position, old_obj.position)
                iou = box_iou(new_obj.bbox, old_obj.bbox)
                if iou > iou_th and distance < dist_th:
                    first_match.setdefault(oo, no)
                    matched_new.add(no)

        # The bookkeeping below also runs when nothing matched at all, so
        # objects that vanished or appeared in such a frame are still
        # reported.
        for oo, old_obj in enumerate(old_objs):
            if oo not in first_match:
                record["disappear"] += 1
                record["disappear_grids"].append(old_obj.grids)
            else:
                matched_obj = new_objs[first_match[oo]]
                record["move"] += 1
                record["move_from"].append(old_obj.position)
                record["move_to"].append(matched_obj.position)
                record["move_from_grids"].append(old_obj.grids)
                record["move_to_grids"].append(matched_obj.grids)

        for no, new_obj in enumerate(new_objs):
            if no not in matched_new:
                record["appear"] += 1
                record["appear_grids"].append(new_obj.grids)

    old_obj_dict = new_obj_dict

description_dictionary

{0: {0: {'appear': 0,
   'appear_grids': [],
   'disappear': 0,
   'disappear_grids': [],
   'move': 0,
   'move_from': [],
   'move_from_grids': [],
   'move_to': [],
   'move_to_grids': []},
  1: {'appear': 0,
   'appear_grids': [],
   'disappear': 0,
   'disappear_grids': [],
   'move': 0,
   'move_from': [],
   'move_from_grids': [],
   'move_to': [],
   'move_to_grids': []},
  2: {'appear': 2,
   'appear_grids': [[4], [5, 6]],
   'disappear': 0,
   'disappear_grids': [],
   'move': 0,
   'move_from': [],
   'move_from_grids': [],
   'move_to': [],
   'move_to_grids': []},
  3: {'appear': 3,
   'appear_grids': [[1, 4], [1, 4, 2, 5, 3, 6], [1, 4]],
   'disappear': 0,
   'disappear_grids': [],
   'move': 0,
   'move_from': [],
   'move_from_grids': [],
   'move_to': [],
   'move_to_grids': []},
  4: {'appear': 1,
   'appear_grids': [[4]],
   'disappear': 0,
   'disappear_grids': [],
   'move': 0,
   'move_from': [],
   'move_from_grids': [],
   'move_to': [],
   'move_to_grids': []},

In [65]:
# Groups of label keys (see ObjDetector.labels_dict) that share the same
# sentence templates when descriptions are generated.
background_class = [9, 11]                # vegetation, sky
side_walk = [2]                           # sidewalk
side_walk_object_beside = [4, 5, 10]      # wall, fence, terrain
side_walk_object_over = [6, 27]           # pole, polegroup
road = [1]                                # road
# rider, car, truck, bus, train, motorcycle, bicycle, caravan, trailer
# (13 used to be listed twice; the duplicate has been removed).
road_moving_object = [13, 14, 15, 16, 17, 18, 19, 28, 29]
road_over_object = [7, 8]                 # traffic light, traffic sign
human = [12]                              # person
ignore = [0]                              # unlabeled
construction = [3, 23, 24, 25, 26]        # building, rail track, guard rail, bridge, tunnel
parking = [22]                            # parking
dynamic = [20]                            # dynamic


# Generated sentences, keyed by frame index.
descriptions = {}

# Wording for each grid-cell number.  Only cells 1-6 are listed: objects
# touching cells 7-9 are dropped by ObjDetector.get_localization.
pos_dict = {
    1: "far left",
    2: "far center",
    3: "far right",
    4: "left",
    5: "center",
    6: "right"
}

# Interchangeable verb phrases; singular and plural lists are index-aligned.
appear_syn_sing = ['becomes visible', 'comes into view', 'appears']
appear_syn_plu = ['become visible', 'come into view', 'appear']

disappear_syn_plu = ["vanish", "pass from sight", "vanish from sight",
                     "recede from view", "are lost to sight"]

disappear_syn_sing = ["vanishes", "passes from sight", "vanishes from sight",
                      "recedes from view", "is lost to sight"]


# Whether the background (sky / vegetation) sentence has already been emitted.
bg_described = False

labels_dicto = ObjDetector().labels_dict
for key in description_dictionary.keys():
    descriptions[key] = []
    for key2 in description_dictionary[key].keys():
        appear_count = description_dictionary[key][key2]["appear"]
        disappear_count = description_dictionary[key][key2]["disappear"]
        move_count = description_dictionary[key][key2]["move"]
        
        if key2 in background_class:
            if not bg_described:
                bg_obj_names = []
                for bg_obj in background_class:
                    if description_dictionary[key][bg_obj]['appear'] > 0:
                        bg_obj_names.append(labels_dicto[bg_obj]['name'])
                if 'sky' in bg_obj_names and 'vegetation' in bg_obj_names:  
                    descriptions[key].append(
                        "There are sky and bunch of trees in the horizon"
                    )
                elif 'sky' in bg_obj_names and 'vegetation' not in bg_obj_names:  
                    descriptions[key].append(
                        "There is sky in the horizon"
                    )
                else:
                    descriptions[key].append(
                        "There are sky and bunch of trees in the horizon"
                    )
                bg_described = True
            else:
                bg_obj_names = []
                for bg_obj in background_class:
                    if description_dictionary[key][bg_obj]['appear'] > 0:
                        bg_obj_names.append(labels_dicto[bg_obj]['name'])
                if len(bg_obj_names) == 0:
                    bg_described = False
            continue
            
        if appear_count > 0:
            add_s = ''
            if appear_count > 1:
                add_s = 's'
                
            if key2 in ignore:
                continue      
            elif key2 in side_walk or key2 in construction or key2 in human:
                all_appear_grids = []

                for grd in description_dictionary[key][key2]["appear_grids"]:
                    all_appear_grids += grd

                unique_appear_grid = list(set(all_appear_grids))
                pos_s = ''

                if len(unique_appear_grid) >= 5:
                    pos_s = f"At your both side there are {labels_dicto[key2]['name']}s"
                elif len(unique_appear_grid) == 1:
                    rand_ind = random.randint(0, len(appear_syn_sing)-1)
                    pos_s = f"A {labels_dicto[key2]['name']} {appear_syn_sing[rand_ind]} at the {pos_dict[unique_appear_grid[0]]}"
                elif len(unique_appear_grid) == 2:
                    pos_s = f"On your {pos_dict[unique_appear_grid[0]]} and {pos_dict[unique_appear_grid[1]]} side there are {labels_dicto[key2]['name']}s"
                elif len(unique_appear_grid) == 3 or len(unique_appear_grid) == 4:
                    far_count = 0
                    close_count = 0
                    for bb in range(3):
                        if "far" in pos_dict[unique_appear_grid[bb]]:
                            far_count += 1
                        else:
                            close_count += 1
                    if far_count == 3:
                        pos_s = f"There are some {labels_dicto[key2]['name']}s far in front of you"
                    elif close_count == 3:
                        pos_s = f"There are some {labels_dicto[key2]['name']}s near in front of you"
                    else:
                        left_count = 0
                        right_count = 0
                        center_count = 0
                        for bb in range(3):
                            if "left" in pos_dict[unique_appear_grid[bb]]:
                                left_count += 1
                            elif "right" in pos_dict[unique_appear_grid[bb]]:
                                right_count += 1
                            else:
                                center_count += 1
                        pos_array = ['left', 'center', 'right']
                        pos_ind_ar = [left_count, center_count, right_count]
                        max_pos_ind = pos_ind_ar.index(max(pos_ind_ar))
                        max_pos_n = pos_array[max_pos_ind]
                        if max_pos_n == 'center':
                            if appear_count > 1:
                                pos_s = f"A few {labels_dicto[key2]['name']}s are ahead of you"
                            else:
                                pos_s = f"A {labels_dicto[key2]['name']} is ahead of you"
                        else:
                            if appear_count > 1:
                                pos_s = f"A few {labels_dicto[key2]['name']}s are at your {max_pos_n}"
                            else:
                                pos_s = f"A {labels_dicto[key2]['name']} is at your {max_pos_n}"
                if pos_s:
                    descriptions[key].append(
                        pos_s
                    )

            elif key2 in road_moving_object or key2 in road_over_object:  
                all_appear_grids = []

                for grd in description_dictionary[key][key2]["appear_grids"]:
                    all_appear_grids += grd

                unique_appear_grid = list(set(all_appear_grids))
                pos_s = ''

                if key2 in road_moving_object:
                    st_str = "On"
                else:
                    st_str = "Over"

                if len(unique_appear_grid) >= 5:
                    pos_s = f"{st_str} the road there are {labels_dicto[key2]['name']}s at your both side"
                elif len(unique_appear_grid) == 1:
                    rand_ind = random.randint(0, len(appear_syn_sing)-1)
                    pos_s = f"{st_str} the road a {labels_dicto[key2]['name']} {appear_syn_sing[rand_ind]} at the {pos_dict[unique_appear_grid[0]]}"
                elif len(unique_appear_grid) == 2:
                    pos_s = f"{st_str} your {pos_dict[unique_appear_grid[0]]} and {pos_dict[unique_appear_grid[1]]} side there are {labels_dicto[key2]['name']}s on the road"
                elif len(unique_appear_grid) == 3 or len(unique_appear_grid) == 4:
                    far_count = 0
                    close_count = 0
                    for bb in range(3):
                        if "far" in pos_dict[unique_appear_grid[bb]]:
                            far_count += 1
                        else:
                            close_count += 1
                    if far_count == 3:
                        pos_s = f"There are some {labels_dicto[key2]['name']}s far in front of you"
                    elif close_count == 3:
                        pos_s = f"There are some {labels_dicto[key2]['name']}s near in front of you"
                    else:
                        left_count = 0
                        right_count = 0
                        center_count = 0
                        for bb in range(3):
                            if "left" in pos_dict[unique_appear_grid[bb]]:
                                left_count += 1
                            elif "right" in pos_dict[unique_appear_grid[bb]]:
                                right_count += 1
                            else:
                                center_count += 1
                        pos_array = ['left', 'center', 'right']
                        pos_ind_ar = [left_count, center_count, right_count]
                        max_pos_ind = pos_ind_ar.index(max(pos_ind_ar))
                        max_pos_n = pos_array[max_pos_ind]
                        if max_pos_n == 'center':
                            if appear_count > 1:
                                pos_s = f"A few {labels_dicto[key2]['name']}s are ahead of you"
                            else:
                                pos_s = f"A {labels_dicto[key2]['name']} is ahead of you"
                        else:
                            if appear_count > 1:
                                pos_s = f"A few {labels_dicto[key2]['name']}s are at your {max_pos_n}"
                            else:
                                pos_s = f"A {labels_dicto[key2]['name']} is at your {max_pos_n}"
                if pos_s:
                    descriptions[key].append(
                        pos_s
                    )

            elif key2 in side_walk_object_beside or key2 in side_walk_object_over:         
                all_appear_grids = []

                for grd in description_dictionary[key][key2]["appear_grids"]:
                    all_appear_grids += grd

                unique_appear_grid = list(set(all_appear_grids))
                pos_s = ''

                if key2 in side_walk_object_beside:
                    st_str = "Beside"
                else:
                    st_str = "Over"

                if len(unique_appear_grid) >= 5:
                    pos_s = f"{st_str} the sidewalk there are {labels_dicto[key2]['name']}s at your both side"
                elif len(unique_appear_grid) == 1:
                    rand_ind = random.randint(0, len(appear_syn_sing)-1)
                    pos_s = f"{st_str} the {pos_dict[unique_appear_grid[0]]} sidewalk a {labels_dicto[key2]['name']} {appear_syn_sing[rand_ind]}"
                elif len(unique_appear_grid) == 2:
                    pos_s = f"{st_str} the {pos_dict[unique_appear_grid[0]]} and {pos_dict[unique_appear_grid[1]]} sidewalk there are {labels_dicto[key2]['name']}s"
                elif len(unique_appear_grid) == 3 or len(unique_appear_grid) == 4:
                    far_count = 0
                    close_count = 0
                    for bb in range(3):
                        if "far" in pos_dict[unique_appear_grid[bb]]:
                            far_count += 1
                        else:
                            close_count += 1
                    if far_count == 3:
                        pos_s = f"There are some {labels_dicto[key2]['name']}s far in front of you"
                    elif close_count == 3:
                        pos_s = f"There are some {labels_dicto[key2]['name']}s near in front of you"
                    else:
                        left_count = 0
                        right_count = 0
                        center_count = 0
                        for bb in range(3):
                            if "left" in pos_dict[unique_appear_grid[bb]]:
                                left_count += 1
                            elif "right" in pos_dict[unique_appear_grid[bb]]:
                                right_count += 1
                            else:
                                center_count += 1
                        pos_array = ['left', 'center', 'right']
                        pos_ind_ar = [left_count, center_count, right_count]
                        max_pos_ind = pos_ind_ar.index(max(pos_ind_ar))
                        max_pos_n = pos_array[max_pos_ind]
                        if max_pos_n == 'center':
                            if appear_count > 1:
                                pos_s = f"A few {labels_dicto[key2]['name']}s are ahead of you"
                            else:
                                pos_s = f"A {labels_dicto[key2]['name']} is ahead of you"
                        else:
                            if appear_count > 1:
                                pos_s = f"A few {labels_dicto[key2]['name']}s are at your {max_pos_n}"
                            else:
                                pos_s = f"A {labels_dicto[key2]['name']} is at your {max_pos_n}"
                if pos_s:
                    descriptions[key].append(
                        pos_s
                    )
                    
        if disappear_count > 0:
            add_s = ''
            ws_wwr = 'was'
            if disappear_count > 1:
                add_s = 's'
                ws_wwr = 'were'
                
            if key2 in ignore:
                continue      
            elif key2 in side_walk or key2 in construction or key2 in human:
                all_disappear_grids = []

                for grd in description_dictionary[key][key2]["disappear_grids"]:
                    all_disappear_grids += grd

                unique_disappear_grid = list(set(all_disappear_grids))
                pos_s = ''

                if len(unique_disappear_grid) >= 5:
                    rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                    pos_s = f"From your both side {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]}"
                elif len(unique_disappear_grid) == 1:
                    rand_ind = random.randint(0, len(disappear_syn_sing)-1)
                    pos_s = f"A {labels_dicto[key2]['name']} {disappear_syn_sing[rand_ind]} from the {pos_dict[unique_disappear_grid[0]]}"
                elif len(unique_disappear_grid) == 2:
                    rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                    pos_s = f"A few {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]} from your {pos_dict[unique_disappear_grid[0]]} and {pos_dict[unique_disappear_grid[1]]}"
                elif len(unique_disappear_grid) == 3 or len(unique_disappear_grid) == 4:
                    far_count = 0
                    close_count = 0
                    for bb in range(3):
                        if "far" in pos_dict[unique_disappear_grid[bb]]:
                            far_count += 1
                        else:
                            close_count += 1
                    if far_count == 3:
                        rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                        pos_s = f"From the far front some {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]}"
                    elif close_count == 3:
                        rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                        pos_s = f"From near front some {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]}"
                    else:
                        left_count = 0
                        right_count = 0
                        center_count = 0
                        for bb in range(3):
                            if "left" in pos_dict[unique_disappear_grid[bb]]:
                                left_count += 1
                            elif "right" in pos_dict[unique_disappear_grid[bb]]:
                                right_count += 1
                            else:
                                center_count += 1
                        pos_array = ['left', 'center', 'right']
                        pos_ind_ar = [left_count, center_count, right_count]
                        max_pos_ind = pos_ind_ar.index(max(pos_ind_ar))
                        max_pos_n = pos_array[max_pos_ind]
                        if max_pos_n == 'center':
                            if disappear_count > 1:
                                rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                                pos_s = f"A few {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]} from ahead of you"
                            else:
                                rand_ind = random.randint(0, len(disappear_syn_sing)-1)
                                pos_s = f"A {labels_dicto[key2]['name']} {disappear_syn_sing[rand_ind]} from ahead of you"
                        else:
                            if disappear_count > 1:
                                rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                                pos_s = f"Few {labels_dicto[key2]['name']}s from your {max_pos_n} {disappear_syn_plu[rand_ind]}"
                            else:
                                rand_ind = random.randint(0, len(disappear_syn_sing)-1)
                                pos_s = f"The {labels_dicto[key2]['name']} which was at your {max_pos_n} {disappear_syn_sing[rand_ind]}"
                if pos_s:
                    descriptions[key].append(
                        pos_s
                    )
                    
                    
            elif key2 in road_moving_object or key2 in road_over_object:  
                # Flatten the per-entry grid lists stored under "disappear_grids"
                # and reduce them to the distinct grid ids.
                all_disappear_grids = [
                    cell
                    for grd in description_dictionary[key][key2]["disappear_grids"]
                    for cell in grd
                ]
                unique_disappear_grid = list(set(all_disappear_grids))
                pos_s = ''

                # Assigned here but not read anywhere in the visible part of this branch.
                st_str = "On" if key2 in road_moving_object else "Over"

                # The sentence template is selected by how many distinct grids are involved.
                grid_total = len(unique_disappear_grid)
                if grid_total == 1:
                    rand_ind = random.randint(0, len(disappear_syn_sing)-1)
                    pos_s = f"A {labels_dicto[key2]['name']} {disappear_syn_sing[rand_ind]} from the {pos_dict[unique_disappear_grid[0]]}"
                elif grid_total == 2:
                    rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                    pos_s = f"A few {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]} from your {pos_dict[unique_disappear_grid[0]]} and {pos_dict[unique_disappear_grid[1]]}"
                elif grid_total in (3, 4):
                    # Only the first three distinct grids are inspected, even when there are four.
                    sampled_names = [pos_dict[unique_disappear_grid[bb]] for bb in range(3)]
                    far_count = sum(1 for grid_name in sampled_names if "far" in grid_name)
                    if far_count == 3:
                        rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                        pos_s = f"From the far front some {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]}"
                    elif far_count == 0:
                        # None of the three sampled names contains "far".
                        rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                        pos_s = f"From near front some {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]}"
                    else:
                        # Mixed far/near: fall back to the dominant horizontal side.
                        side_tally = {'left': 0, 'center': 0, 'right': 0}
                        for grid_name in sampled_names:
                            if "left" in grid_name:
                                side_tally['left'] += 1
                            elif "right" in grid_name:
                                side_tally['right'] += 1
                            else:
                                side_tally['center'] += 1
                        # max() keeps the first maximal entry, so ties resolve in the
                        # order left, center, right.
                        max_pos_n = max(('left', 'center', 'right'), key=side_tally.get)

                        # Singular or plural wording follows the number of disappeared objects.
                        several = disappear_count > 1
                        if several:
                            rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                        else:
                            rand_ind = random.randint(0, len(disappear_syn_sing)-1)

                        if max_pos_n == 'center' and several:
                            pos_s = f"A few {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]} from ahead of you"
                        elif max_pos_n == 'center':
                            pos_s = f"A {labels_dicto[key2]['name']} {disappear_syn_sing[rand_ind]} from ahead of you"
                        elif several:
                            pos_s = f"Few {labels_dicto[key2]['name']}s from your {max_pos_n} {disappear_syn_plu[rand_ind]}"
                        else:
                            pos_s = f"The {labels_dicto[key2]['name']} which was at your {max_pos_n} {disappear_syn_sing[rand_ind]}"
                elif grid_total >= 5:
                    rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                    pos_s = f"From your both side {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]}"

                # Nothing is recorded when no template applied (e.g. no grids at all).
                if pos_s:
                    descriptions[key].append(pos_s)
                    
            elif key2 in side_walk_object_beside or key2 in side_walk_object_over: 
                # Flatten the per-entry grid lists stored under "disappear_grids"
                # and reduce them to the distinct grid ids.
                all_disappear_grids = [
                    cell
                    for grd in description_dictionary[key][key2]["disappear_grids"]
                    for cell in grd
                ]
                unique_disappear_grid = list(set(all_disappear_grids))
                pos_s = ''

                # Assigned here but not read anywhere in the visible part of this branch.
                st_str = "Beside" if key2 in road_moving_object else "Over"

                # The sentence template is selected by how many distinct grids are involved.
                grid_total = len(unique_disappear_grid)
                if grid_total == 1:
                    rand_ind = random.randint(0, len(disappear_syn_sing)-1)
                    pos_s = f"A {labels_dicto[key2]['name']} {disappear_syn_sing[rand_ind]} from the {pos_dict[unique_disappear_grid[0]]}"
                elif grid_total == 2:
                    rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                    pos_s = f"A few {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]} from your {pos_dict[unique_disappear_grid[0]]} and {pos_dict[unique_disappear_grid[1]]}"
                elif grid_total in (3, 4):
                    # Only the first three distinct grids are inspected, even when there are four.
                    sampled_names = [pos_dict[unique_disappear_grid[bb]] for bb in range(3)]
                    far_count = sum(1 for grid_name in sampled_names if "far" in grid_name)
                    if far_count == 3:
                        rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                        pos_s = f"From the far front some {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]}"
                    elif far_count == 0:
                        # None of the three sampled names contains "far".
                        rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                        pos_s = f"From near front some {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]}"
                    else:
                        # Mixed far/near: fall back to the dominant horizontal side.
                        side_tally = {'left': 0, 'center': 0, 'right': 0}
                        for grid_name in sampled_names:
                            if "left" in grid_name:
                                side_tally['left'] += 1
                            elif "right" in grid_name:
                                side_tally['right'] += 1
                            else:
                                side_tally['center'] += 1
                        # max() keeps the first maximal entry, so ties resolve in the
                        # order left, center, right.
                        max_pos_n = max(('left', 'center', 'right'), key=side_tally.get)

                        # Singular or plural wording follows the number of disappeared objects.
                        several = disappear_count > 1
                        if several:
                            rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                        else:
                            rand_ind = random.randint(0, len(disappear_syn_sing)-1)

                        if max_pos_n == 'center' and several:
                            pos_s = f"A few {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]} from ahead of you"
                        elif max_pos_n == 'center':
                            pos_s = f"A {labels_dicto[key2]['name']} {disappear_syn_sing[rand_ind]} from ahead of you"
                        elif several:
                            pos_s = f"Few {labels_dicto[key2]['name']}s from your {max_pos_n} {disappear_syn_plu[rand_ind]}"
                        else:
                            pos_s = f"The {labels_dicto[key2]['name']} which was at your {max_pos_n} {disappear_syn_sing[rand_ind]}"
                elif grid_total >= 5:
                    rand_ind = random.randint(0, len(disappear_syn_plu)-1)
                    pos_s = f"From your both side {labels_dicto[key2]['name']}s {disappear_syn_plu[rand_ind]}"

                # Nothing is recorded when no template applied (e.g. no grids at all).
                if pos_s:
                    descriptions[key].append(pos_s)
                    
        if move_count > 0:
            # Movement sentences are generated only for classes listed in
            # road_moving_object or human; one sentence per recorded movement.
            if key2 in road_moving_object or key2 in human:
                # These lookups do not depend on the individual movement, so they
                # are performed once instead of on every loop iteration.
                move_from = description_dictionary[key][key2]["move_from"]
                move_to = description_dictionary[key][key2]["move_to"]

                # Distinct grid ids recorded under "move_from_grids" for this class.
                all_appear_grids = []
                for grd in description_dictionary[key][key2]["move_from_grids"]:
                    all_appear_grids += grd
                unique_appear_grid = list(set(all_appear_grids))
                grid_total = len(unique_appear_grid)

                for m_c in range(move_count):
                    # Index 0 of a position is treated as the y component, index 1 as x.
                    y_dir = move_to[m_c][0] - move_from[m_c][0]
                    x_dir = move_to[m_c][1] - move_from[m_c][1]

                    if y_dir < 0:
                        y_dir_s = "toward far"
                    elif y_dir > 0:
                        y_dir_s = "closer to"
                    else:
                        y_dir_s = ""

                    # NOTE(review): a negative x delta is worded as "right" and a
                    # positive one as "left" - confirm this matches the coordinate
                    # convention used where the positions are recorded.
                    if x_dir < 0:
                        x_dir_s = "right"
                    elif x_dir > 0:
                        x_dir_s = "left"
                    else:
                        x_dir_s = ""

                    # No displacement on either axis: nothing to report.
                    if not (x_dir_s or y_dir_s):
                        continue

                    mid_hi = " " if x_dir_s and y_dir_s else ""
                    direction = f"{y_dir_s}{mid_hi}{x_dir_s}"

                    # Phrase describing where the object was, chosen by the number
                    # of distinct grids involved.
                    if grid_total == 1:
                        origin = f"which was at {pos_dict[unique_appear_grid[0]]}"
                    elif grid_total == 2:
                        origin = f"which was at {pos_dict[unique_appear_grid[0]]}-{pos_dict[unique_appear_grid[1]]}"
                    elif grid_total in (3, 4):
                        # As before, only the first three distinct grids are inspected,
                        # even when there are four.
                        sampled_names = [pos_dict[grid_id] for grid_id in unique_appear_grid[:3]]
                        far_count = sum(1 for grid_name in sampled_names if "far" in grid_name)
                        if far_count == 3:
                            origin = "from the far front"
                        elif far_count == 0:
                            origin = "from near front"
                        else:
                            left_count = 0
                            right_count = 0
                            center_count = 0
                            for grid_name in sampled_names:
                                if "left" in grid_name:
                                    left_count += 1
                                elif "right" in grid_name:
                                    right_count += 1
                                else:
                                    center_count += 1
                            pos_array = ['left', 'center', 'right']
                            pos_ind_ar = [left_count, center_count, right_count]
                            # index() returns the first maximum, so ties resolve in the
                            # order left, center, right.
                            max_pos_n = pos_array[pos_ind_ar.index(max(pos_ind_ar))]
                            if max_pos_n == 'center':
                                origin = "ahead of you"
                            else:
                                origin = f"from your {max_pos_n}"
                    else:
                        # No grids, or five and more: no sentence is produced.
                        continue

                    # Every variant shares the same "moves slightly" wording; the
                    # left/right variant previously dropped the verb "moves".
                    pos_s = f"The {labels_dicto[key2]['name']} {origin} moves slightly {direction}"
                    descriptions[key].append(pos_s)


                

        

# for key in description_dictionary.keys():
#     descriptions[key] = []
#     for key2 in description_dictionary[key].keys():
#         appear_count = description_dictionary[key][key2]["appear"]
#         disappear_count = description_dictionary[key][key2]["disappear"]
#         move_count = description_dictionary[key][key2]["move"]
#         if appear_count > 0:
#             add_s = ''
#             if appear_count > 1:
#                 add_s = 's'
                
#             all_appera_grids = []
            
#             for grd in description_dictionary[key][key2]["appear_grids"]:
#                 all_appera_grids += grd
            
#             unique_appear_grid = list(set(all_appera_grids))
            
#             if len(unique_appear_grid) >= 5:
#                 pos_s = "surrounding you"
#             elif len(unique_appear_grid) == 1:
#                 pos_s = f"at {pos_dict[unique_appear_grid[0]]} position"
#             elif len(unique_appear_grid) == 2:
#                 pos_s = f"at {pos_dict[unique_appear_grid[0]]} and {pos_dict[unique_appear_grid[1]]} position"
#             elif len(unique_appear_grid) == 3:
#                 upper_count = 0
#                 mid_count = 0
#                 for bb in range(3):
#                     if "upper" in pos_dict[unique_appear_grid[bb]]:
#                         upper_count += 1
#                     elif "mid" in pos_dict[unique_appear_grid[bb]]:
#                         mid_count += 1
#                 if upper_count == 3:
#                     pos_s = "covering upper portion"
#                 elif mid_count == 3:
#                     pos_s = "covering mid portion"
#                 else:
#                     pos_s = "at both upper and mid portion"
#             else:
#                 pos_s = "at both upper and mid portion"                  
#             descriptions[key].append(
#                 f"{num2words(appear_count).capitalize()} {labels_dicto[key2]['name']}{add_s} appeared in the frame {pos_s}"
#             )
#         if disappear_count > 0:
#             add_s = ''
#             ws_wwr = 'was'
#             if disappear_count > 1:
#                 add_s = 's'
#                 ws_wwr = 'were'
                
#             all_disappera_grids = []
            
#             for grd in description_dictionary[key][key2]["disappear_grids"]:
#                 all_disappera_grids += grd
            
#             unique_disappear_grid = list(set(all_disappera_grids))
            
#             if len(unique_disappear_grid) >= 5:
#                 pos_s = f", which {ws_wwr} surrounding you"
#             elif len(unique_disappear_grid) == 1:
#                 pos_s = f"'s {pos_dict[all_disappera_grids[0]]} position"
#             elif len(unique_disappear_grid) == 2:
#                 pos_s = f"'s {pos_dict[all_disappera_grids[0]]} and {pos_dict[all_disappera_grids[1]]} position"
#             elif len(unique_disappear_grid) == 3:
#                 upper_count = 0
#                 mid_count = 0
#                 for bb in range(3):
#                     if "upper" in pos_dict[all_disappera_grids[bb]]:
#                         upper_count += 1
#                     elif "mid" in pos_dict[all_disappera_grids[bb]]:
#                         mid_count += 1
#                 if upper_count == 3:
#                     pos_s = ", which {ws_wwr} covering upper portion"
#                 elif mid_count == 3:
#                     pos_s = ", which {ws_wwr} covering mid portion"
#                 else:
#                     pos_s = " from both upper and mid portion"
#             else:
#                 pos_s = " from both upper and mid portion"                  
            
#             descriptions[key].append(
#                 f"{num2words(disappear_count).capitalize()} {labels_dicto[key2]['name']}{add_s} disappeared from the frame{pos_s}"
#             )
#         if move_count > 0:
#             for m_c in range(move_count):
#                 move_from = description_dictionary[key][key2]["move_from"]
#                 move_to = description_dictionary[key][key2]["move_to"]
#                 y_dir = move_to[m_c][0] - move_from[m_c][0]
#                 x_dir = move_to[m_c][1] - move_from[m_c][1]
#                 if y_dir < 0:
#                     y_dir_s = "upper"
#                 elif y_dir > 0:
#                     y_dir_s = "bottom"
#                 else:
#                     y_dir_s = ""
                    
#                 if x_dir < 0:
#                     x_dir_s = "right"
#                 elif x_dir > 0:
#                     x_dir_s = "left"
#                 else:
#                     x_dir_s = ""
                    
#                 if x_dir_s and y_dir_s:
#                     mid_hi = "-"
#                 else:
#                     mid_hi = ""
                    
#                 if x_dir_s or y_dir_s:
#                     descriptions[key].append(
#                         f"One {labels_dicto[key2]['name']} moved slightly {y_dir_s}{mid_hi}{x_dir_s} direction"
#                     )


In [66]:
# Build the report: one banner line per frame followed by that frame's
# sentences, with every "center" reworded as "front".
write_lines = []
for key, frame_sentences in descriptions.items():
    banner = f"####################Frame {str(key).zfill(2)}####################"
    write_lines.append(banner)
    write_lines.extend(des.replace("center", "front") for des in frame_sentences)

# Lines are newline-separated; no trailing newline is written.
report_text = "\n".join(write_lines)
with open('descriptions.txt', 'w') as f:
    f.write(report_text)
    