### Word In Visual Genome

In this notebook, the first 1000 words were searched in English as words and lemmas in the visual genome data sections (objects, attributes, relations etc.) and the results were obtained.

In [1]:
import os
import sys
import pandas as pd
import numpy as np
import re
import glob
from pathlib import Path
import shutil
from os.path import isfile, join

In [2]:
# language pair
lang_folder = "Turkish"  # Arabic, English, French, German, Turkish, Spanish, Portuguese, Dutch, Italian ==> target language for learner
#lang_pair = "English"  # Arabic, English, French, German, Turkish, Spanish, Portuguese, Dutch, Italian ==> native language

file_ext = 1000

# take sample
search_sample = 50

In [3]:
path = f"/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/{lang_folder.capitalize()}/\
Lemma Stem POS/Result/3-1-Word In Visual Genome"

Path(path).mkdir(parents=True, exist_ok=True)

In [4]:
def word_group_dataframe(df, search_list, target_column, sample_num):
    '''
    word_group_dataframe(df, search_list, target_column, sample_num)\n
    df is dataframe and target_column is its column for external searching_list\n
    sample_num for take sample.
    ''' 
    df_search_result = pd.DataFrame()
    for j in search_list:
        df_select = df[df[f"{target_column}"].str.contains(fr"(?:\s|^){j}(?:\s|$)", na=False, regex=True)]
        #df_select.sort_values(f"{target_column}",key=lambda x:x.str.len(), inplace=True).head(sample_num)
        df_select = df_select.sort_values(f"{target_column}",key=lambda x:x.str.len()).head(sample_num)               
        df_select.insert(0,"search_string",j)
        df_search_result = pd.concat([df_search_result,df_select], axis=0)
    df_search_result.reset_index(inplace=True, drop=True)
    
    return df_search_result

In [5]:
def word_group_dataframe_all(df, search_list, target_column):
    '''
    word_group_dataframe(df, search_list, target_column)\n
    df is dataframe and target_column is its column for external searching_list\n
    ''' 
    df_search_result = pd.DataFrame()
    for j in search_list:
        df_select = df[df[f"{target_column}"].str.contains(fr"(?:\s|^){j}(?:\s|$)", na=False, regex=True)]
        #df_select.sort_values(f"{target_column}",key=lambda x:x.str.len(), inplace=True)
        df_select = df_select.sort_values(f"{target_column}",key=lambda x:x.str.len())
        df_select.insert(0,"search_string",j)
        df_search_result = pd.concat([df_search_result,df_select], axis=0)        
    df_search_result.reset_index(inplace=True, drop=True)
    
    return df_search_result

#### Visual Genome Data

In [6]:
df_genome_objects = pd.read_csv(f"/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Turkish/Lemma Stem POS/Result/3-0-Visual Genome Process/Visual_Genome_Objects_Analysis.csv")
df_genome_objects.drop("image_url", inplace=True, axis=1)
df_genome_objects

  df_genome_objects = pd.read_csv(f"/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Turkish/Lemma Stem POS/Result/3-0-Visual Genome Process/Visual_Genome_Objects_Analysis.csv")


Unnamed: 0,object_id,names,image_id,synsets,merged_object_ids,height,width,x_koor,y_koor,num
0,1058549,trees,1,tree.n.01,,557,799,0,0,55779900
1,1058534,sidewalk,1,sidewalk.n.01,5046,290,722,78,308,29072278308
2,1058508,building,1,building.n.01,,538,222,1,0,53822210
3,1058539,street,1,street.n.01,3798578,258,359,439,283,258359439283
4,1058543,wall,1,wall.n.01,,535,135,0,1,53513501
...,...,...,...,...,...,...,...,...,...,...
2516934,3506171,number,2417997,numeral.n.01,,24,28,188,228,2428188228
2516935,3786799,plate,2417997,plate.n.04,,54,89,238,233,5489238233
2516936,3103226,road,2417997,road.n.01,,83,90,312,247,8390312247
2516937,3094589,road,2417997,road.n.01,,121,495,2,208,1214952208


In [7]:
df_genome_objects["names"].nunique()

82825

In [8]:
df_genome_attributes = pd.read_csv(f"/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Turkish/Lemma Stem POS/Result/3-0-Visual Genome Process/Visual_Genome_Attributes_Analysis.csv")
df_genome_attributes

Unnamed: 0,object_id,names,image_id,attributes,synsets,height,width,x_koor,y_koor,num
0,1058498,clock,1,"green , tall",clock.n.01,339,79,421,91,3397942191
1,5046,street,1,sidewalk,street.n.01,262,714,77,328,26271477328
2,5045,shade,1,,shade.n.01,192,274,119,338,192274119338
3,1058529,man,1,,man.n.01,262,60,238,249,26260238249
4,5048,sneakers,1,grey,gym_shoe.n.01,26,52,243,489,2652243489
...,...,...,...,...,...,...,...,...,...,...
3802369,3422177,sky,2417997,"crystal clear , blue",sky.n.01,170,497,1,2,17049712
3802370,3103226,road,2417997,,road.n.01,83,90,312,247,8390312247
3802371,2712437,doors,2417997,,door.n.01,144,50,98,127,1445098127
3802372,3140197,bus,2417997,green,bus.n.01,155,225,29,109,15522529109


In [9]:
df_genome_relationships = pd.read_csv(f"/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Turkish/Lemma Stem POS/Result/3-0-Visual Genome Process/Visual_Genome_Relationships_Analysis.csv")
df_genome_relationships

Unnamed: 0,image_id,obj_names,obj_object_id,obj_synsets,obj_merged_object_ids,obj_height,obj_width,obj_x_koor,obj_y_koor,relationships_id,synsets,sub_name,sub_object_id,sub_synsets,sub_height,sub_width,sub_x_koor,sub_y_koor,predicate,num
0,1,sidewalk,1058534,sidewalk.n.01,5046,290,722,78,308,15927,along.r.01,shade,5045,shade.n.01,192,274,119,338,on,192274119338
1,1,shoes,1058525,shoe.n.01,5048,28,48,388,485,15928,wear.v.01,man,1058529,man.n.01,262,60,238,249,wears,26260238249
2,1,,5050,headlight.n.01,,15,23,514,366,15929,have.v.01,car,5049,car.n.01,98,74,479,315,has,9874479315
3,1,,1058508,building.n.01,,536,218,1,2,15930,along.r.01,sign,1058507,sign.n.02,182,88,118,13,on,1828811813
4,1,,1058534,sidewalk.n.01,,266,722,77,331,15931,along.r.01,tree trunk,5055,trunk.n.01,327,87,622,234,on,32787622234
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2316099,2417997,,3103225,bus.n.01,,193,289,14,110,4245033,along.r.01,plate,3786799,plate.n.04,54,89,238,233,on,5489238233
2316100,2417997,,3786800,desert.n.01,,119,157,301,211,4245034,in.r.01,,3103225,bus.n.01,182,287,14,110,in,18228714110
2316101,2417997,,3103226,road.n.01,,83,90,312,247,3722727,along.r.01,bus,3103225,bus.n.01,193,289,14,110,on,19328914110
2316102,2417997,,2712437,door.n.01,,144,50,98,127,3880229,,bus,3103225,bus.n.01,193,289,14,110,with,19328914110


In [10]:
df_genome_question_answers = pd.read_csv(f"/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Turkish/Lemma Stem POS/Result/3-0-Visual Genome Process/Visual_Genome_Question_Answers_Analysis.csv")
df_genome_question_answers

Unnamed: 0,image_id,questions,que_and_ans_id,answers,num
0,1,what color is the clock,986768,green,0
1,1,how many people are there,986769,two,0
2,1,what color is the man s shirt,986772,red,0
3,1,what are the men doing,986777,interacting,0
4,1,where is the white work truck,986780,parked on the street,0
...,...,...,...,...,...
1445317,2417995,when was the photo taken,955110,day time,0
1445318,2417995,why is it so bright,955111,due to natural sunlight,0
1445319,2417995,how many trains are there,955112,one,0
1445320,2417995,what is the train made of,955113,steel,0


In [11]:
df_genome_region_descriptions = pd.read_csv(f"/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Turkish/Lemma Stem POS/Result/3-0-Visual Genome Process/Visual_Genome_Region_Descriptions_Analysis.csv")
df_genome_region_descriptions

  df_genome_region_descriptions = pd.read_csv(f"/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/Turkish/Lemma Stem POS/Result/3-0-Visual Genome Process/Visual_Genome_Region_Descriptions_Analysis.csv")


Unnamed: 0,image_id,phrases,region_id,height,width,x_koor,y_koor,num
0,1,the clock is green in colour,1382,139,82,421,57,1398242157
1,1,shade is along the street,1383,109,182,194,372,109182194372
2,1,man is wearing sneakers,1384,30,61,241,491,3061241491
3,1,cars headlights are off,1385,36,36,617,377,3636617377
4,1,bikes are parked at the far edge,1386,49,41,322,298,4941322298
...,...,...,...,...,...,...,...,...
5408684,2417997,a green school bus traveling down a road,5516956,181,303,0,111,1813030111
5408685,2417997,numbers on the front of a bus,5516957,15,23,191,231,1523191231
5408686,2417997,a yellow license plate on a bus,5516958,13,17,259,247,1317259247
5408687,2417997,a country road between two mountains,5516959,112,317,180,218,112317180218


#### Word Search In Genome Data

##### Word Data Read

In [12]:
#df_word_raw = pd.read_excel(f"/media/kurubal/SSD/Data Scientist/Work/Modern Ways/Project/{lang_folder.capitalize()}/Lemma Stem POS/Result/2-Word Select Process/{lang_folder.capitalize()}_45000_Lemma_Pos_Process.xlsx")
#df_word_raw

In [13]:
#word_list = ['an','anlama','doğum','gidiyor','günün','istediğim','istediğin','istediğiniz','kutlu','olmuş','soru','söylemek','zamanki','zor','şuna']

In [14]:
#df_word_raw = df_word_select[df_word_select["word"].isin(word_list)]
#df_word_raw.reset_index(drop=True, inplace=True)
#df_word_raw.drop(["frequency","stem"], inplace=True, axis=1)
#df_word_raw

In [15]:
df_word_raw = pd.read_excel(f"Turkish_{file_ext}_Process.xlsx")
df_word_raw

Unnamed: 0,POS1,POS2,word,lemma.spacy,stem,word_en_translate,lemma_en_translate,frequency
0,NUM,,bir,bir,bir,a,a,18835735
1,PRON,,bu,bu,bu,this,this,11062659
2,PRON,Q,ne,ne,ne,what,what,8025880
3,CCONJ,,ve,ve,ve,and,and,7766036
4,ADP,,için,için,için,for,for,5484109
...,...,...,...,...,...,...,...,...
995,ADJ,,resmi,resmi,resmi,formal,formal,68287
996,VERB,,veriyor,ver,ver,giving,give,68163
997,NOUN,,okul,okul,oku,school,school,68160
998,NOUN,,suçlu,suç,suç,guilty,crime,68124


In [16]:
df_word_raw_word_trans_list = list(set(df_word_raw["word_en_translate"]))
df_word_raw_lemma_trans_list = list(set(df_word_raw["lemma_en_translate"]))

In [17]:
len(df_word_raw_word_trans_list)

818

In [18]:
len(df_word_raw_lemma_trans_list)

486

##### Read Target Data 

###### Object Data

In [19]:
df_objects_search_word_result = word_group_dataframe(df_genome_objects, df_word_raw_word_trans_list, "names", search_sample)
df_objects_search_word_result.rename(columns={"search_string":"word_en_translate"}, inplace=True)
df_objects_search_word_result

Unnamed: 0,word_en_translate,object_id,names,image_id,synsets,merged_object_ids,height,width,x_koor,y_koor,num
0,young,3907515,young,2347740,,,138,214,116,135,138214116135
1,young,260558,young,2408497,young.n.01,3809177,199,51,395,105,19951395105
2,young,3003611,young,2344695,young.n.01,,466,211,163,34,46621116334
3,young,3673443,young,2377937,young.n.01,,43,50,266,59,435026659
4,young,4417031,young,497985,,,88,72,258,99,887225899
...,...,...,...,...,...,...,...,...,...,...,...
10483,us,1320696,us airways express,2383438,air_passage.n.01,,8,95,250,214,895250214
10484,we,858909,we have,2351682,,,18,24,138,71,182413871
10485,we,528043,"""acqua we are open""",2384202,,,175,127,69,88,1751276988
10486,sir,894544,sir,2346997,sir.n.01,,31,36,60,289,313660289


In [20]:
df_objects_search_word_result.to_csv(f"Visual_Genome_Objects_{file_ext}_Word_Result.csv", index=False)

In [21]:
df_objects_search_lemma_result = word_group_dataframe(df_genome_objects, df_word_raw_lemma_trans_list, "names", search_sample)
df_objects_search_lemma_result.rename(columns={"search_string":"lemma_en_translate"}, inplace=True)
df_objects_search_lemma_result

Unnamed: 0,lemma_en_translate,object_id,names,image_id,synsets,merged_object_ids,height,width,x_koor,y_koor,num
0,young,3907515,young,2347740,,,138,214,116,135,138214116135
1,young,260558,young,2408497,young.n.01,3809177,199,51,395,105,19951395105
2,young,3003611,young,2344695,young.n.01,,466,211,163,34,46621116334
3,young,3673443,young,2377937,young.n.01,,43,50,266,59,435026659
4,young,4417031,young,497985,,,88,72,258,99,887225899
...,...,...,...,...,...,...,...,...,...,...,...
10018,we,528043,"""acqua we are open""",2384202,,,175,127,69,88,1751276988
10019,sir,894544,sir,2346997,sir.n.01,,31,36,60,289,313660289
10020,seem,3568625,seem,2359750,,,20,83,181,155,2083181155
10021,seem,3582483,sleeve seem,2352624,sleeve.n.01,,85,80,418,414,8580418414


In [22]:
df_objects_search_lemma_result.to_csv(f"Visual_Genome_Objects_{file_ext}_Lemma_Result.csv", index=False)

###### Attribute Data

In [23]:
df_attributes_search_word_result = word_group_dataframe(df_genome_attributes, df_word_raw_word_trans_list, "attributes", search_sample)
df_attributes_search_word_result.rename(columns={"search_string":"word_en_translate"}, inplace=True)
df_attributes_search_word_result

Unnamed: 0,word_en_translate,object_id,names,image_id,attributes,synsets,height,width,x_koor,y_koor,num
0,young,2033877,man,2367637,young,man.n.01,281,202,158,23,28120215823
1,young,2687713,lady,2356338,young,lady.n.01,297,95,66,15,297956615
2,young,3087735,couple,2356369,young,couple.n.01,31,51,127,185,3151127185
3,young,2228998,cuople,2356369,young,,70,59,61,221,705961221
4,young,1870659,couple,2356369,young,couple.n.01,66,59,116,300,6659116300
...,...,...,...,...,...,...,...,...,...,...,...
12933,us,449080,flag,2395981,"white , american , us",flag.n.01,21,35,425,89,213542589
12934,we,1339153,"letters , words",2381117,"yellow , we serve",word.n.01,17,42,146,87,174214687
12935,we,255050,trunk,2408789,"thick , split , mossy , *not* stump , trunk fa...",trunk.n.01,103,126,112,0,1031261120
12936,before,2454061,ground,2361332,"worn , before",land.n.04,188,497,1,149,1884971149


In [24]:
df_attributes_search_word_result.to_csv(f"Visual_Genome_Attributes_{file_ext}_Word_Result.csv", index=False)

In [25]:
df_attributes_search_lemma_result = word_group_dataframe(df_genome_attributes, df_word_raw_lemma_trans_list, "attributes", search_sample)
df_attributes_search_lemma_result.rename(columns={"search_string":"lemma_en_translate"}, inplace=True)
df_attributes_search_lemma_result

Unnamed: 0,lemma_en_translate,object_id,names,image_id,attributes,synsets,height,width,x_koor,y_koor,num
0,young,2033877,man,2367637,young,man.n.01,281,202,158,23,28120215823
1,young,2687713,lady,2356338,young,lady.n.01,297,95,66,15,297956615
2,young,3087735,couple,2356369,young,couple.n.01,31,51,127,185,3151127185
3,young,2228998,cuople,2356369,young,,70,59,61,221,705961221
4,young,1870659,couple,2356369,young,couple.n.01,66,59,116,300,6659116300
...,...,...,...,...,...,...,...,...,...,...,...
11272,we,1339153,"letters , words",2381117,"yellow , we serve",word.n.01,17,42,146,87,174214687
11273,we,255050,trunk,2408789,"thick , split , mossy , *not* stump , trunk fa...",trunk.n.01,103,126,112,0,1031261120
11274,seem,1170925,curtains,2399247,"tan , cream , seem",curtain.n.01,96,30,279,84,963027984
11275,before,2454061,ground,2361332,"worn , before",land.n.04,188,497,1,149,1884971149


In [26]:
df_attributes_search_lemma_result.to_csv(f"Visual_Genome_Attributes_{file_ext}_Lemma_Result.csv", index=False)

###### Relationship Data

In [27]:
df_relationships_search_word_result = word_group_dataframe(df_genome_relationships, df_word_raw_word_trans_list, "sub_name", search_sample)
df_relationships_search_word_result.rename(columns={"search_string":"word_en_translate"}, inplace=True)
df_relationships_search_word_result

Unnamed: 0,word_en_translate,image_id,obj_names,obj_object_id,obj_synsets,obj_merged_object_ids,obj_height,obj_width,obj_x_koor,obj_y_koor,...,synsets,sub_name,sub_object_id,sub_synsets,sub_height,sub_width,sub_x_koor,sub_y_koor,predicate,num
0,young,107914,,1073453,mane.n.01,,132,96,323,103,...,,young,1073452,young.n.01,534,434,87,113,zebra,53443487113
1,young,107914,,1073454,ear.n.01,,58,125,331,107,...,,young,1073452,young.n.01,534,434,87,113,zebras,53443487113
2,young,107914,behind,1073456,buttocks.n.01,1073455,118,155,48,310,...,,young,1073452,young.n.01,534,434,87,113,zebras,53443487113
3,young,497985,,4417032,,,52,104,194,130,...,,young,4417031,,108,103,230,91,couple crossing street,10810323091
4,young,497985,,4417032,,,52,104,194,130,...,,young,4417031,,108,103,230,91,couple crossing street,10810323091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8520,us,2392757,,1226147,airplane.n.01,,168,213,150,58,...,be.v.01,us airways logo,1226162,,13,16,315,98,printed on,131631598
8521,us,2383438,,1320689,airplane.n.01,,150,467,13,166,...,along.r.01,us airways express,1320696,air_passage.n.01,8,95,250,214,on,895250214
8522,us,2383438,,1320689,airplane.n.01,,150,467,13,166,...,along.r.01,us airways express,1320696,air_passage.n.01,8,95,250,214,on,895250214
8523,we,2350753,bicycle,865654,bicycle.n.01,865650,67,29,116,237,...,walk.v.01,we,865681,,111,41,135,185,walks with,11141135185


In [28]:
df_relationships_search_word_result.to_csv(f"Visual_Genome_Relationships_{file_ext}_Word_Result.csv", index=False)

In [29]:
df_relationships_search_lemma_result = word_group_dataframe(df_genome_relationships, df_word_raw_lemma_trans_list, "sub_name", search_sample)
df_relationships_search_lemma_result.rename(columns={"search_string":"lemma_en_translate"}, inplace=True)
df_relationships_search_lemma_result

Unnamed: 0,lemma_en_translate,image_id,obj_names,obj_object_id,obj_synsets,obj_merged_object_ids,obj_height,obj_width,obj_x_koor,obj_y_koor,...,synsets,sub_name,sub_object_id,sub_synsets,sub_height,sub_width,sub_x_koor,sub_y_koor,predicate,num
0,young,107914,,1073453,mane.n.01,,132,96,323,103,...,,young,1073452,young.n.01,534,434,87,113,zebra,53443487113
1,young,107914,,1073454,ear.n.01,,58,125,331,107,...,,young,1073452,young.n.01,534,434,87,113,zebras,53443487113
2,young,107914,behind,1073456,buttocks.n.01,1073455,118,155,48,310,...,,young,1073452,young.n.01,534,434,87,113,zebras,53443487113
3,young,497985,,4417032,,,52,104,194,130,...,,young,4417031,,108,103,230,91,couple crossing street,10810323091
4,young,497985,,4417032,,,52,104,194,130,...,,young,4417031,,108,103,230,91,couple crossing street,10810323091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8353,money,2392587,,1228175,floor.n.01,,144,328,2,350,...,along.r.01,money,1228174,money.n.01,29,36,287,437,on,2936287437
8354,money,2398712,,1176826,briefcase.n.01,,201,249,2,264,...,arrive.v.01,money,1176829,money.n.01,31,42,23,251,coming out of,314223251
8355,we,2350753,bicycle,865654,bicycle.n.01,865650,67,29,116,237,...,walk.v.01,we,865681,,111,41,135,185,walks with,11141135185
8356,sir,2346997,,894530,train.n.01,,497,329,3,2,...,along.r.01,sir,894544,sir.n.01,31,36,60,289,on,313660289


In [30]:
df_relationships_search_lemma_result.to_csv(f"Visual_Genome_Relationships_{file_ext}_Lemma_Result.csv", index=False)

###### Question Answer Data

In [31]:
df_question_answers_search_word_result = word_group_dataframe(df_genome_question_answers, df_word_raw_word_trans_list, "questions", search_sample)
df_question_answers_search_word_result.rename(columns={"search_string":"word_en_translate"}, inplace=True)
df_question_answers_search_word_result

Unnamed: 0,word_en_translate,image_id,questions,que_and_ans_id,answers,num
0,young,2371445,what animal is young,1863819,the sheep,0
1,young,2375199,what animal is young,1941416,the giraffe closest to the camera,0
2,young,2376570,where is a young man,1969419,on the snow,0
3,young,2333590,how many young girls,720562,four,0
4,young,2360242,who is young walking,1637707,the girl,0
...,...,...,...,...,...,...
17987,before,2399430,what is before the catcher s face,1530020,metal grid attached to helmet,0
17988,before,2319182,what is the woman standing before,902894,a monument,0
17989,before,2412639,how long before the light changes,155279,seconds,0
17990,before,2396370,what is shown before the mountains,1389226,trees,0


In [32]:
df_question_answers_search_word_result.to_csv(f"Visual_Genome_Question_Answers_{file_ext}_Word_Result.csv", index=False)

In [33]:
df_question_answers_search_word_result2 = word_group_dataframe(df_genome_question_answers, df_word_raw_word_trans_list, "answers", search_sample)
df_question_answers_search_word_result2.rename(columns={"search_string":"word_en_translate"}, inplace=True)
df_question_answers_search_word_result2

Unnamed: 0,word_en_translate,image_id,questions,que_and_ans_id,answers,num
0,young,2362668,what kind of girl is this,1685919,young,0
1,young,2390652,what is the girl,1486329,young,0
2,young,2407218,how old is the man,198646,young,0
3,young,2368372,what age is the boy,1801156,young,0
4,young,2368432,what is the tree age,1802407,young,0
...,...,...,...,...,...,...
17206,before,2406904,when was this photo taken,1195317,before a meal,0
17207,before,2394516,when was the photo taken,1379083,before landing,0
17208,before,2395120,when does the landing gear come down,286723,before landing,0
17209,before,2323546,when is this picture taken,867985,before surfing,0


In [34]:
df_question_answers_search_word_result2.to_csv(f"Visual_Genome_Question_Answers_{file_ext}_Word_Result2.csv", index=False)

In [35]:
df_question_answers_search_lemma_result = word_group_dataframe(df_genome_question_answers, df_word_raw_lemma_trans_list, "questions", search_sample)
df_question_answers_search_lemma_result.rename(columns={"search_string":"lemma_en_translate"}, inplace=True)
df_question_answers_search_lemma_result

Unnamed: 0,lemma_en_translate,image_id,questions,que_and_ans_id,answers,num
0,young,2371445,what animal is young,1863819,the sheep,0
1,young,2375199,what animal is young,1941416,the giraffe closest to the camera,0
2,young,2376570,where is a young man,1969419,on the snow,0
3,young,2333590,how many young girls,720562,four,0
4,young,2360242,who is young walking,1637707,the girl,0
...,...,...,...,...,...,...
14207,before,2399430,what is before the catcher s face,1530020,metal grid attached to helmet,0
14208,before,2319182,what is the woman standing before,902894,a monument,0
14209,before,2412639,how long before the light changes,155279,seconds,0
14210,before,2396370,what is shown before the mountains,1389226,trees,0


In [36]:
df_question_answers_search_lemma_result.to_csv(f"Visual_Genome_Question_Answers_{file_ext}_Lemma_Result.csv", index=False)

In [37]:
df_question_answers_search_lemma_result2 = word_group_dataframe(df_genome_question_answers, df_word_raw_lemma_trans_list, "answers", search_sample)
df_question_answers_search_lemma_result2.rename(columns={"search_string":"lemma_en_translate"}, inplace=True)
df_question_answers_search_lemma_result2

Unnamed: 0,lemma_en_translate,image_id,questions,que_and_ans_id,answers,num
0,young,2362668,what kind of girl is this,1685919,young,0
1,young,2390652,what is the girl,1486329,young,0
2,young,2407218,how old is the man,198646,young,0
3,young,2368372,what age is the boy,1801156,young,0
4,young,2368432,what is the tree age,1802407,young,0
...,...,...,...,...,...,...
13981,before,2406904,when was this photo taken,1195317,before a meal,0
13982,before,2394516,when was the photo taken,1379083,before landing,0
13983,before,2395120,when does the landing gear come down,286723,before landing,0
13984,before,2323546,when is this picture taken,867985,before surfing,0


In [38]:
df_question_answers_search_lemma_result2.to_csv(f"Visual_Genome_Question_Answers_{file_ext}_Lemma_Result2.csv", index=False)

###### Region Description Data

In [39]:
df_region_descriptions_search_word_result = word_group_dataframe(df_genome_region_descriptions, df_word_raw_word_trans_list, "phrases", search_sample)
df_region_descriptions_search_word_result.rename(columns={"search_string":"word_en_translate"}, inplace=True)
df_region_descriptions_search_word_result

Unnamed: 0,word_en_translate,image_id,phrases,region_id,height,width,x_koor,y_koor,num
0,young,2385680,young cow,1842677,313,355,99,17,3133559917
1,young,2410647,young boy,188172,336,265,229,10,33626522910
2,young,2406914,young boy,366769,494,258,11,3,494258113
3,young,2404728,young boy,532294,173,148,118,87,17314811887
4,young,2408773,young boy,278183,170,120,169,106,170120169106
...,...,...,...,...,...,...,...,...,...
22735,before,2369723,baseball before the pitch,2203889,20,14,147,273,2014147273
22736,before,2352973,mark eward written before,3003171,11,50,213,325,1150213325
22737,before,2318159,a table before the carpet,4465885,80,116,181,135,80116181135
22738,before,2411301,plane just before landing,497637,191,342,8,17,191342817


In [40]:
df_region_descriptions_search_word_result.to_csv(f"Visual_Genome_Region_Descriptions_{file_ext}_Word_Result.csv", index=False)

In [41]:
df_region_descriptions_search_lemma_result = word_group_dataframe(df_genome_region_descriptions, df_word_raw_lemma_trans_list, "phrases", search_sample)
df_region_descriptions_search_lemma_result.rename(columns={"search_string":"lemma_en_translate"}, inplace=True)
df_region_descriptions_search_lemma_result

Unnamed: 0,lemma_en_translate,image_id,phrases,region_id,height,width,x_koor,y_koor,num
0,young,2385680,young cow,1842677,313,355,99,17,3133559917
1,young,2410647,young boy,188172,336,265,229,10,33626522910
2,young,2406914,young boy,366769,494,258,11,3,494258113
3,young,2404728,young boy,532294,173,148,118,87,17314811887
4,young,2408773,young boy,278183,170,120,169,106,170120169106
...,...,...,...,...,...,...,...,...,...
18140,before,2369723,baseball before the pitch,2203889,20,14,147,273,2014147273
18141,before,2352973,mark eward written before,3003171,11,50,213,325,1150213325
18142,before,2318159,a table before the carpet,4465885,80,116,181,135,80116181135
18143,before,2411301,plane just before landing,497637,191,342,8,17,191342817


In [42]:
df_region_descriptions_search_lemma_result.to_csv(f"Visual_Genome_Region_Descriptions_{file_ext}_Lemma_Result.csv", index=False)

#### Copy Move And Delete

In [43]:
output_file = glob.glob(f"Visual_Genome_*_Result*.csv")
output_file

['Visual_Genome_Objects_1000_Word_Result.csv',
 'Visual_Genome_Objects_1000_Lemma_Result.csv',
 'Visual_Genome_Attributes_1000_Word_Result.csv',
 'Visual_Genome_Attributes_1000_Lemma_Result.csv',
 'Visual_Genome_Relationships_1000_Word_Result.csv',
 'Visual_Genome_Relationships_1000_Lemma_Result.csv',
 'Visual_Genome_Question_Answers_1000_Word_Result.csv',
 'Visual_Genome_Question_Answers_1000_Word_Result2.csv',
 'Visual_Genome_Question_Answers_1000_Lemma_Result.csv',
 'Visual_Genome_Question_Answers_1000_Lemma_Result2.csv',
 'Visual_Genome_Region_Descriptions_1000_Word_Result.csv',
 'Visual_Genome_Region_Descriptions_1000_Lemma_Result.csv']

In [44]:
for l in output_file:
    source = l # source directory
    destination = path
    shutil.copy2(source, destination)

In [45]:
for j in output_file:
    try:
        os.remove(j)
    except:
        pass