# Process Each Scenario Dataset

In [1]:
from model.data_prep import prepare_data

Camera Location: <br>
default     0.5 -0.75 1.8 <br>
offset1     0.7 -0.75 1.8 <br>
offset2     0.65 -0.60 1.7 <br>
offset3     0.75 -0.7 1.7 <br>
offset4     0.78 -0.68 1.7

In [2]:
# Each entry pairs a scenario folder with the camera_loc string handed to
# prepare_data; drop_static=True is passed for every scenario.
# NOTE(review): dark_offset2 is processed with y=-0.65 here, while the camera
# location table in this notebook lists offset2 as y=-0.60 -- confirm which
# value is correct.
scenario_cameras = [
    ('Robot_Arm_Data/bright_default_cam', '0.5,-0.75,1.8'),
    ('Robot_Arm_Data/dark_default_cam', '0.5,-0.75,1.8'),
    ('Robot_Arm_Data/bright_offset1', '0.7,-0.75,1.8'),
    ('Robot_Arm_Data/dark_offset2', '0.65,-0.65,1.7'),
    ('Robot_Arm_Data/bright_offset3_mud', '0.75,-0.7,1.7'),
    ('Robot_Arm_Data/dark_offset4_mud', '0.78,-0.68,1.7'),
]

# Process the scenarios in the listed order.
for scenario_dir, camera in scenario_cameras:
    prepare_data(scenario_dir, camera_loc=camera, drop_static=True)

Removed 108 static images
Removed 12 static images
Removed 21 static images
Removed 24 static images
Removed 18 static images
Removed 21 static images


# Combine for Training

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [4]:
def _load_scenario(scenario: str) -> pd.DataFrame:
    """Load one scenario's JSON file and tag every row with the scenario name.

    Parameters
    ----------
    scenario : str
        Scenario label; the file read is ``Robot_Arm_Data/<scenario>.json``.

    Returns
    -------
    pd.DataFrame
        The loaded frame with an added ``name`` column set to ``scenario``.
    """
    # Deriving both the path and the label from one string keeps them in sync.
    return pd.read_json(f'Robot_Arm_Data/{scenario}.json').assign(name=scenario)


bright_default_cam = _load_scenario('bright_default_cam')
dark_default_cam = _load_scenario('dark_default_cam')
bright_offset1 = _load_scenario('bright_offset1')
dark_offset2 = _load_scenario('dark_offset2')
bright_offset3_mud = _load_scenario('bright_offset3_mud')
dark_offset4_mud = _load_scenario('dark_offset4_mud')

In [5]:
# Stack all scenario frames into one table; ignore_index=True gives the
# combined frame a fresh 0..n-1 index instead of repeating per-file indices.
scenario_frames = [
    bright_default_cam,
    dark_default_cam,
    bright_offset1,
    dark_offset2,
    bright_offset3_mud,
    dark_offset4_mud,
]
df = pd.concat(scenario_frames, ignore_index=True)

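Check object counts: take the first space-separated token of each `output` string as the object label and count how often each one occurs.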

In [6]:
# The object label is the first space-delimited token of each output string.
first_tokens = [text.split(' ')[0] for text in df['output']]
df['object'] = first_tokens
df.head()

Unnamed: 0,images,prompt,output,name,object
0,Robot_Arm_Data/bright_default_cam/image_202503...,I need to separate this portion from the rest....,"<obj>bowl</obj> <pose>0.415,-0.375,0.58</pose>...",bright_default_cam,<obj>bowl</obj>
1,Robot_Arm_Data/bright_default_cam/image_202503...,I should grab a plastic cup for my coffee.<cam...,"<obj>plastic_cup</obj> <pose>0.6,-0.905,0.548<...",bright_default_cam,<obj>plastic_cup</obj>
2,Robot_Arm_Data/bright_default_cam/image_202503...,This meal would be easier to eat with somethin...,"<obj>bowl</obj> <pose>0.485,-0.94,0.58</pose> ...",bright_default_cam,<obj>bowl</obj>
3,Robot_Arm_Data/bright_default_cam/image_202503...,I want to keep all my food together instead of...,"<obj>bowl</obj> <pose>0.53,-1.02,0.58</pose> <...",bright_default_cam,<obj>bowl</obj>
4,Robot_Arm_Data/bright_default_cam/image_202503...,I need something that lets me access informati...,"<obj>samsung_j8_black</obj> <pose>0.59,-1.12,0...",bright_default_cam,<obj>samsung_j8_black</obj>


In [7]:
# Number of rows per object label across all scenarios.
df.value_counts(subset='object')

object
<obj>coke_can</obj>            327
<obj>peach</obj>               315
<obj>samsung_j8_black</obj>    303
<obj>strawberry</obj>          288
<obj>plastic_cup</obj>         276
<obj>bowl</obj>                234
<obj>reset</obj>               175
Name: count, dtype: int64

## Split dataset

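Split the dataset into train and test sets, stratifying on the combination of scenario name and object type so that each scenario–object pair is represented proportionally in both splits.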

In [8]:
# Stratification label: the scenario name concatenated with the object label,
# so the split is balanced per (scenario, object) combination.
y = df['name'] + df['object']

# A fixed random_state makes the split reproducible across kernel restarts;
# without it every run produces a different train/test membership.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    df, y, test_size=0.1, stratify=y, random_state=SPLIT_SEED
)

In [9]:
# Per-object row counts in the training split.
X_train['object'].value_counts()

object
<obj>coke_can</obj>            295
<obj>peach</obj>               282
<obj>samsung_j8_black</obj>    273
<obj>strawberry</obj>          259
<obj>plastic_cup</obj>         248
<obj>bowl</obj>                210
<obj>reset</obj>               159
Name: count, dtype: int64

In [10]:
# Per-object row counts in the test split.
X_test['object'].value_counts()

object
<obj>peach</obj>               33
<obj>coke_can</obj>            32
<obj>samsung_j8_black</obj>    30
<obj>strawberry</obj>          29
<obj>plastic_cup</obj>         28
<obj>bowl</obj>                24
<obj>reset</obj>               16
Name: count, dtype: int64

In [11]:
# Per-scenario row counts in the training split.
X_train['name'].value_counts()

name
bright_default_cam    1078
dark_default_cam       136
bright_offset3_mud     131
bright_offset1         129
dark_offset4_mud       128
dark_offset2           124
Name: count, dtype: int64

In [12]:
# Per-scenario row counts in the test split.
X_test['name'].value_counts()

name
bright_default_cam    120
dark_default_cam       16
dark_offset2           15
dark_offset4_mud       14
bright_offset3_mud     14
bright_offset1         13
Name: count, dtype: int64

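Shuffle the rows of each split and export them as JSON records to `Robot_Arm_Data/train.json` and `Robot_Arm_Data/test.json`.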

In [None]:
# Shuffle the training rows with a fixed seed so the exported row order is the
# same on every run, then write them as an indented JSON array of records.
# force_ascii=False keeps non-ASCII characters unescaped in the file.
X_train.sample(frac=1, random_state=42).to_json(
    'Robot_Arm_Data/train.json', orient='records', indent=4, force_ascii=False
)

In [None]:
# Shuffle the test rows with a fixed seed so the exported row order is the
# same on every run, then write them as an indented JSON array of records.
# force_ascii=False keeps non-ASCII characters unescaped in the file.
X_test.sample(frac=1, random_state=42).to_json(
    'Robot_Arm_Data/test.json', orient='records', indent=4, force_ascii=False
)