<a href="https://colab.research.google.com/github/TofunmiSodimu/Novelty-Detection/blob/main/CreateTrainingData.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
This code gets all movable/grabbable objs, their locations at certain times of the day,
and the activities being performed at that time from the HOMER_PLUS dataset.
"""

'\nThis code gets all movable/grabbable objs, their locations at certain times of the day,\nand the activities being performed at that time from the HOMER_PLUS dataset.\n'

In [None]:
# Download HOMER_PLUS dataset from github
!wget https://github.com/Maithili/HOMER_PLUS/zipball/master.zip


--2024-04-01 17:02:00--  https://github.com/Maithili/HOMER_PLUS/zipball/master.zip
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://codeload.github.com/Maithili/HOMER_PLUS/legacy.zip/main [following]
--2024-04-01 17:02:00--  https://codeload.github.com/Maithili/HOMER_PLUS/legacy.zip/main
Resolving codeload.github.com (codeload.github.com)... 140.82.114.10
Connecting to codeload.github.com (codeload.github.com)|140.82.114.10|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘master.zip’

master.zip              [ <=>                ] 108.78M  4.61MB/s    in 24s     

2024-04-01 17:02:24 (4.48 MB/s) - ‘master.zip’ saved [114059998]



In [None]:
# Unzip folder
!unzip /content/master.zip

In [32]:
import json
import random
import numpy as np
import csv

class novelty:
  """Turn HOMER_PLUS routine JSON files into CSV rows of object locations.

  For every sample time in ``time_arr`` one scene graph of a routine file is
  selected, every movable/grabbable (non-furniture) object in it is recorded
  together with an activity label, the object's outgoing edges are followed
  to find where it is, and the rows are written to ``train<NNN>.csv``.

  Attributes:
    household_dir: directory holding ``routines_train/`` and ``routines_test/``.
    time_arr: sample times, 360 up to (not including) 1600 in steps of 5
      (presumably minutes since midnight -- confirm against the dataset).
    room_ids: node ids (as strings) that are treated as rooms.
    data, graph_times, id_obj, time_obj, time_obj_arr: per-file working
      state; cleared before each routine file is processed.
  """

  def __init__(self, household_dir='/content/Maithili-HOMER_PLUS-006621c/HouseholdA'):
    """Set up the sample times, the room ids and empty working state.

    Args:
      household_dir: folder that contains the ``routines_train`` and
        ``routines_test`` sub-folders. Defaults to the Colab location the
        notebook originally hard-coded.
    """
    self.household_dir = household_dir
    # Create time_arr and room_id arr
    self.time_arr = np.arange(360,1600,5)
    self.room_ids = ['1','41','162','241']
    self._reset()

  def _reset(self):
    # Clear all state that belongs to a single routine file.
    self.data = {}
    self.time_obj = {}
    self.id_obj = {}
    self.graph_times = []
    self.time_obj_arr = []

  def get_data(self, n_train=65, n_test=10):
    """Convert every routine file of the household to a CSV file.

    Training routines ``000 .. n_train-1`` are written to
    ``train000.csv ..``; test routines are appended to the same numbering,
    starting at ``n_train`` (so they are added to the training dataset).

    Args:
      n_train: number of files to read from ``routines_train``.
      n_test: number of files to read from ``routines_test``.
    """
    for num in range(n_train):
      self._process_routine('routines_train', num, str(num).zfill(3))

    for num in range(n_test):
      self._process_routine('routines_test', num, str(num + n_train).zfill(3))

  def _process_routine(self, split, num, out_number):
    # Load one routine JSON file and write its rows to train<out_number>.csv.
    self._reset()

    file_name = self.household_dir + '/' + split + '/' + str(num).zfill(3) + '.json'
    # The context manager closes the file even if parsing fails.
    with open(file_name) as f:
      self.data = json.load(f)

    # Times at which the scene graphs in this JSON were captured
    self.graph_times = self.data['times']

    self.get_dets()
    self.convert_list_rooms()
    self.convert_csv(out_number,'train')

  def get_dets(self):
    """Fill ``time_obj`` and ``id_obj`` from the currently loaded routine.

    For each sample time the first scene graph captured at or after that
    time is used; if every graph was captured earlier, the last graph is
    used. ``time_obj[time][obj_id]`` becomes
    ``[class_name, activity, loc_id, loc_name, ...]`` with one
    ``(id, name)`` pair appended per outgoing edge whose relation is not
    ``"CLOSE"``. Times at which no movable/grabbable object exists get no
    entry in ``time_obj``.
    """
    graph_times = np.asarray(self.graph_times)
    if graph_times.size == 0:
      # No scene graphs recorded: nothing to extract.
      return

    graphs = self.data['graphs']
    activities = self.data['activities']
    last_graph_idx = len(graph_times) - 1

    for chosen_time in self.time_arr:

      # Index of the first graph captured at or after the sample time.
      difference = graph_times - chosen_time
      at_or_after = difference >= 0
      if at_or_after.any():
        nearest_idx = int(np.where(at_or_after, difference, np.inf).argmin())
      else:
        # Sample time lies after the final capture. argmin over an all-inf
        # array would silently return 0 (the first graph of the day), so
        # fall back to the most recent graph instead.
        nearest_idx = last_graph_idx

      nodes = graphs[nearest_idx]['nodes']
      edges = graphs[nearest_idx]['edges']

      # The activity list is sometimes shorter than the number of scene
      # graphs, so clamp the graph index to the last available activity.
      # (Indexing by graph, not by position in time_arr, keeps the label
      # aligned with the graph that was actually selected.)
      if len(activities):
        activity = activities[min(nearest_idx, len(activities) - 1)]
      else:
        activity = None  # no activity labels recorded in this file

      for node in nodes:
        # Remember every node's name so edge targets can be resolved
        self.id_obj.setdefault(node['id'], node['class_name'])

        is_furniture = 'Furniture' in node['category']
        can_move = ('MOVABLE' in node['properties']) or ('GRABBABLE' in node['properties'])
        if can_move and not is_furniture:
          self.time_obj.setdefault(chosen_time, {})[node['id']] = [node['class_name'], activity]

      objects_now = self.time_obj.get(chosen_time)
      if objects_now is None:
        # No movable objects in this graph, so there are no edges to attach.
        continue

      # Attach the target of every non-CLOSE edge that starts at a tracked object
      for edge in edges:
        entry = objects_now.get(edge["from_id"])
        if entry is None or edge["relation_type"] == "CLOSE":
          continue
        entry.append(str(edge["to_id"]))
        entry.append(self.id_obj[edge["to_id"]])

  def _locate(self, entry):
    # Return (room_index, surface_index) into a time_obj entry; surface_index
    # is None when the entry only names a room. Entries look like
    # [name, activity, id, name, id, name, ...] and only the first three
    # (id, name) pairs are inspected.

    # e.g., ['toothbrush', 'Wakeup', '1012', 'toothbrush_holder', '18', 'bathroom_counter', '1', 'bathroom']
    if len(entry) > 6:
      if entry[2] in self.room_ids:
        return 2, 4
      if entry[4] in self.room_ids:
        return 4, 2
      return 6, 4

    # e.g., ['coffee', 'Wakeup', '117', 'cupboard', '41', 'dining_room']
    if len(entry) > 4:
      if entry[2] in self.room_ids:
        return 2, 4
      return 4, 2

    # e.g., ['trashcan', 'Wakeup', '41', 'dining_room']
    return 2, None

  def convert_list_rooms(self):
    """Append one row per (time, object) with its room to ``time_obj_arr``.

    Rows have the keys Time, Obj_ID, Obj_name, Room_ID, Room_name, Activity.

    Raises:
      IndexError: if an object has no recorded location (entry shorter than
        four items).
    """
    for time, objects in self.time_obj.items():
      for obj_id, entry in objects.items():
        room_idx, _ = self._locate(entry)
        self.time_obj_arr.append({
          'Time':time,
          'Obj_ID':obj_id,
          'Obj_name':self.id_obj[obj_id],
          'Room_ID':entry[room_idx],
          'Room_name':entry[room_idx + 1],
          'Activity':entry[1],
        })

  def convert_list_all(self):
    """Append one row per (time, object) with room and surface to ``time_obj_arr``.

    Rows have the keys Time, Obj_ID, Obj_name, Room_ID, Room_name,
    Surface_ID, Surface_name, Activity. Objects located directly in a room
    get ``Surface_ID = float('inf')`` and an empty ``Surface_name``.

    Raises:
      IndexError: if an object has no recorded location (entry shorter than
        four items).
    """
    for time, objects in self.time_obj.items():
      for obj_id, entry in objects.items():
        room_idx, surface_idx = self._locate(entry)
        if surface_idx is None:
          surface_id = float('inf')
          surface_name = ''
        else:
          surface_id = entry[surface_idx]
          surface_name = entry[surface_idx + 1]
        self.time_obj_arr.append({
          'Time':time,
          'Obj_ID':obj_id,
          'Obj_name':self.id_obj[obj_id],
          'Room_ID':entry[room_idx],
          'Room_name':entry[room_idx + 1],
          'Surface_ID':surface_id,
          'Surface_name':surface_name,
          'Activity':entry[1],
        })

  def convert_csv(self,number,name):
    """Write ``time_obj_arr`` to ``<name><number>.csv``.

    Args:
      number: string appended to ``name`` (the zero-padded file number).
      name: file name prefix; may include a directory.

    Rows that lack the surface columns (as produced by
    ``convert_list_rooms``) are written with those cells left empty.
    """
    field_names= ['Time', 'Obj_ID', 'Obj_name', 'Room_ID', 'Room_name', 'Surface_ID', 'Surface_name', 'Activity']
    name += number + '.csv'
    # newline='' lets the csv module control line endings itself (otherwise
    # extra blank rows can appear on platforms that translate '\n').
    with open(name,'w',newline='') as csvfile:
      writer = csv.DictWriter(csvfile, fieldnames=field_names)
      writer.writeheader()
      writer.writerows(self.time_obj_arr)


In [33]:
# Process every routine file with the default settings; get_data() writes one
# CSV file per routine day via convert_csv().
novelty().get_data()