Index > Indices; clarified example use of freezing train/test split
AlexEMG committed May 26, 2020
1 parent 97239d5 commit 3541d72
Showing 5 changed files with 48 additions and 46 deletions.
@@ -163,8 +163,8 @@ def strip_cropped_image_name(path):
# Map back to the original indices.
temp = [name for i, name in enumerate(img_names) if i in test_inds_temp]
mask = Data.index.str.contains("|".join(temp))
- testIndexes = np.flatnonzero(mask)
- trainIndexes = np.flatnonzero(~mask)
+ testIndices = np.flatnonzero(mask)
+ trainIndices = np.flatnonzero(~mask)

####################################################
# Generating data structure with labeled information & frame metadata (for deep cut)
@@ -174,7 +174,7 @@ def strip_cropped_image_name(path):
data = []
print("Creating training data for ", shuffle, trainFraction)
print("This can take some time...")
- for jj in tqdm(trainIndexes):
+ for jj in tqdm(trainIndices):
jointsannotated = False
H = {}
# load image to get dimensions:
@@ -253,7 +253,7 @@ def strip_cropped_image_name(path):
if jointsannotated: # exclude images without labels
data.append(H)

- if len(trainIndexes) > 0:
+ if len(trainIndices) > 0:
(
datafilename,
metadatafilename,
@@ -266,8 +266,8 @@ def strip_cropped_image_name(path):
auxiliaryfunctions.SaveMetadata(
os.path.join(project_path, metadatafilename),
data,
- trainIndexes,
- testIndexes,
+ trainIndices,
+ testIndices,
trainFraction,
)

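The split in the hunks above is just a boolean mask over the labeled-data index and its complement. A minimal sketch of the same idea, assuming toy image paths and folder names (none of these names come from the commit):

    import numpy as np
    import pandas as pd

    # Stand-in for Data.index; real entries are labeled-data image paths.
    index = pd.Index([
        "labeled-data/video1/img001.png",
        "labeled-data/video1/img002.png",
        "labeled-data/video2/img001.png",
    ])
    temp = ["video2"]  # stand-in for the mapped-back image names

    mask = index.str.contains("|".join(temp))
    testIndices = np.flatnonzero(mask)    # positions where the mask is True
    trainIndices = np.flatnonzero(~mask)  # complement, so the sets are disjoint
    print(trainIndices, testIndices)      # [0 1] [2]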
63 changes: 31 additions & 32 deletions deeplabcut/generate_training_dataset/trainingsetmanipulation.py
@@ -672,10 +672,10 @@ def SplitTrials(trialindex, trainFraction=0.8):
else:
trainsetsize = int(len(trialindex) * round(trainFraction, 2))
shuffle = np.random.permutation(trialindex)
- testIndexes = shuffle[trainsetsize:]
- trainIndexes = shuffle[:trainsetsize]
+ testIndices = shuffle[trainsetsize:]
+ trainIndices = shuffle[:trainsetsize]

- return (trainIndexes, testIndexes)
+ return (trainIndices, testIndices)


def mergeandsplit(config, trainindex=0, uniform=True, windows2linux=False):
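For context on the SplitTrials hunk above: the split reduces to slicing a random permutation. A minimal sketch with toy values:

    import numpy as np

    trialindex = range(10)
    trainFraction = 0.8
    trainsetsize = int(len(trialindex) * round(trainFraction, 2))  # 8
    shuffled = np.random.permutation(trialindex)
    trainIndices = shuffled[:trainsetsize]  # first 80% of the permutation
    testIndices = shuffled[trainsetsize:]   # remaining 20%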
@@ -707,17 +707,16 @@ def mergeandsplit(config, trainindex=0, uniform=True, windows2linux=False):
Examples
--------
To create a leave-one-folder-out model:
- >>> trainIndexes, testIndexes=deeplabcut.mergeandsplit(config,trainindex=0,uniform=False)
- returns the indices for the first video folder (as defined in config file) as testIndexes and all others as trainIndexes.
+ >>> trainIndices, testIndices=deeplabcut.mergeandsplit(config,trainindex=0,uniform=False)
+ returns the indices for the first video folder (as defined in config file) as testIndices and all others as trainIndices.
You can then create the training set by calling (e.g. defining it as Shuffle 3):
- >>> deeplabcut.create_training_dataset(config,Shuffles=[3],trainIndexes=trainIndexes,testIndexes=testIndexes)
+ >>> deeplabcut.create_training_dataset(config,Shuffles=[3],trainIndices=trainIndices,testIndices=testIndices)
To freeze a (uniform) split:
- >>> trainIndexes, testIndexes=deeplabcut.mergeandsplit(config,trainindex=0,uniform=True)
- You can then create two model instances that have the identical trainingset. Thereby you can assess the role of various parameters on the performance of DLC.
- >>> deeplabcut.create_training_dataset(config,Shuffles=[0],trainIndexes=trainIndexes,testIndexes=testIndexes)
- >>> deeplabcut.create_training_dataset(config,Shuffles=[1],trainIndexes=trainIndexes,testIndexes=testIndexes)
+ >>> trainIndices, testIndices=deeplabcut.mergeandsplit(config,trainindex=0,uniform=True)
+ You can then create two model instances that have the identical trainingset. Thereby you can assess the role of various parameters on the performance of DLC.
+ >>> deeplabcut.create_training_dataset(config,Shuffles=[0,1],trainIndices=[trainIndices, trainIndices],testIndices=[testIndices, testIndices])
--------
"""
@@ -750,21 +749,21 @@ def mergeandsplit(config, trainindex=0, uniform=True, windows2linux=False):
if uniform == True:
TrainingFraction = cfg["TrainingFraction"]
trainFraction = TrainingFraction[trainindex]
- trainIndexes, testIndexes = SplitTrials(range(len(Data.index)), trainFraction)
+ trainIndices, testIndices = SplitTrials(range(len(Data.index)), trainFraction)
else: # leave one folder out split
videos = cfg["video_sets"].keys()
test_video_name = [Path(i).stem for i in videos][trainindex]
print("Excluding the following folder (from training):", test_video_name)
- trainIndexes, testIndexes = [], []
+ trainIndices, testIndices = [], []
for index, name in enumerate(Data.index):
# print(index,name.split(os.sep)[1])
if test_video_name == name.split(os.sep)[1]: # this is the video name
# print(name,test_video_name)
- testIndexes.append(index)
+ testIndices.append(index)
else:
- trainIndexes.append(index)
+ trainIndices.append(index)

- return trainIndexes, testIndexes
+ return trainIndices, testIndices


@lru_cache(maxsize=None)
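The non-uniform branch above implements the leave-one-folder-out split by matching the folder component of each image path. A sketch, assuming paths of the form labeled-data/<video>/<frame> (toy paths below):

    import os

    index = [
        os.path.join("labeled-data", "video1", "img001.png"),
        os.path.join("labeled-data", "video2", "img001.png"),
    ]
    test_video_name = "video2"

    trainIndices, testIndices = [], []
    for i, name in enumerate(index):
        if test_video_name == name.split(os.sep)[1]:  # folder = video name
            testIndices.append(i)
        else:
            trainIndices.append(i)
    # trainIndices == [0], testIndices == [1]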
@@ -822,8 +821,8 @@ def create_training_dataset(
Shuffles=None,
windows2linux=False,
userfeedback=False,
- trainIndexes=None,
- testIndexes=None,
+ trainIndices=None,
+ testIndices=None,
net_type=None,
augmenter_type=None,
):
@@ -852,12 +851,12 @@ def create_training_dataset(
If this is set to false, then all requested train/test splits are created (no matter if they already exist). If you
want to assure that previous splits etc. are not overwritten, then set this to True and you will be asked for each split.
- trainIndexes: list of lists, optional (default=None)
+ trainIndices: list of lists, optional (default=None)
List of one or multiple lists containing train indexes.
A list containing two lists of training indexes will produce two splits.
- testIndexes: list of lists, optional (default=None)
-     List of test indexes.
+ testIndices: list of lists, optional (default=None)
+     List of one or multiple lists containing test indexes.
net_type: string
Type of networks. Currently resnet_50, resnet_101, resnet_152, mobilenet_v2_1.0,mobilenet_v2_0.75, mobilenet_v2_0.5, and mobilenet_v2_0.35 are supported.
@@ -935,8 +934,8 @@ def create_training_dataset(
else:
Shuffles = [i for i in Shuffles if isinstance(i, int)]

- # print(trainIndexes,testIndexes, Shuffles, augmenter_type,net_type)
- if trainIndexes is None and testIndexes is None:
+ # print(trainIndices,testIndices, Shuffles, augmenter_type,net_type)
+ if trainIndices is None and testIndices is None:
splits = [
(
trainFraction,
@@ -947,13 +946,13 @@
for shuffle in Shuffles
]
else:
- if len(trainIndexes) != len(testIndexes) != len(Shuffles):
+ if len(trainIndices) != len(testIndices) != len(Shuffles):
raise ValueError(
"Number of Shuffles and train and test indexes should be equal."
)
splits = []
for shuffle, (train_inds, test_inds) in enumerate(
- zip(trainIndexes, testIndexes)
+ zip(trainIndices, testIndices)
):
trainFraction = round(
len(train_inds) * 1.0 / (len(train_inds) + len(test_inds)), 2
@@ -967,8 +966,8 @@

bodyparts = cfg["bodyparts"]
nbodyparts = len(bodyparts)
- for trainFraction, shuffle, (trainIndexes, testIndexes) in splits:
-     if len(trainIndexes) > 0:
+ for trainFraction, shuffle, (trainIndices, testIndices) in splits:
+     if len(trainIndices) > 0:
if userfeedback:
trainposeconfigfile, _, _ = training.return_train_network_path(
config,
@@ -1004,7 +1003,7 @@ def create_training_dataset(
# Saving data file (convert to training file for deeper cut (*.mat))
################################################################################
data, MatlabData = format_training_data(
- Data, trainIndexes, nbodyparts, project_path
+ Data, trainIndices, nbodyparts, project_path
)
sio.savemat(
os.path.join(project_path, datafilename), {"dataset": MatlabData}
@@ -1016,8 +1015,8 @@ def create_training_dataset(
auxiliaryfunctions.SaveMetadata(
os.path.join(project_path, metadatafilename),
data,
- trainIndexes,
- testIndexes,
+ trainIndices,
+ testIndices,
trainFraction,
)

@@ -1173,7 +1172,7 @@ def create_training_model_comparison(
largestshuffleindex = get_largestshuffle_index(config)

for shuffle in range(num_shuffles):
- trainIndexes, testIndexes = mergeandsplit(
+ trainIndices, testIndices = mergeandsplit(
config, trainindex=trainindex, uniform=True
)
for idx_net, net in enumerate(net_types):
@@ -1198,8 +1197,8 @@ def create_training_model_comparison(
config,
Shuffles=[get_max_shuffle_idx],
net_type=net,
- trainIndexes=[trainIndexes],
- testIndexes=[testIndexes],
+ trainIndices=[trainIndices],
+ testIndices=[testIndices],
augmenter_type=aug,
userfeedback=userfeedback,
windows2linux=windows2linux,
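To make the frozen-split path in create_training_dataset concrete: when trainIndices/testIndices are supplied, each list is paired with one shuffle and the training fraction is re-derived from the list lengths. A sketch with toy indices, following the loop variables visible in the hunks above (note that the chained comparison, kept as in the source, only checks adjacent pairs):

    trainIndices = [[0, 1, 2, 3], [0, 1, 2, 3]]  # one list per requested shuffle
    testIndices = [[4], [4]]
    Shuffles = [0, 1]

    if len(trainIndices) != len(testIndices) != len(Shuffles):
        raise ValueError(
            "Number of Shuffles and train and test indexes should be equal."
        )

    splits = []
    for shuffle, (train_inds, test_inds) in enumerate(
        zip(trainIndices, testIndices)
    ):
        # The fraction is recovered from the frozen split, rounded to 2 decimals.
        trainFraction = round(
            len(train_inds) * 1.0 / (len(train_inds) + len(test_inds)), 2
        )
        splits.append((trainFraction, Shuffles[shuffle], (train_inds, test_inds)))
    print(splits)  # [(0.8, 0, ...), (0.8, 1, ...)]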
10 changes: 5 additions & 5 deletions deeplabcut/utils/auxiliaryfunctions.py
@@ -349,23 +349,23 @@ def SaveData(PredicteData, metadata, dataname, pdindex, imagenames, save_as_csv)
pickle.dump(metadata, f, pickle.HIGHEST_PROTOCOL)


- def SaveMetadata(metadatafilename, data, trainIndexes, testIndexes, trainFraction):
+ def SaveMetadata(metadatafilename, data, trainIndices, testIndices, trainFraction):
with open(metadatafilename, "wb") as f:
# Pickle the 'labeled-data' dictionary using the highest protocol available.
pickle.dump(
- [data, trainIndexes, testIndexes, trainFraction], f, pickle.HIGHEST_PROTOCOL
+ [data, trainIndices, testIndices, trainFraction], f, pickle.HIGHEST_PROTOCOL
)


def LoadMetadata(metadatafile):
with open(metadatafile, "rb") as f:
[
trainingdata_details,
- trainIndexes,
- testIndexes,
+ trainIndices,
+ testIndices,
testFraction_data,
] = pickle.load(f)
- return trainingdata_details, trainIndexes, testIndexes, testFraction_data
+ return trainingdata_details, trainIndices, testIndices, testFraction_data


def get_immediate_subdirectories(a_dir):
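Since SaveMetadata above writes a plain pickled four-item list, LoadMetadata is its direct inverse. A round-trip sketch (the filename here is hypothetical):

    import pickle

    metadatafilename = "Documentation_data-demo_80shuffle0.pickle"  # hypothetical
    data, trainIndices, testIndices, trainFraction = [], [0, 1, 2], [3], 0.75

    with open(metadatafilename, "wb") as f:
        pickle.dump(
            [data, trainIndices, testIndices, trainFraction],
            f,
            pickle.HIGHEST_PROTOCOL,
        )
    with open(metadatafilename, "rb") as f:
        _, loaded_train, loaded_test, _ = pickle.load(f)
    assert loaded_train == trainIndices and loaded_test == testIndices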
3 changes: 3 additions & 0 deletions examples/testscript.py
@@ -360,6 +360,9 @@ def make_frame(t):
deeplabcut.export_model(path_config_file, shuffle=2, make_tar=False)


+ trainIndices, testIndices = deeplabcut.mergeandsplit(path_config_file, trainindex=0,uniform=False)
+ deeplabcut.create_training_dataset(path_config_file, Shuffles=[0,1], trainIndices=[trainIndices, trainIndices], testIndices=[testIndices,testIndices])

print("ALL DONE!!! - default cases are functional.")
print("Re-import DLC with env. variable set to test DLC light mode.")
os.environ["DLClight"] = "True"
6 changes: 3 additions & 3 deletions examples/testscript_openfielddata_augmentationcomparison.py
@@ -87,9 +87,9 @@

##create one split and make Shuffle 2 and 3 have the same split.
"""
- trainIndexes, testIndexes=deeplabcut.mergeandsplit(path_config_file,trainindex=0,uniform=True)
- deeplabcut.create_training_dataset(path_config_file,Shuffles=[2],trainIndexes=trainIndexes,testIndexes=testIndexes)
- deeplabcut.create_training_dataset(path_config_file,Shuffles=[3],trainIndexes=trainIndexes,testIndexes=testIndexes)
+ trainIndices, testIndices=deeplabcut.mergeandsplit(path_config_file,trainindex=0,uniform=True)
+ deeplabcut.create_training_dataset(path_config_file,Shuffles=[2],trainIndices=trainIndices,testIndices=testIndices)
+ deeplabcut.create_training_dataset(path_config_file,Shuffles=[3],trainIndices=trainIndices,testIndices=testIndices)
for shuffle in [2,3]:
if shuffle==3:
posefile=os.path.join(cfg['project_path'],'dlc-models/iteration-'+str(cfg['iteration'])+'/'+ cfg['Task'] + cfg['date'] + '-trainset' + str(int(cfg['TrainingFraction'][0] * 100)) + 'shuffle' + str(shuffle),'train/pose_cfg.yaml')
