In [1]:
import os
import random
import math
import shutil
import matplotlib.pyplot as plt
import torch
# Seed Python's global `random` generator so that any calls into the `random`
# module made after this cell produce the same sequence on every run.
random.seed(10)

In [2]:
def get_test_split(data_dir, test_ratio = 0.1):
    '''
    Splits the entries of a directory into a test set and a train/validation set.

    The directory listing is sorted and then shuffled with the module-level
    ``random`` generator, so the test set is a random sample that is
    repeatable whenever ``random`` has been seeded before the call.

    Parameters
    ----------
    data_dir : str
        Directory whose entries (as returned by ``os.listdir``) are split.
    test_ratio : float
        Fraction of the entries assigned to the test set. The resulting
        count is rounded down.

    Returns
    -------
    tuple (test_files, train_valid_files)
        Two lists of entry names (not full paths). Together they contain
        every entry of ``data_dir`` exactly once.
    '''

    # os.listdir gives no ordering guarantee; sorting first means the shuffle
    # below always starts from the same sequence, whatever the file system.
    files = sorted(os.listdir(data_dir))

    # Randomise which signals end up in the test set
    random.shuffle(files)

    test_count = math.floor(len(files)*test_ratio) #Calculate number of signals for testing

    test_files = files[:test_count] # Test_files

    train_valid_files = files[test_count:] # Train_valid files

    return test_files, train_valid_files
    

In [3]:
def create_dir_and_move_files(files_to_move, signal_type, base_dir, set_split):
    '''
    Creates the directory for one class of one split of the three fold cross
    validation data and moves the given files into it.

    Files are taken from ``<base_dir>/<signal_type>`` and moved to
    ``<base_dir>/Three_Fold_Cross/<set_split>/<signal_type>``. Missing
    directories are created; directories that already exist are reused, so
    the function can be called again for the same split and class.

    Parameters
    ----------
    files_to_move : iterable of str
        File names (relative to the source class directory) to move.
    signal_type : str
        Class directory name, e.g. "DF\\". Leading/trailing path separators
        are ignored.
    base_dir : str
        Directory that holds the class directories.
    set_split : str
        Split directory name, e.g. "Test\\" or "Fold1\\". Leading/trailing
        path separators are ignored.
    '''

    # Callers pass names with a trailing separator ("DF\\", "Test\\"); strip
    # it so os.path.join can assemble the paths with the platform separator.
    split_name = set_split.strip("\\/")
    class_name = signal_type.strip("\\/")

    source_dir = os.path.join(base_dir, class_name)
    target_dir = os.path.join(base_dir, "Three_Fold_Cross", split_name, class_name)

    # Creates every missing level in one go; exist_ok avoids a
    # FileExistsError when the directory is already there (e.g. on a re-run).
    os.makedirs(target_dir, exist_ok=True)

    # Moves Signals into the split directory
    for file_name in files_to_move:
        shutil.move(os.path.join(source_dir, file_name), target_dir)

In [4]:
def get_training_folds(DF_train_valid_files, PF_train_valid_files, Prick_train_valid_files, method = "min_class"):
    '''
    Partitions each class's train/validation files into three consecutive folds.

    With method "min_class" every fold of every class has the same length:
    one third (rounded down) of the size of the smallest class. With any
    other method each class is cut into thirds of its own length (rounded
    down), so folds are not balanced across classes. Items that do not fit
    into the three folds are left out.

    Returns a tuple of nine sequences ordered DF folds 1-3, PF folds 1-3,
    Prick folds 1-3.
    '''

    def three_slices(items, width):
        # Three back-to-back, non-overlapping windows of `width` items
        return [items[k*width:(k + 1)*width] for k in range(3)]

    per_class = (DF_train_valid_files, PF_train_valid_files, Prick_train_valid_files)

    if method == "min_class":
        # One common fold size, dictated by the smallest class
        shared_width = min(len(members) for members in per_class) // 3
        widths = [shared_width]*len(per_class)
    else:
        # Each class uses a third of its own size
        widths = [len(members) // 3 for members in per_class]

    folds = []
    for members, width in zip(per_class, widths):
        folds.extend(three_slices(members, width))

    return tuple(folds)

In [5]:
# Driver: for every rat directory, set aside a test split per class and
# distribute the remaining signals over three folds, moving the files on disk.
test_ratio = 0.1

for i in range(2, 11):

    # Raw string: the backslashes of the Windows path are literal characters,
    # not escape sequences.
    base_dir = r"M:\Peripheral Nerve Studies\MCC Projects\Aseem G\Models\Pytorch\Data\Spike Firing Rate 1500\Rat " + str(i) + "\\"

    # Decide, per class, which files form the test set
    DF_test_files, DF_train_valid_files = get_test_split(base_dir + "DF", test_ratio)
    PF_test_files, PF_train_valid_files = get_test_split(base_dir + "PF", test_ratio)
    Prick_test_files, Prick_train_valid_files = get_test_split(base_dir + "Prick", test_ratio)

    # Divide what is left over three folds per class
    (DF_Fold1, DF_Fold2, DF_Fold3,
     PF_Fold1, PF_Fold2, PF_Fold3,
     Prick_Fold1, Prick_Fold2, Prick_Fold3) = get_training_folds(DF_train_valid_files, PF_train_valid_files, Prick_train_valid_files)

    # (split directory, DF files, PF files, Prick files), in the order the
    # moves are carried out
    moves = [
        ("Test\\", DF_test_files, PF_test_files, Prick_test_files),
        ("Fold1\\", DF_Fold1, PF_Fold1, Prick_Fold1),
        ("Fold2\\", DF_Fold2, PF_Fold2, Prick_Fold2),
        ("Fold3\\", DF_Fold3, PF_Fold3, Prick_Fold3),
    ]

    for set_split, DF_files, PF_files, Prick_files in moves:
        create_dir_and_move_files(DF_files, "DF\\", base_dir, set_split)
        create_dir_and_move_files(PF_files, "PF\\", base_dir, set_split)
        create_dir_and_move_files(Prick_files, "Prick\\", base_dir, set_split)