In [7]:
"""
data_partitioner.ipynb
Created by Chikuma, 08/22/2019
"""
import numpy as np
import pandas as pd
from math import floor

In [8]:
# Load the raw feature table from disk, then render its summary statistics
# as the cell output so the columns can be eyeballed.
originalDataframe = pd.read_csv(filepath_or_buffer="../Dataset/TrainingSet.csv")
originalDataframe.describe()

Unnamed: 0,Entropy,Variance,RMS,PP,Energy,BSF,BPFI,BPFO,23 Hz
count,2048.0,2048.0,2048.0,2048.0,2048.0,2048.0,2048.0,2048.0,2048.0
mean,6.024781,10368.341587,91.22906,2603.767188,734902500000.0,0.152326,0.163637,0.112275,0.163637
std,0.532248,10741.483861,51.280495,1968.876782,721258400000.0,0.105076,0.131028,0.110026,0.131028
min,5.127,688.32,26.298,371.2,46400000000.0,0.002227,0.000535,0.001608,0.000535
25%,5.607325,1764.15,42.07525,641.6,119000000000.0,0.05695,0.040418,0.033465,0.040418
50%,5.9666,6247.95,83.455,2367.2,467500000000.0,0.129545,0.1111,0.062867,0.1111
75%,6.5217,16586.0,130.28,3512.0,1140000000000.0,0.242155,0.279273,0.160303,0.279273
max,6.9062,39762.0,199.46,9760.0,2670000000000.0,0.40699,0.42412,0.4201,0.42412


In [9]:
# Tag every row with a condition id. The ids are laid out as eight consecutive
# runs of 256 rows each: 0 (x256), 1 (x256), ..., 7 (x256) -- 2048 labels in total.
label = [conditionId for conditionId in range(8) for _ in range(256)]
originalDataframe["Condition"] = label

# Show the summary statistics again, now including the new "Condition" column.
originalDataframe.describe()

Unnamed: 0,Entropy,Variance,RMS,PP,Energy,BSF,BPFI,BPFO,23 Hz,Condition
count,2048.0,2048.0,2048.0,2048.0,2048.0,2048.0,2048.0,2048.0,2048.0,2048.0
mean,6.024781,10368.341587,91.22906,2603.767188,734902500000.0,0.152326,0.163637,0.112275,0.163637,3.5
std,0.532248,10741.483861,51.280495,1968.876782,721258400000.0,0.105076,0.131028,0.110026,0.131028,2.291847
min,5.127,688.32,26.298,371.2,46400000000.0,0.002227,0.000535,0.001608,0.000535,0.0
25%,5.607325,1764.15,42.07525,641.6,119000000000.0,0.05695,0.040418,0.033465,0.040418,1.75
50%,5.9666,6247.95,83.455,2367.2,467500000000.0,0.129545,0.1111,0.062867,0.1111,3.5
75%,6.5217,16586.0,130.28,3512.0,1140000000000.0,0.242155,0.279273,0.160303,0.279273,5.25
max,6.9062,39762.0,199.46,9760.0,2670000000000.0,0.40699,0.42412,0.4201,0.42412,7.0


In [24]:
# given the rows of one condition, return the training and validation data for fold kPart of a K-fold split
def seperateTrainValidation(dataframe, kPart, K):
    """Split ``dataframe`` into K contiguous folds and hold one out for validation.

    The rows are cut, in their existing order, into ``K`` equal-sized slices of
    ``len(dataframe) // K`` rows each. Rows left over at the end when the
    length is not a multiple of ``K`` are placed in neither output.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Rows to partition.
    kPart : int
        Index of the fold to hold out as validation data. Negative values
        count back from the last fold, like list indexing.
    K : int
        Number of folds.

    Returns
    -------
    trainingData : pandas.DataFrame
        The remaining ``K - 1`` folds concatenated in order, re-indexed
        from 0. Empty (columns only) when ``K`` is 1.
    validationData : pandas.DataFrame
        The held-out fold, sliced from ``dataframe`` with its index unchanged.

    Raises
    ------
    IndexError
        If ``kPart`` does not address one of the ``K`` folds.
    ZeroDivisionError
        If ``K`` is 0.
    """
    partitionSize = len(dataframe) // K
    partitions = [dataframe.iloc[partitionSize * i: partitionSize * (i + 1)] for i in range(K)]

    # Indexing range(K) validates kPart and resolves a negative index to the
    # fold it actually selects, so that exact fold is excluded from training.
    heldOut = range(K)[kPart]
    validationData = partitions[heldOut]

    # Compare fold numbers by value (!=), not identity: `is not` is always True
    # for NumPy integers, which would leak the validation fold into training.
    trainingParts = [partitions[i] for i in range(K) if i != heldOut]

    if trainingParts:
        # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
        trainingData = pd.concat(trainingParts, ignore_index=True)
    else:
        # K == 1: nothing is left for training; pd.concat rejects an empty list.
        trainingData = pd.DataFrame(columns=dataframe.columns)

    return trainingData, validationData

In [25]:
# Cut the labelled frame into eight equal, consecutive row blocks -- one per
# condition. `originalDataframeSize` is the number of rows in each block.
originalDataframeSize = len(originalDataframe) // 8
conditions = [
    originalDataframe.iloc[slice(originalDataframeSize * block, originalDataframeSize * (block + 1))]
    for block in range(8)
]

In [32]:
# Fold configuration: each condition is cut into K folds and fold number
# `part` is held out as validation data.
K = 4
part = 3

# Split every condition separately so each one contributes the same share of
# rows to both the training and the validation set.
trainParts = []
valParts = []
for i in range(8):
    train, val = seperateTrainValidation(conditions[i], part, K)
    trainParts.append(train)
    valParts.append(val)

# Build each output with a single concat; growing a frame with
# DataFrame.append in a loop is quadratic and the method was removed in pandas 2.0.
trainingData = pd.concat(trainParts)
validationData = pd.concat(valParts)

# Write both sets next to the source data, tagged with the held-out fold number.
trainingData.to_csv("../Dataset/trainingData_{}.csv".format(part), index = False)
validationData.to_csv("../Dataset/validationData_{}.csv".format(part), index = False)

In [28]:
# Preview only the first rows instead of dumping the whole training frame.
trainingData.head(10)

Unnamed: 0,Entropy,Variance,RMS,PP,Energy,BSF,BPFI,BPFO,23 Hz,Condition
0,6.0365,726.43,27.012,491.2,4.900000e+10,0.025956,0.026630,0.018925,0.026630,0
1,6.0389,727.36,27.124,484.8,4.940000e+10,0.038400,0.020192,0.027468,0.020192,0
2,6.0773,763.81,27.640,483.2,5.130000e+10,0.066190,0.002209,0.020683,0.002209,0
3,6.0726,748.95,27.371,432.0,5.030000e+10,0.029492,0.063825,0.025780,0.063825,0
4,6.0483,724.15,27.012,472.0,4.900000e+10,0.015728,0.030236,0.034100,0.030236,0
5,6.0590,747.51,27.368,507.2,5.030000e+10,0.077257,0.029326,0.011524,0.029326,0
6,6.0438,721.54,26.874,480.0,4.850000e+10,0.023505,0.031963,0.025671,0.031963,0
7,6.0627,743.29,27.289,460.8,5.000000e+10,0.011697,0.022466,0.028103,0.022466,0
8,6.0715,743.09,27.278,419.2,4.990000e+10,0.082963,0.050477,0.065647,0.050477,0
9,6.0498,733.61,27.208,419.2,4.970000e+10,0.034394,0.021873,0.025050,0.021873,0
