In [12]:
import matplotlib.pyplot as plt
import csv
import os
import numpy as np
%matplotlib inline

In [13]:
class Sample:
    """One labelled recording: a label plus accelerometer and gyroscope readings.

    Wrapping the raw columns lets callers write ``sample.accelerometer.x``
    instead of positional lookups such as ``samples[label][0][0]``.
    """

    def __init__(self, label, processed_csv):
        """Build a sample from column-oriented sensor data.

        Args:
            label: Name of the label this recording belongs to.
            processed_csv: Indexable collection of columns. Positions 0-2 are
                handed to the accelerometer Sensor as x, y, z and positions
                3-5 to the gyroscope Sensor as x, y, z.
        """
        self.label = label
        # first three columns -> accelerometer axes
        accel_axes = [processed_csv[i] for i in (0, 1, 2)]
        self.accelerometer = Sensor(*accel_axes)
        # next three columns -> gyroscope axes
        gyro_axes = [processed_csv[i] for i in (3, 4, 5)]
        self.gyroscope = Sensor(*gyro_axes)

class Sensor:
    """Three-axis readings of a single sensor, exposed as ``x``, ``y`` and ``z``."""

    def __init__(self, x, y, z):
        """Store the per-axis data unchanged on the instance."""
        self.x, self.y, self.z = x, y, z

    def print_range(self, num):
        """Print the first ``num`` values of each axis, one line per axis (x, y, z)."""
        for axis_values in (self.x, self.y, self.z):
            print(axis_values[:num])

In [14]:
def clean_csv(csv_file):
    """Split parsed CSV rows into a header row and column-oriented data.

    Args:
        csv_file: List of rows (each a list of cell values), header row first.

    Returns:
        A ``(labels, columns)`` tuple. ``labels`` is the first row; ``columns``
        is the remaining rows transposed, so ``columns[0]`` is the first
        column, ``columns[1]`` the second, and so on. Both are empty lists
        when ``csv_file`` contains no rows at all.
    """
    if len(csv_file) == 0:
        #nothing to transpose - hand back empty results instead of failing on csv_file[0]
        return [], []
    #get labels
    #we won't really need them since we know the order of the data but it's a sanity check
    labels = csv_file[0]
    #csv.reader yields [] for a blank line, and a single empty row would make zip() produce no columns at all,
    #so blank rows are dropped before transposing
    data_rows = [row for row in csv_file[1:] if len(row) > 0]
    #swap rows and columns so columns[0] is the first column, columns[1] is the second column, etc.
    #note: zip stops at the shortest row, so ragged rows are truncated to the common width
    columns = [list(column) for column in zip(*data_rows)]
    return labels, columns


In [15]:
#load every recording into one flat list of Sample objects
#"standup" lives in a folder with one csv per sample, since standing happens at variable times and is hard to splice
#every other label is a single long csv that gets cut into equally sized samples
basedir = "data/"
labels = ["standup", "climbing", "walking", "still"]
#the single-file labels are cut into 50 samples each (the standup folder is expected to hold 50 files - see sanity check below)
samples_per_label = 50
data = [] #normally we separate based on label but the Sample class stores that so we can deal with it later and just dump everything in here
for label in labels:
    if label == "standup":
        #os.listdir returns names in arbitrary order; sorting keeps the sample order reproducible between runs
        for filename in sorted(os.listdir(basedir+label)):
            if filename.endswith(".csv"):
                #newline='' is what the csv module asks for so embedded newlines are parsed correctly
                with open(basedir+label+"/"+filename, 'r', newline='') as f:
                    csv_file = list(csv.reader(f))
                header_row, csv_file = clean_csv(csv_file)
                new_sample = Sample(label, csv_file)
                data.append(new_sample)
    else:
        with open(basedir+label+".csv", 'r', newline='') as f:
            csv_file = list(csv.reader(f))
        header_row, csv_file = clean_csv(csv_file)
        #the exact length of the csv's will vary so every sample gets rows//50 rows and the remainder is discarded
        chunk_len = len(csv_file[0]) // samples_per_label
        if chunk_len == 0:
            raise ValueError(basedir+label+".csv has fewer than "+str(samples_per_label)+" rows and cannot be split")
        for i in range(samples_per_label): #split into 50 samples
            #half-open slice [start:stop) - consecutive samples touch without overlapping or dropping a row
            start = i * chunk_len
            stop = start + chunk_len
            #only the first six columns are used: accelerometer x,y,z then gyroscope x,y,z
            new_sample = Sample(label, [column[start:stop] for column in csv_file[:6]])
            data.append(new_sample)

#now we have all of our data in a list of Sample objects
print("data length:", len(data)) #should be 50*#labels - sanity check
           

data length: 200


In [16]:
#peek at five randomly chosen samples: the label first, then the leading values of each sensor axis
for _draw in range(5):
    sample = data[np.random.randint(0, len(data))]
    print("label:", sample.label)
    #accelerometer is printed before gyroscope, three lines (x, y, z) each
    for sensor in (sample.accelerometer, sample.gyroscope):
        sensor.print_range(5)

label: standup
['6.3013', '6.3013', '6.0834', '5.7794', '5.4801']
['-6.9429', '-6.9429', '-6.3563', '-5.9709', '-5.8871']
['1.7501', '1.7501', '1.4317', '1.3431', '1.3503']
['-0.7666', '-0.5626', '-0.4013', '-0.2675', '-0.2101']
['2.3376', '2.3247', '2.2771', '2.2068', '2.147']
['-1.4333', '-1.294', '-1.1511', '-0.9794', '-0.8237']
label: climbing
['4.124046261888743', '0.1354629479348659', '2.3746038102388383', '0.8280226030141115', '-1.8893290612459184']
['-1.148789511373639', '2.676719794142246', '3.645176996023953', '0.4953573131985962', '-0.546045736297965']
['5.238669884288311', '-3.5153581914559004', '-1.9783872547499837', '-0.2830637851700187', '-2.0810455190613864']
['-0.5418083071708679', '1.20756995677948', '1.3309763669967651', '1.95184588432312', '0.1630344688892364']
['-0.6679011583328247', '0.5179376602172852', '-0.1528165191411972', '0.2842824459075928', '0.1861748844385147']
['1.2143772840499878', '2.977398633956909', '2.5979461669921875', '0.6369724273681641', '-0.587