In [41]:
import numpy as np
import os

# Root folder that holds one sub-folder of recordings per activity.
parent_dir = './datas/'

# Activity sub-folders under `parent_dir`, visited in this order by the merge loop.
sub_dir = [
    '00Sitting',
    '01Waving',
    '02Walking',
    '03Jumping',
    '04Running',
]

# Display name for each integer label; used in printed summaries and plot titles.
# NOTE(review): entry 1 reads 'Stretching' while the second folder is '01Waving' —
# confirm the two names are meant to differ.
LABEL = dict(enumerate(('Sitting', 'Stretching', 'Walking', 'Jumping', 'Running')))

# Number of samples in one segment (row length of every segment array).
window_size = 25

# Bound handed to `data_shifting` for the random circular shift used in augmentation.
shift_amount = 5

# When True, every kept segment is followed by a randomly shifted copy.
augmentation = True

# segment data

In [42]:
def sliding_window(data, size):
    """Yield ``(start, end)`` index pairs for half-overlapping windows over ``data``.

    The cursor advances by ``size / 2`` and is kept as a float; it is truncated
    with ``int`` only when a pair is yielded. For an odd ``size`` the effective
    stride therefore alternates (12, 13, 12, ... for ``size=25``).

    Windows are produced for every cursor position below ``len(data)``, so the
    last pairs may reach past the end of the data. Callers must discard slices
    that come out shorter than ``size``.

    Args:
        data: Any sized object; only ``len(data)`` is used.
        size: Window length in samples. Must be positive.

    Yields:
        tuple[int, int]: start (inclusive) and end (exclusive) indices.

    Raises:
        ValueError: When iteration starts with a non-positive ``size``; the
            cursor would never move past the end of the data.
    """
    if size <= 0:
        raise ValueError(f'size must be positive, got {size}')

    total = len(data)
    step = size / 2  # 50 % overlap; use `size` here for non-overlapping windows
    cursor = 0
    while cursor < total:
        yield int(cursor), int(cursor + size)
        cursor += step

def data_shifting(data, shift_amount):
    """Return a copy of ``data`` circularly shifted by a random non-zero offset.

    The offset is drawn uniformly from the integers in
    ``[-shift_amount, shift_amount - 1]`` with 0 excluded, so the result is
    never an unshifted duplicate of the input (as long as the offset is not a
    multiple of the data length).

    Args:
        data: Array-like to roll; it is passed straight to ``np.roll`` without
            an axis argument.
        shift_amount: Positive integer bound for the offset.

    Returns:
        numpy.ndarray: The rolled copy of ``data``.

    Raises:
        ValueError: If ``shift_amount`` is smaller than 1.
    """
    if shift_amount < 1:
        raise ValueError(f'shift_amount must be >= 1, got {shift_amount}')

    # randint's upper bound is exclusive, so this draws from
    # [-shift_amount, shift_amount - 2] ...
    offset = np.random.randint(-shift_amount, shift_amount - 1)
    # ... and moving the non-negative half up by one skips 0, giving
    # [-shift_amount, -1] U [1, shift_amount - 1].
    if offset >= 0:
        offset += 1
    return np.roll(data, offset)


def segment_signal(file_path, window_size, augmentation=False):
    """Cut one recording file into fixed-length segments of column 3.

    The file is parsed as comma-separated integers. Window positions come from
    ``sliding_window``; a window is kept only when it holds exactly
    ``window_size`` rows. Each kept window contributes its column-3 values as
    one segment row and the column-0 value of its first row as the label.

    With ``augmentation`` enabled, every kept window is followed by a randomly
    rolled copy produced by ``data_shifting`` (using the module-level
    ``shift_amount``) that carries the same label.

    Args:
        file_path: Path of the comma-separated recording file.
        window_size: Number of rows per segment.
        augmentation: Whether to add one shifted copy per kept window.

    Returns:
        tuple: ``(segments, labels)`` where ``segments`` is a float array of
        shape ``(n, window_size)`` and ``labels`` an int array of shape ``(n,)``.

    Raises:
        Exception: Any error hit while slicing the data is re-raised after the
            error and the file path have been printed. (The error used to end
            in ``exit()``, which stops the whole kernel and hides the traceback.)
    """
    data = np.genfromtxt(file_path, delimiter=',', dtype="int", encoding=None)

    # Rows are collected in plain lists and stacked once at the end instead of
    # re-allocating the arrays for every window.
    rows = []
    row_labels = []
    try:
        for start, end in sliding_window(range(len(data)), window_size):
            v = data[start:end, 3]
            if len(v) != window_size:
                # Trailing window that reaches past the end of the recording.
                continue
            first_label = data[start, 0]
            rows.append(v)
            row_labels.append(first_label)
            if augmentation:
                rows.append(data_shifting(v, shift_amount))
                row_labels.append(first_label)
    except Exception as e:
        print(f'error: {e}')
        print(f'files{file_path}')
        raise

    # The leading empty block fixes the dtype and the (0, window_size) shape
    # for files that yield no complete window.
    segments = np.vstack([np.empty((0, window_size), dtype=float), *rows])
    labels = np.array(row_labels, dtype=int)

    print(f'file_path:{file_path}')
    print(f"segments.shape{segments.shape}")
    print(f"labels.shape{labels.shape}")
    return segments, labels

def traverse_folder(folder_path):
    """Segment every regular file in ``folder_path``, one file at a time.

    Entries are visited in sorted name order so repeated runs process the
    files in the same sequence (``os.listdir`` alone gives no ordering
    guarantee). Entries that are not regular files are reported and skipped.

    Segmentation uses the module-level ``window_size`` and ``augmentation``
    settings.

    Args:
        folder_path: Directory containing the recording files.

    Yields:
        tuple: The ``(segments, labels)`` pair returned by ``segment_signal``
        for each file.
    """
    for file in sorted(os.listdir(folder_path)):
        content_path = os.path.join(folder_path, file)
        if not os.path.isfile(content_path):
            print(f'not file: {content_path}')
            continue
        yield segment_signal(content_path, window_size, augmentation=augmentation)
            
def combine_segments(prefix):
    """Load and concatenate the segments of every file in one folder.

    Args:
        prefix: Folder whose files are segmented via ``traverse_folder``.

    Returns:
        tuple: ``(segments, labels)`` with all per-file results stacked along
        axis 0. When the folder yields nothing, the arrays are empty with
        shapes ``(0, window_size)`` and ``(0,)``.
    """
    # Start from empty, correctly shaped arrays so an empty folder still
    # produces valid output; everything is concatenated once after the loop.
    segment_parts = [np.empty((0, window_size), dtype=int)]
    label_parts = [np.empty(0, dtype=int)]
    for segment, label in traverse_folder(prefix):
        segment_parts.append(segment)
        label_parts.append(label)

    segments = np.concatenate(segment_parts, axis=0)
    labels = np.concatenate(label_parts, axis=0)

    # The summary names the activity after the first label; with no labels at
    # all there is nothing to look up, so avoid indexing an empty array.
    activity = LABEL[labels[0]] if len(labels) else 'n/a'
    print(f'==SummarizeTrain=={activity}====')
    print(f"segments.shape{segments.shape}")
    print(f"labels.shape{labels.shape}")
    return segments, labels


def load_test_data(sub_dir):
    """Load, concatenate and plot the recordings of one test-data folder.

    The folder is ``parent_dir + 'testData/' + sub_dir + '/'``. Note that the
    ``sub_dir`` parameter shadows the module-level list of the same name.

    The folder is plotted once, after loading. Plotting used to sit inside the
    per-file loop, so every file was re-plotted once for each file in the
    folder. ``traverse_plot`` must be defined before this function is called.

    Args:
        sub_dir: Name of the activity folder below ``testData``.

    Returns:
        tuple: ``(segments, labels)`` with all per-file results stacked along
        axis 0. When the folder yields nothing, the arrays are empty with
        shapes ``(0, window_size)`` and ``(0,)``.
    """
    prefix = parent_dir + 'testData/' + sub_dir + '/'

    segment_parts = [np.empty((0, window_size), dtype=int)]
    label_parts = [np.empty(0, dtype=int)]
    for segment, label in traverse_folder(prefix):
        segment_parts.append(segment)
        label_parts.append(label)

    segments = np.concatenate(segment_parts, axis=0)
    labels = np.concatenate(label_parts, axis=0)

    # Only plot when at least one file was loaded.
    if len(segment_parts) > 1:
        traverse_plot(prefix)

    # Avoid indexing an empty label array when nothing was loaded.
    activity = LABEL[labels[0]] if len(labels) else 'n/a'
    print(f'==SummarizeTest=={activity}====')
    print(f"segments.shape{segments.shape}")
    print(f"labels.shape{labels.shape}")
    return segments, labels

# prefix = parent_dir + sub_dir[0] + '/'
# segments, labels = combine_segments(prefix)

# plot data

In [43]:
# plot the velocity vs time
import matplotlib.pyplot as plt
# NOTE(review): this module-level `size` is not read by any code visible in this
# section; the plot helpers below declare their own `size` parameter. Confirm it
# is still needed.
size = 1000
def traverse_plot(folder_path):
    """Draw the velocity and coordinate plots for every file in a folder.

    Entries are visited in sorted name order so the figures appear in the same
    sequence on every run (``os.listdir`` alone gives no ordering guarantee).
    Entries that are not regular files are reported and skipped.

    Args:
        folder_path: Directory containing the recording files.
    """
    for file in sorted(os.listdir(folder_path)):
        content_path = os.path.join(folder_path, file)
        if not os.path.isfile(content_path):
            print(f'not file: {content_path}')
            continue
        print(f'plot graph for file: {content_path}')
        activ_data = get_data(content_path)
        plot_velocity(activ_data)
        plot_coordinate(activ_data)
            
def get_data(file_path):
    """Read one recording file and return it as a plot-ready table.

    The file is parsed as comma-separated integers. Column 0 is taken as the
    label, columns 1 and 2 are divided by 1000 and column 3 by 100.
    NOTE(review): the scale factors look like unit conversions (the velocity
    plot labels the axes 't(s)' and 'v(m/s)') — confirm the raw units.

    Args:
        file_path: Path of the comma-separated recording file.

    Returns:
        numpy.ndarray: Float array of shape ``(n_rows, 5)`` with the columns
        ``label, t, x, y, v``, where ``t`` advances by 0.1 per row.
    """
    data = np.genfromtxt(file_path, delimiter=',', dtype="int", encoding=None)

    # One timestamp per row. Scaling an integer range guarantees exactly
    # len(data) values; np.arange with a float stop/step can return one extra
    # element (e.g. for 3 rows), which made the stacking below fail.
    t = np.arange(len(data)) * 0.1
    label = np.array(data[:, 0])
    x = np.array(data[:, 1] / 1000)
    y = np.array(data[:, 2] / 1000)
    v = np.array(data[:, 3] / 100)

    activ_data = np.vstack((label, t, x, y, v)).T
    return activ_data

def plot_coordinate(activ_data,size=3000):
    """Scatter the x/y positions of one recording, coloured by velocity.

    Args:
        activ_data: Table as built by ``get_data``; column 0 holds the label,
            columns 2, 3 and 4 hold x, y and velocity.
        size: Not used by this function; kept so existing calls keep working.
    """
    plt.figure(figsize=(10, 6))

    xs, ys, speed = (activ_data[:, col] for col in (2, 3, 4))
    # The title is the activity name of the first row's label.
    activity = LABEL[activ_data[0, 0]]

    plt.scatter(xs, ys, c=speed, cmap='viridis', marker='o', label='Position', s=5)
    plt.xlabel('x')
    plt.ylabel('Y')
    plt.colorbar(label='velocity')
    plt.title(activity)
    plt.grid(True)
    plt.legend()
    plt.show()
    
def plot_velocity(activ_data,size=3000):
    """Plot velocity against time for one recording.

    Args:
        activ_data: Table as built by ``get_data``; column 0 holds the label,
            column 1 the time axis and column 4 the velocity.
        size: Not used by this function; kept so existing calls keep working.
    """
    _, axes = plt.subplots(figsize=(15, 4))

    timestamps, speed = activ_data[:, 1], activ_data[:, 4]
    axes.plot(timestamps, speed)
    axes.set_xlabel('t(s)')
    axes.set_ylabel('v(m/s)')

    # The heading starts with the activity name of the first row's label.
    heading = LABEL[activ_data[0, 0]] + ' velocity vs time'
    axes.set_title(heading, fontsize=16)

    plt.show()
    

# split each activity into train/test sets and merge all activities into .npy files

In [44]:
def split_data(datas, train_ratio=0.8, seed=None):
    """Randomly split segments and labels into a training and a test part.

    Every sample is assigned independently: it goes to the training part when
    a uniform draw from ``[0, 1)`` is below ``train_ratio``. The realised
    split sizes therefore only approximate the ratio.

    Args:
        datas: ``(segments, labels)`` pair with matching first dimensions.
        train_ratio: Probability that a sample lands in the training part.
        seed: Optional seed for a private random generator, making the split
            reproducible. With ``None`` (the default) the global NumPy random
            state is used, exactly as before.

    Returns:
        tuple: ``(train_segments, train_labels, test_segments, test_labels)``.
    """
    segments, labels = datas
    # Boolean-mask indexing below needs arrays; this is a no-op for ndarrays.
    segments = np.asarray(segments)
    labels = np.asarray(labels)
    total_samples = len(segments)

    if seed is None:
        draws = np.random.rand(total_samples)
    else:
        draws = np.random.RandomState(seed).rand(total_samples)
    train_mask = draws < train_ratio

    train_segments = segments[train_mask]
    test_segments = segments[~train_mask]
    train_labels = labels[train_mask]
    test_labels = labels[~train_mask]

    print("After splitting:")
    print(f'train:{train_segments.shape},test:{test_segments.shape}')
    return train_segments, train_labels, test_segments, test_labels


In [45]:

# Accumulators for the merged dataset; each key is also the stem of the .npy
# file written at the end of this cell.
data = {
    'train_segments': np.empty((0, window_size), dtype=int),
    'train_labels': np.empty(0, dtype=int),
    'test_segments': np.empty((0, window_size), dtype=int),
    'test_labels': np.empty(0, dtype=int),
}
train_ratio = 0.8

# Visit every activity folder listed in `sub_dir` (no hard-coded count, so the
# loop follows the list when activities are added or removed).
for IDX, activity_dir in enumerate(sub_dir):
    prefix = parent_dir + activity_dir + '/'
    # NOTE(review): the banner says "Testdata", but this step loads the whole
    # activity folder and then splits it into train and test — confirm wording.
    print("=====loading Testdata=====")
    shuffle_results = split_data(combine_segments(prefix), train_ratio=train_ratio)

    # split_data returns its four arrays in the same order as these keys.
    for i, key in enumerate(['train_segments', 'train_labels', 'test_segments', 'test_labels']):
        data[key] = np.concatenate([data[key], shuffle_results[i]], axis=0)

# Write one .npy file per key into the current working directory.
print("=====saving data=====")
for key, value in data.items():
    print(f"{key}.shape: {value.shape}")
    np.save(f"{key}.npy", value)

# NOTE(review): `t` is not used in the visible part of the notebook; kept in
# case a later cell reads it.
t = data['train_segments']


=====loading Testdata=====
file_path:./datas2/00Sitting/Sitting 2023-12-13 17229.txt
segments.shape(1208, 25)
labels.shape(1208,)
file_path:./datas2/00Sitting/Sitting 2023-12-13 173537.txt
segments.shape(1026, 25)
labels.shape(1026,)
file_path:./datas2/00Sitting/Sitting 2023-12-21 162741.txt
segments.shape(650, 25)
labels.shape(650,)
==SummarizeTrain==Sitting====
segments.shape(2884, 25)
labels.shape(2884,)
After splitting:
train:(2342, 25),test:(542, 25)
=====loading Testdata=====
file_path:./datas2/01Waving/Waving 2023-12-13 20546.txt
segments.shape(1120, 25)
labels.shape(1120,)
file_path:./datas2/01Waving/Waving 2023-12-13 2199.txt
segments.shape(1274, 25)
labels.shape(1274,)
file_path:./datas2/01Waving/Waving 2023-12-21 163641.txt
segments.shape(440, 25)
labels.shape(440,)
==SummarizeTrain==Stretching====
segments.shape(2834, 25)
labels.shape(2834,)
After splitting:
train:(2269, 25),test:(565, 25)
=====loading Testdata=====
file_path:./datas2/02Walking/Walking 2023-12-14 10614.txt
