In [61]:
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

This code loads your CSV file, splits the data into a training set and a test set, and creates a DataLoader for each. The DataLoader can be used to iterate through the data in batches, which is useful for training a neural network.

You can replace 'yourfile.csv' with the path to your actual file. Also, note that this assumes your CSV file doesn't have a header. If it does, you might need to skip the first row.

In [62]:
class AccelDataset(Dataset):
    def __init__(self, data, labels, sequence_length=10):
        self.data = [data[i:i+sequence_length] for i in range(len(data) - sequence_length + 1)]
        self.labels = labels[(int)(sequence_length/2) - 1 : len(data) - (sequence_length - (int)(sequence_length/2))]
        # change to get the majority
        self.sequence_length = sequence_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx].transpose(0, 1), self.labels[idx]  # Transposing the sequence and channel dimensions


In [63]:
# Load CSV file
WORKAREA_PATH = './'
class_num = 8 ## Although annoying, requiring this be manually adjusted
    ## to open the file also means that a later instance of this number will be correct
dataframe = pd.read_csv(WORKAREA_PATH + f'Data/COMBINED_Type5-WithClassNum{class_num}-Freq10-Labeled_Motion-sessions_23-24_Fall.csv')
# code test file: Data/Week 1/Left then Right/Processed/Type3-Freq10-Labeled_Motion-sessions_2023-08-26_17-25-54.csv
# classifier training file: Data/COMBINED_Type3-Freq10-Labeled_Motion-sessions_23-24_Fall.csv
# LIST OF FULL-SIZED FILES:
    # Data/COMBINED_Type3-Freq10-Labeled_Motion-sessions_23-24_Fall.csv
    # Data/COMBINED_Type5-WithClassNum{class_num}-Freq10-Labeled_Motion-sessions_23-24_Fall.csv

print(len(dataframe))
print()
# print(dataframe.columns)
dataframe.columns = pd.Index(np.arange(len(dataframe.columns)), dtype='int64')
# print(dataframe.columns)
def skcounter(df):
    skarr = np.unique(df, return_counts = True)
    # print(skarr)
    # find_index = np.where(skarr[0], True, False)
    # for i in range(len(find_index)):
    #     if find_index[i]: ret_valsk = skarr[1][i]
    ret_dict = dict(zip(skarr[0], skarr[1]))
    print(ret_dict)
    ret_valsk = ret_dict.get(1, 0)
    print(ret_valsk)
    # print(dataframe)
    return ret_valsk
avg_count = int(np.median(dataframe[dataframe.columns[3 : ]].apply(lambda x: skcounter(x)).to_numpy()))
# class_counts = {}
# for i in dataframe.columns[input_num : ]:
#     if i == SKDescriptors.STATIONARY_CLASS:
#         stationary_rows = dataframe[dataframe[i] == 1]
#         continue
#     class_counts[i] = len(dataframe[dataframe[i] == 1])

print()
for i in dataframe.columns[3 : ]:
    class_i_rows = dataframe[dataframe[i] == 1]
    # this is the line doing the actual randomization
    sample_rows = class_i_rows.sample(min(len(class_i_rows), avg_count), random_state=42)
    print(len(sample_rows))
    dataframe = dataframe.drop(class_i_rows.drop(sample_rows.index).index)
    print(len(dataframe))


# stat and other adjustments only
# stationary_rows = dataframe[dataframe[len(dataframe.columns) - 2] == 1]
# other_rows = dataframe[dataframe[len(dataframe.columns) - 1] == 1]
# print()
# print(avg_count)
# print("statlen: " + str(len(stationary_rows)))
# print("otherlen: " + str(len(other_rows)))
# # this is the line doing the actual randomization
# sample_rows = stationary_rows.sample(avg_count, random_state=42)
# print(len(sample_rows))
# dataframe = dataframe.drop(stationary_rows.drop(sample_rows.index).index)
# print(len(dataframe))
# # this is the line doing the actual randomization
# sample_rows = other_rows.sample(avg_count, random_state=42)
# print(len(sample_rows))
# dataframe = dataframe.drop(other_rows.drop(sample_rows.index).index)
# print(len(dataframe))





# Get data and labels from dataframe
data = dataframe.iloc[:, :3].values  # x, y, z data
labels = dataframe.iloc[:, 3:].values  # labels


sequence_length = 10

# Split data into training and test sets
data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Convert data to tensors
data_train = torch.tensor(data_train, dtype=torch.float32)  
data_test = torch.tensor(data_test, dtype=torch.float32)

# Convert labels to tensors and get max index (assuming one-hot encoding)
labels_train = torch.argmax(torch.tensor(labels_train, dtype=torch.float32), dim=1)
labels_test = torch.argmax(torch.tensor(labels_test, dtype=torch.float32), dim=1)

# Create data loaders
train_dataset = AccelDataset(data_train, labels_train, sequence_length)
test_dataset = AccelDataset(data_test, labels_test, sequence_length)

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)


22292

{0: 21276, 1: 1016}
1016
{0: 21244, 1: 1048}
1048
{0: 20288, 1: 2004}
2004
{0: 20350, 1: 1942}
1942
{0: 22006, 1: 286}
286
{0: 22022, 1: 270}
270
{0: 8990, 1: 13302}
13302
{0: 19869, 1: 2423}
2423

1016
22292
1048
22292
1495
21783
1495
21336
286
21336
270
21336
1495
9529
1495
8601


In this code, we added 2 more convolutional layers, which can extract more complex features from your accelerometer data. The number of output channels in the convolutional layers gradually increases, as it is common in many deep learning models to gradually increase the complexity and decrease the spatial size.

In [64]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        self.conv1 = nn.Conv1d(3, 64, kernel_size=3)
        self.conv2 = nn.Conv1d(64, 128, kernel_size=3)
        self.conv3 = nn.Conv1d(128, 256, kernel_size=3)
        
        # Adjust the fully connected layer's input size based on the new sequence length after convolutions.
        # Adjusted for sequence length = 4 after 3 conv layers with kernel size 3
        # 10 -3 + 1 = 8 after the first layer
        # 8 - 3 + 1 = 6 after the second layer
        # 6 - 3 + 1 = 4 after the third layer
        self.fc1 = nn.Linear(256 * (sequence_length - 6), 128)  # Adjusted for sequence length = 4 after 3 conv layers with kernel size 3
        self.fc2 = nn.Linear(128, class_num)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = x.view(-1, self.num_flat_features(x))  # Flatten the tensor
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)  # Apply softmax to the output layer

    def num_flat_features(self, x):
        size = x.size()[1:]  # All dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


Please note that the dimension of the input to the fully connected layer depends on the output size of your last convolutional layer. This code assumes that after 3 layers of convolution with kernel size 5 and stride 1, the sequence length is reduced to 82 (from the original 100). You may need to adjust this according to your own situation.

In [65]:
# Instantiate the network and optimizer
net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.01)

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Assume we have a data loader `train_dataloader` which loads our training accelerometer data
for epoch in range(1001):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if (epoch % 200) == 0:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

# Now we will validate the model using test data
correct = 0
total = 0

with torch.no_grad():   # Since we're not training, we don't need to calculate the gradients
    for data in test_dataloader:
        inputs, labels = data
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the test data: %d %%' % (100 * correct / total))



# Saving the entire model

torch.save(net, 'model.pth')

[1,     1] loss: 0.001


[1,     2] loss: 0.001
[1,     3] loss: 0.001
[1,     4] loss: 0.001
[1,     5] loss: 0.001
[1,     6] loss: 0.001
[1,     7] loss: 0.001
[1,     8] loss: 0.001
[1,     9] loss: 0.001
[1,    10] loss: 0.001
[1,    11] loss: 0.001
[1,    12] loss: 0.001
[1,    13] loss: 0.001
[1,    14] loss: 0.001
[1,    15] loss: 0.001
[1,    16] loss: 0.001
[1,    17] loss: 0.001
[1,    18] loss: 0.001
[1,    19] loss: 0.001
[1,    20] loss: 0.001
[1,    21] loss: 0.001
[1,    22] loss: 0.001
[1,    23] loss: 0.001
[1,    24] loss: 0.001
[1,    25] loss: 0.001
[1,    26] loss: 0.001
[1,    27] loss: 0.001
[1,    28] loss: 0.001
[1,    29] loss: 0.001
[1,    30] loss: 0.001
[1,    31] loss: 0.001
[1,    32] loss: 0.001
[1,    33] loss: 0.001
[1,    34] loss: 0.001
[1,    35] loss: 0.001
[1,    36] loss: 0.001
[1,    37] loss: 0.001
[1,    38] loss: 0.001
[1,    39] loss: 0.001
[1,    40] loss: 0.001
[1,    41] loss: 0.001
[1,    42] loss: 0.001
[1,    43] loss: 0.001
[1,    44] loss: 0.001
[1,    45] 