# ARTIFICIAL NEURAL NETWORK 
### NOTE: USED ON GOOGLE COLAB, NOT LOCAL DRIVE)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
from torch.autograd import Variable
import torch.utils.data as Data
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
torch.manual_seed(1) 

In [None]:
#setup Google Drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
def get_dataset(filepath, right_arm=False):
    data0 = pd.read_csv(filepath + ".csv")
    data1 = pd.read_csv(filepath + "1.csv")
    data2 = pd.read_csv(filepath + "2.csv")
    df = pd.concat([data0, data1, data2], ignore_index=True)
    X = "R" if right_arm else "L"
    angle_names = ["{}ShoulderPitch".format(X),
                   "{}ShoulderRoll".format(X),
                   "{}ElbowYaw".format(X),
                   "{}ElbowRoll".format(X)]
    labels = df[angle_names]
    data = df[[column for column in df.columns if column not in angle_names]]
    # filter corrupted data!!!
    data = data.drop_duplicates(keep=False)
    labels = labels.loc[data.index]

    data, labels = torch.Tensor(data.values), torch.Tensor(labels.values)
    return Data.TensorDataset(data, labels)

In [None]:
def get_net(net_version=None):
    if net_version == 0:
        net = nn.Sequential(
                    nn.Linear(6, 200),
                    nn.LeakyReLU(),
                    nn.Linear(200, 100),
                    nn.LeakyReLU(),
                    nn.Linear(100, 100),
                    nn.LeakyReLU(),
                    nn.Linear(100, 100),
                    nn.LeakyReLU(),
                    nn.Linear(100, 100),
                    nn.LeakyReLU(),
                    nn.Linear(100, 100),
                    nn.LeakyReLU(),
                    nn.Linear(100, 100),
                    nn.LeakyReLU(),
                    nn.Linear(100, 100),
                    nn.LeakyReLU(),
                    nn.Linear(100, 100),
                    nn.LeakyReLU(),
                    nn.Linear(100, 50),
                    nn.LeakyReLU(),
                    nn.Linear(50, 4)
                )
    else:
        net = nn.Sequential(
                    nn.Linear(6, 200),
                    nn.LeakyReLU(),
                    nn.Linear(200, 100),
                    nn.LeakyReLU(),
                    nn.Linear(100, 4)
                )
    return net

In [None]:
def train_ann(filepath, net=None, batch_size=10000, epoch=300, learning_rate=0.01, decay=0, net_version=0):

    dataset = get_dataset(filepath)
    loader = Data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    
    if net is None:
        net = get_net(net_version)
    if torch.cuda.is_available():
        net.cuda()
        print('CUDA is available!  Training on GPU ...')

    optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=decay)
    loss_func = nn.MSELoss()

    iters, losses = [], []
    n = 0
    best_result = np.inf
    best_epoch = 0
    for epoch in range(epoch):
        error = np.array([0.0, 0.0, 0.0, 0.0])
        for step, (data, labels) in enumerate(loader):
            #############################################
            #To Enable GPU Usage
            if torch.cuda.is_available():
                data = data.cuda()
                labels = labels.cuda()
            #############################################
            prediction = net(data)
            loss = loss_func(prediction, labels) 
            loss.backward()         # backpropagation, compute gradients
            optimizer.step()        # apply gradients
            optimizer.zero_grad()   # clear gradients for next train
            
            iters.append(n)
            losses.append(float(loss)/batch_size)             # compute *average* loss
            n += 1

            for x in range(4):
                if torch.cuda.is_available():
                    error[x] += mean_absolute_error(prediction[:, x].cpu().detach().numpy(), labels[:,x].cpu().detach().numpy())
                else:
                    error[x] += mean_absolute_error(prediction[:, x].detach().numpy(), labels[:,x].detach().numpy())
        print("Epoch: {}, Error: {}".format(epoch, error / step))
        
        if sum(error) < best_result:
            best_result = sum(error)
            best_epoch = epoch
            print("Best result at Epoch {}. Saving model parameters.".format(epoch))
            model_path = "ann_epoch{}".format(epoch)
            torch.save(net.state_dict(), model_path)   
            
        if (epoch + 1) % 100 == 0:
            # saving the best model at my local google drive for every 100th epoch
            model_path = "ann_epoch{}".format(best_epoch)
            state = torch.load(model_path)
            net.load_state_dict(state)
            model_path = "/content/drive/My Drive/Thesis/left_arm_ANN_model"
            torch.save(net.state_dict(), model_path)

    # plotting
    plt.title("Training Curve")
    plt.plot(iters, losses, label="Train")
    plt.xlabel("Iterations")
    plt.ylabel("Loss")
    plt.show()

In [None]:
left_arm_filepath = "/content/drive/My Drive/Thesis/left_arm_data"
net = get_net()
train_ann(left_arm_filepath, net)

### Hyperparameter tuning result
train_ann(batch_size=10000, learning_rate=0.01, net_version=0) 

Epoch: 51, Error: [0.09198165 0.03829048 0.1201902  0.04457075]

train_ann(batch_size=10000, learning_rate=0.01, net_version=1) 

Epoch: 198, Error: [0.0830939  0.01547376 0.11576251 0.02581779]

# ROUGH WORK PAST THIS POINT!!

## RANDOM FOREST REGRESSOR

In [84]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import pickle
import math

In [85]:
def split_data(df, right_arm):
    X = "R" if right_arm else "L"
    angle_names = ["{}ShoulderPitch".format(X),
                   "{}ShoulderRoll".format(X),
                   "{}ElbowYaw".format(X),
                   "{}ElbowRoll".format(X)]
    labels = df[angle_names]
    data = df[[column for column in df.columns if column not in angle_names]]
    
    # filter corrupted data!!!
    data = data.drop_duplicates(keep=False)
    labels = labels.loc[data.index]
    
    return train_test_split(data, labels, test_size=0.9, random_state=5) 

In [86]:
def train(filepath, right_arm=False):
    # get data
    data0 = pd.read_csv(filepath + ".csv")
    data1 = pd.read_csv(filepath + "1.csv")
    data2 = pd.read_csv(filepath + "2.csv")
    df = pd.concat([data0, data1, data2], ignore_index=True)
    
    # split data
    x_train, x_test, y_train, y_test = split_data(df, right_arm)
    
    # train model
    model=RandomForestRegressor(n_estimators=50) # n_estimator = 100 -> 10 GB pickle file
                                                 # n_estimator = 50 -> 5 GB pickle file (~0.02 error difference)
    model.fit(x_train,y_train)
    # get model accuracy
    y_pred = model.predict(x_test)
    y_pred = pd.DataFrame(y_pred, columns=y_test.columns)
    for column in y_pred.columns:
        print(column, "Error:",mean_absolute_error(y_test[column], y_pred[column]))
    
    # save model
    filename = 'right_arm_model.pkl' if right_arm else 'left_arm_model.pkl'
    pickle.dump(model, open(filename, 'wb'))

In [87]:
left_arm_filepath = "/home/kevinh/Documents/left_arm_data"
right_arm_filepath = "/home/kevinh/Documents/right_arm_data"
train(left_arm_filepath)
# train(right_arm_filepath, right_arm=True)

('LShoulderPitch', 'Error:', 0.07815650313318641)
('LShoulderRoll', 'Error:', 0.018343106460681394)
('LElbowYaw', 'Error:', 0.15007631305212996)
('LElbowRoll', 'Error:', 0.0402890007508116)


## TEST

In [90]:
filepath = left_arm_filepath
data0 = pd.read_csv(filepath + ".csv")
data1 = pd.read_csv(filepath + "1.csv")
# data2 = pd.read_csv(filepath + "2.csv")
df = pd.concat([data0, data1], ignore_index=True)

In [92]:
X = "L"
angle_names = ["{}ShoulderPitch".format(X),
               "{}ShoulderRoll".format(X),
               "{}ElbowYaw".format(X),
               "{}ElbowRoll".format(X)]
labels = df[angle_names]
data = df[[column for column in df.columns if column not in angle_names]]

In [94]:
# filter corrupted data!!!
data = data.drop_duplicates(keep=False)
labels = labels.loc[data.index]

In [81]:
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.9, random_state=5) 

In [82]:
# train model
model=RandomForestRegressor(n_estimators=50)
model.fit(x_train,y_train)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=50, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

In [83]:
# get model accuracy
y_pred = model.predict(x_test)
y_pred = pd.DataFrame(y_pred, columns=y_test.columns)
for column in y_pred.columns:
    print(column, "Error:",mean_absolute_error(y_test[column], y_pred[column]))

('LShoulderPitch', 'Error:', 0.11389112294205424)
('LShoulderRoll', 'Error:', 0.033337454145367554)
('LElbowYaw', 'Error:', 0.2354261375816541)
('LElbowRoll', 'Error:', 0.07061861411256729)


### result (500 thousand data points)
*without rounding data*

- ('LShoulderPitch', 'Error:', 0.11069172338849657)
- ('LShoulderRoll', 'Error:', 0.03154031012584691)
- ('LElbowYaw', 'Error:', 0.2300845963747696)
- ('LElbowRoll', 'Error:', 0.06800276852256665)

*rounding data*

- ('LShoulderPitch', 'Error:', 0.11069172338849657)
- ('LShoulderRoll', 'Error:', 0.03154031012584691)
- ('LElbowYaw', 'Error:', 0.2300845963747696)
- ('LElbowRoll', 'Error:', 0.06800276852256665)

*n_estimators decreased from 200 to 100*
- ('LShoulderPitch', 'Error:', 0.11201086854757766)
- ('LShoulderRoll', 'Error:', 0.03214602543515891)
- ('LElbowYaw', 'Error:', 0.23213343702121328)
- ('LElbowRoll', 'Error:', 0.06910553107129716)

*n_estimators decreased from 200 to 50*
- ('LShoulderPitch', 'Error:', 0.11389112294205424)
- ('LShoulderRoll', 'Error:', 0.033337454145367554)
- ('LElbowYaw', 'Error:', 0.2354261375816541)
- ('LElbowRoll', 'Error:', 0.07061861411256729)

### result (4 million data points)
*n_estimator = 50, 5GB pickle file*
- ('LShoulderPitch', 'Error:', 0.07815650313318641)
- ('LShoulderRoll', 'Error:', 0.018343106460681394)
- ('LElbowYaw', 'Error:', 0.15007631305212996)
- ('LElbowRoll', 'Error:', 0.0402890007508116)