# Transferred DCNN (VGG16). 
## Training transferred model on all users except one then testing on that user.
* This is done for each user in turn to help determine the accuracy and robustness of the model.
* This is also evaluated for 3s, 2s, 1.5s and 1s spectrogram windows to compare the trade-off between accuracy and lower latency.

Results to be compared with /baseline_models/leave_one_user_out_cnn_64_128.ipynb

### Summary
*   1, 1.5, 2 and 3 second windows tested
*   0 degree aspect angle
*   All movements
*   Transferred, pre-trained DCNN (VGG16)
*   Datasets:
    * 1: Miss out A
    * 2: Miss out B
    * 3: Miss out C
    * 4: Miss out D
    * 5: Miss out E
    * 6: Miss out F

*   Take a DCNN trained on image classification
*   Adapt to spectrogram classification
  




Inspired by "Micro-Doppler Based Classification of Human Aquatic Activities via Transfer Learning of Convolutional Neural Networks"

## Train or Load Toggle
These variables set whether to load results or train the model.
* If set to True then the model will be trained, history saved, new graphs generated and saved and new analysis produced.
* If set to False then a pre-trained version of the model will be loaded along with a history object.

In [None]:
# Per-window switches: True trains and saves the model for that window length,
# False loads the previously saved results instead.
TRAIN_MODEL_3s = False
TRAIN_MODEL_2s = True
# Spelled like the other toggles; the 1.5 second window cell reads this name.
TRAIN_MODEL_1_5s = False
TRAIN_MODEL_1s = False

# Backward-compatible alias for the earlier spelling of the 1.5 second toggle.
TRAIN_MODEL_1_5_s = TRAIN_MODEL_1_5s

## Notebook setup

Allow editing of modules using editor (auto reloading)

In [0]:
# Needed to allow editing using PyCharm: with autoreload enabled, modules
# edited in an external editor are re-imported without restarting the kernel.
%load_ext autoreload
%autoreload 2

Needed for compatibility when using both CoLab and Local Jupyter notebook. It sets the appropriate file path for the data and also installs local packages such as models and data_loading.

In [2]:
import os
# Choose the project root from the current working directory.
if os.getcwd() == '/content':
    # '/content' is treated as Google Colab: mount Drive, install the project
    # as an editable package and move into the project directory.
    from google.colab import drive
    drive.mount('/content/gdrive')
    BASE_PATH = '/content/gdrive/My Drive/Level-4-Project/'
    !cd gdrive/My\ Drive/Level-4-Project/ && pip install --editable .
    os.chdir('gdrive/My Drive/Level-4-Project/')
    
elif os.getcwd() == 'D:\\Google Drive\\Level-4-Project\\notebooks\\transferred_DCNN_experiments':
    # Local Windows checkout, notebook started from its own directory.
    BASE_PATH = "D:/Google Drive/Level-4-Project/"
    
else:
    # Any other working directory falls back to this fixed path.
    BASE_PATH = "/export/home/2192793m/Level-4-Project/"
    
# Directories derived from the project root.
DATA_PATH = BASE_PATH + 'data/processed/'
# One directory per leave-one-user-out split (datasets 1-6 miss out users A-F).
DATA_SETS = ["dataset_1/", "dataset_2/", "dataset_3/", "dataset_4/", "dataset_5/", "dataset_6/"]
MODEL_PATH = BASE_PATH + 'models/transferred_DCNN/'
RESULTS_PATH = BASE_PATH + 'results/transferred_DCNN/leave_one_user_out/'
FIGURE_PATH = BASE_PATH + 'reports/transferred_DCNN/figures/'
REPORT_PATH = BASE_PATH + 'reports/transferred_DCNN/'
    
# Project-local modules from the `src` package.
from src.models.transferred_DCNN import vgg_16
from src.data import load_data
from src.visualization import visualize, multiple_plots

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive
Obtaining file:///content/gdrive/My%20Drive/Level-4-Project
Installing collected packages: src
  Running setup.py develop for src
Successfully installed src


Using TensorFlow backend.


Import remaining packages

In [0]:
import numpy as np
import sys
from six.moves import cPickle
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.utils import np_utils
import sys
from sklearn.metrics import classification_report, confusion_matrix
import csv
from keras.models import load_model
from keras.applications.vgg16 import VGG16
from keras.layers import Dense, Dropout, Flatten
from keras.models import Model
from sklearn.model_selection import StratifiedKFold

In [0]:
# Needed as originally code was for theano backend but now using tensor flow
from keras import backend as K
# 'th' selects the Theano-style dimension ordering (channels first) for image data.
K.set_image_dim_ordering('th')

## Experiment Setup and Parameter Definition

### Parameter Definition

In [0]:
# Activity class names; also passed as target_names to the classification report.
target_names = ['ArmFasterTowards', 'ArmSlowerTowards', 'CirclingArm', 'Clapping', 'PickingUp', 'Sitting', 'Walking']
nb_classes = len(target_names)
batch_size = 100
# NOTE(review): this was assigned 20 and then immediately overridden with 3;
# only the effective value is kept. Confirm 3 epochs is intended for full runs.
nb_epoch = 3

# input image dimensions (images are spectrograms)
img_rows, img_cols = 75, 75

`load_datasets` takes the window size as a string ("1", "1_5", "2" or "3") together with a list of dataset directory names of the form ["dataset_1/", "dataset_2/", ...].

Returned is a dictionary indexed by dataset name which contains the training and testing sets.

In [None]:
def load_datasets(window_size, datasets):
    """Load and pre-process the train/test split of every requested dataset.

    Args:
        window_size: Window length as it appears in the data directory name
            (the notebook passes "1", "1_5", "2" or "3").
        datasets: Iterable of dataset directory names such as
            ["dataset_1/", "dataset_2/", ...].

    Returns:
        Dict keyed by dataset name without the trailing slash. Each value is a
        dict with "train_labels" / "test_labels" (one-hot class matrices) and
        "train_data" / "test_data" (float32, divided by 255 and stacked three
        times).
    """
    def to_vgg_input(images):
        scaled = images.astype('float32') / 255
        # Stack three times to fit VGG16 (RGB); the spectrogram is treated as
        # grayscale. np.hstack joins along the second axis.
        # assumes that axis is the channel axis, i.e. (n, 1, rows, cols) - TODO confirm
        return np.hstack((scaled, scaled, scaled))

    # Separate name for the output so the `datasets` argument is not clobbered.
    loaded_datasets = {}
    for dataset in datasets:
        # load the training and testing sets
        loaded_data = load_data.load_dataset(DATA_PATH + window_size + "/" + dataset)

        # rstrip removes the trailing forward slash from the directory name
        loaded_datasets[dataset.rstrip("/")] = {
            # convert class vectors to binary class matrices
            "train_labels": np_utils.to_categorical(loaded_data["train_labels"], nb_classes),
            "test_labels": np_utils.to_categorical(loaded_data["test_labels"], nb_classes),
            "train_data": to_vgg_input(loaded_data["train_data"]),
            "test_data": to_vgg_input(loaded_data["test_data"]),
        }
    return loaded_datasets

In [0]:
def make_model(img_rows, img_cols, nb_classes):
    """Build the transferred VGG16 network and compile it ready for training.

    The network is created by ``vgg_16.make_model`` and compiled with
    categorical cross-entropy loss, an SGD optimiser using Nesterov momentum,
    and accuracy as the reported metric.
    """
    network = vgg_16.make_model(img_rows, img_cols, nb_classes)
    optimiser = SGD(lr=0.0001, momentum=0.9, decay=1e-6, nesterov=True)
    network.compile(
        optimizer=optimiser,
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return network

## Training and Evaluating Models

In [None]:
def train_model(train_data, train_labels, test_data, test_labels):
    """Train a fresh model on one split and evaluate it on the held-out data.

    Args:
        train_data: Training inputs.
        train_labels: One-hot training labels.
        test_data: Held-out inputs; also used as validation data during
            fitting so the per-epoch held-out metrics end up in the history.
        test_labels: One-hot labels for ``test_data``.

    Returns:
        Dict with:
            "model": the trained model,
            "history": the object returned by ``model.fit``,
            "classification_report": report in dict form,
            "confusion_matrix": the computed confusion matrix,
            "evaluation": result of ``model.evaluate`` ([loss, accuracy],
                since the model is compiled with the accuracy metric).
    """
    model = make_model(img_rows, img_cols, nb_classes)

    history = model.fit(
        train_data,
        train_labels,
        batch_size=batch_size,
        epochs=nb_epoch,
        validation_data=(test_data, test_labels),
        shuffle=True,
        verbose=1)

    # Use the function arguments rather than any notebook-level variable, and
    # take the argmax of the predicted scores so this works for both
    # Sequential and functional Keras models.
    y_true = np.argmax(test_labels, axis=1)
    y_pred = np.argmax(model.predict(test_data), axis=1)

    report = classification_report(y_true, y_pred, target_names=target_names, output_dict=True)
    conf_matrix = confusion_matrix(y_true, y_pred)
    evaluation = model.evaluate(test_data, test_labels, batch_size=batch_size, verbose=0)
    return {
        "model": model,
        "history": history,
        "classification_report": report,
        # the computed matrix, not the sklearn function of the same name
        "confusion_matrix": conf_matrix,
        "evaluation": evaluation
    }

In [None]:
def save_results(results, window_length):
    """Persist the trained models and their results for one window length.

    Each model is written to its own ``.h5`` file under ``MODEL_PATH``; the
    remaining entries of every result are pickled together into a single file
    under ``RESULTS_PATH``. The ``results`` argument is not modified.

    Args:
        results: Dict mapping dataset name to a result dict that contains a
            "model" entry exposing ``save``.
        window_length: Window length string used in the output paths
            (e.g. "3" or "1_5").
    """
    picklable_results = {}
    for dataset_name, result in results.items():
        result["model"].save(MODEL_PATH + window_length + "/" + dataset_name + '.h5')
        # Models are stored separately above, so leave them out of the pickle.
        picklable_results[dataset_name] = {
            key: value for key, value in result.items() if key != "model"
        }

    # NOTE(review): the "history" entry is pickled as-is - confirm that the
    # object stored there is picklable.
    # cPickle is the pickle module imported from six.moves with the other imports.
    with open(RESULTS_PATH + window_length + "_results.pkl", "wb") as results_file:
        cPickle.dump(picklable_results, results_file)

In [None]:
def load_results(window_length):
    """Load the saved results for a window length and re-attach the models.

    Args:
        window_length: Window length string used in the file paths
            (e.g. "3" or "1_5").

    Returns:
        Dict mapping dataset name to its result dict, with the "model" entry
        restored from the matching ``.h5`` file under ``MODEL_PATH``.

    Raises:
        Errors from ``open`` or ``load_model`` (e.g. a missing file) are not
        caught here and propagate to the caller.
    """
    # NOTE: unpickling can execute arbitrary code - only load result files that
    # were written by this notebook.
    with open(RESULTS_PATH + window_length + "_results.pkl", "rb") as results_file:
        results = cPickle.load(results_file)
    for dataset_name, result in results.items():
        result["model"] = load_model(MODEL_PATH + window_length + "/" + dataset_name + '.h5')
    return results

### 3 second window

In [None]:
# Results for the 3 second window, keyed by dataset name.
results_3_seconds = {}
if TRAIN_MODEL_3s:
    datasets = load_datasets("3", DATA_SETS)
    for dataset_name, data in datasets.items():
        results_3_seconds[dataset_name] = train_model(
            data["train_data"], data["train_labels"], data["test_data"], data["test_labels"])
    save_results(results_3_seconds, "3")
else:
    try:
        # Fall back to an empty dict if loading hands back nothing usable.
        results_3_seconds = load_results("3") or {}
    except Exception as error:
        # Best-effort load: keep the notebook running, but report the real
        # cause instead of assuming the file is missing.
        print("Could not load saved 3 second results ({}: {})".format(type(error).__name__, error))

### 2 second window

In [None]:
# Results for the 2 second window, keyed by dataset name.
results_2_seconds = {}
if TRAIN_MODEL_2s:
    datasets = load_datasets("2", DATA_SETS)
    for dataset_name, data in datasets.items():
        results_2_seconds[dataset_name] = train_model(
            data["train_data"], data["train_labels"], data["test_data"], data["test_labels"])
    save_results(results_2_seconds, "2")
else:
    try:
        # Fall back to an empty dict if loading hands back nothing usable.
        results_2_seconds = load_results("2") or {}
    except Exception as error:
        # Best-effort load: keep the notebook running, but report the real
        # cause instead of assuming the file is missing.
        print("Could not load saved 2 second results ({}: {})".format(type(error).__name__, error))

### 1.5 second window

In [None]:
# Results for the 1.5 second window, keyed by dataset name.
results_1_5_seconds = {}
if TRAIN_MODEL_1_5s:
    datasets = load_datasets("1_5", DATA_SETS)
    for dataset_name, data in datasets.items():
        results_1_5_seconds[dataset_name] = train_model(
            data["train_data"], data["train_labels"], data["test_data"], data["test_labels"])
    save_results(results_1_5_seconds, "1_5")
else:
    try:
        # Fall back to an empty dict if loading hands back nothing usable.
        results_1_5_seconds = load_results("1_5") or {}
    except Exception as error:
        # Best-effort load: keep the notebook running, but report the real
        # cause instead of assuming the file is missing.
        print("Could not load saved 1.5 second results ({}: {})".format(type(error).__name__, error))

### 1 second window

In [None]:
# Results for the 1 second window, keyed by dataset name.
results_1_seconds = {}
if TRAIN_MODEL_1s:
    datasets = load_datasets("1", DATA_SETS)
    for dataset_name, data in datasets.items():
        results_1_seconds[dataset_name] = train_model(
            data["train_data"], data["train_labels"], data["test_data"], data["test_labels"])
    save_results(results_1_seconds, "1")
else:
    try:
        # Fall back to an empty dict if loading hands back nothing usable.
        results_1_seconds = load_results("1") or {}
    except Exception as error:
        # Best-effort load: keep the notebook running, but report the real
        # cause instead of assuming the file is missing.
        print("Could not load saved 1 second results ({}: {})".format(type(error).__name__, error))

## Analysis and Saving of Results

In [None]:
# Passed as the `save` argument of the plotting calls below, together with an output path.
save_graphs = True

### Formatting of data

In [None]:
def convert_keys_to_description(results):
    """Rename ``dataset_N`` keys to descriptive "Test on <user>" labels.

    The more meaningful names help in reports and graphs. The dictionary is
    modified in place and also returned; keys that are not one of
    ``dataset_1`` .. ``dataset_6`` are left untouched.
    """
    descriptions = {
        "dataset_1": "Test on A",
        "dataset_2": "Test on B",
        "dataset_3": "Test on C",
        "dataset_4": "Test on D",
        "dataset_5": "Test on E",
        "dataset_6": "Test on F",
    }
    # Snapshot the keys first: the dictionary is mutated inside the loop.
    for old_name in tuple(results.keys()):
        new_name = descriptions.get(old_name)
        if new_name is not None:
            results[new_name] = results.pop(old_name)
    return results

In [None]:
# Rename the dataset_N keys to "Test on <user>" labels for every window length.
results_3_seconds = convert_keys_to_description(results_3_seconds)
results_2_seconds = convert_keys_to_description(results_2_seconds)
results_1_5_seconds = convert_keys_to_description(results_1_5_seconds)
results_1_seconds = convert_keys_to_description(results_1_seconds)

### Graphs

#### 3 Seconds, comparison of different datasets for test accuracy

In [None]:
# Compare the leave-one-user-out results of the 3 second window in one plot.
# NOTE(review): the figure is saved under REPORT_PATH although a FIGURE_PATH is
# also defined - confirm the intended output directory.
multiple_plots.plot_multiple_val_acc(
    results_3_seconds,
    "Leave One User Out Comparison, 3 Second Window",
    save=save_graphs,
    path=REPORT_PATH + "leave_one_user_out_comparison_3sec.svg")

#### 2 Seconds, comparison of different datasets for test accuracy

In [None]:
# Compare the leave-one-user-out results of the 2 second window in one plot.
multiple_plots.plot_multiple_val_acc(
    results_2_seconds,
    "Leave One User Out Comparison, 2 Second Window",
    save=save_graphs,
    path=REPORT_PATH + "leave_one_user_out_comparison_2sec.svg")

#### 1.5 Seconds, comparison of different datasets for test accuracy

In [None]:
# Compare the leave-one-user-out results of the 1.5 second window in one plot.
multiple_plots.plot_multiple_val_acc(
    results_1_5_seconds,
    "Leave One User Out Comparison, 1.5 Second Window",
    save=save_graphs,
    path=REPORT_PATH + "leave_one_user_out_comparison_1_5sec.svg")

#### 1 Second, comparison of different datasets for test accuracy

In [None]:
# Compare the leave-one-user-out results of the 1 second window in one plot.
multiple_plots.plot_multiple_val_acc(
    results_1_seconds,
    "Leave One User Out Comparison, 1 Second Window",
    save=save_graphs,
    path=REPORT_PATH + "leave_one_user_out_comparison_1sec.svg")

#### Comparison of Time Window using Dataset 1, using test accuracy

In [None]:
def get_test_accuracy(results, dataset):
    """Return the test accuracy stored for one dataset.

    Args:
        results: Dict mapping a dataset key to a result dict whose
            "evaluation" entry is a sequence with the accuracy at index 1.
        dataset: Key of the dataset to look up (e.g. "Test on A").

    Returns:
        The value at index 1 of that dataset's "evaluation" entry.

    Raises:
        KeyError: If ``dataset`` is not present in ``results``.
    """
    # Index with the argument, not the literal string "dataset".
    return results[dataset]["evaluation"][1]

In [None]:
# Held-out user whose test accuracy is compared across the window lengths.
dataset = "Test on A"
window_results = {
    "3": get_test_accuracy(results_3_seconds, dataset),
    "2": get_test_accuracy(results_2_seconds, dataset),
    "1_5": get_test_accuracy(results_1_5_seconds, dataset),
    "1": get_test_accuracy(results_1_seconds, dataset),
}

In [None]:
# Bar chart of the test accuracy on user A for each window size.
# NOTE(review): window_results is keyed "1_5" while the label list below uses
# "1.5" - confirm plot_evaluation_bar does not look values up by label.
multiple_plots.plot_evaluation_bar(
    window_results,
    ["3", "2", "1.5", "1"],
    "Comparison of window size, Trained on B-F, Tested on A",
    "Window Size (s)",
    "Accuracy",
    save=save_graphs,
    path=REPORT_PATH + "window_size_comparison.svg"
    )

### Classification Report and Confusion Matrices

In [0]:
# @TODO