In [1]:
import ast

import numpy as np
import sklearn
import sklearn.metrics
import sklearn.utils

from peratouch.config import path_five_users_main, path_five_users_first
from peratouch.data import Data, load_data
from peratouch.networks import CNN
from peratouch.results import Results
from peratouch.trainer import Trainer

def run_n_users(X, y, n_folds=5, folds_to_run=1):
    """
    Fit a fresh CNN to the dataset (X, y) and evaluate it on held-out data.

    The dataset is split into `n_folds` folds via `Data.make_folds`. For each
    of the first `folds_to_run` folds a new model is trained and tested, and
    the test predictions of every fold that was run are pooled together.
    Assumes data is already shuffled.

    Parameters
    ----------
    X, y :
        Inputs and labels, handed to `Data`. The network output size is the
        number of distinct values in `y`.
    n_folds : int
        Number of folds the data is split into.
    folds_to_run : int
        How many of those folds are actually trained and evaluated.
        The default of 1 is a single train/test split (fast); pass
        `folds_to_run=n_folds` for full cross-validation.

    Returns
    -------
    (actual_vals, predictions) : tuple of two lists
        True labels and predicted labels pooled over the folds that were run.

    Raises
    ------
    ValueError
        If `folds_to_run` is not between 1 and `n_folds`.
    """
    if not 1 <= folds_to_run <= n_folds:
        raise ValueError(
            f"folds_to_run must be between 1 and n_folds={n_folds}, got {folds_to_run}"
        )

    D = Data(X, y)

    n_out = len(np.unique(y))

    # Create indices of several folds
    D.make_folds(n_folds)     # Makes indices available inside class

    predictions = []
    actual_vals = []

    for _ in range(folds_to_run):
        D.next_fold()
        D.normalize()
        D.tensors_to_device()
        D.print_shapes()
        model = CNN(n_ch=1, out_size=n_out)      # Initialize new model each fold
        T = Trainer(D)
        # Aim for 20 minibatches; never let the batch size drop to 0
        # when the training split has fewer than 20 samples.
        batch_size = max(1, len(D.xtr) // 20)
        T.setup(model, max_epochs=20, batch_size=batch_size)
        T.train_model(model)
        R = Results(D, model)
        R.test_metrics()
        preds, actual = R.get_preds_actual()

        predictions.extend(preds)
        actual_vals.extend(actual)

    print(sklearn.metrics.classification_report(actual_vals, predictions))
    return actual_vals, predictions

In [2]:
from peratouch.config import path_analysis_results
import itertools

# Maps str(user combination) -> (actual labels, predicted labels) from run_n_users
results_dict = {}

# Only 3-user combinations are evaluated here; widen to range(2, 6) to cover 2 to 5 users.
number_users = range(3, 4)

# The full dataset does not depend on the chosen combination, so load it once
# instead of re-reading it for every combination.
X_all, y_all = load_data(path_five_users_main)

for n_users in number_users:

    user_combinations = itertools.combinations(range(5), n_users)      # From 5 users choose n_users

    for users in user_combinations:
        print("\n\nRunning combination of users ", users)

        # Choose one given combination of users
        masks = [y_all == u for u in users]
        Xraw = np.concatenate([X_all[m] for m in masks])

        # Labels must form the increasing range 0, 1, 2, ...: the i-th selected
        # user gets label i. Building them from the masks (rather than relabelling
        # in place) cannot collide with a not-yet-relabelled original user id.
        yraw = np.concatenate(
            [np.full(np.count_nonzero(m), i, dtype=y_all.dtype) for i, m in enumerate(masks)]
        )

        # Shuffle data to destroy ordering of users
        Xraw, yraw = sklearn.utils.shuffle(Xraw, yraw, random_state=42)

        # Run same routine for users selected
        results_dict[str(users)] = run_n_users(Xraw, yraw)

        # Store results at each run, so a crash does not lose finished combinations
        np.savez(str(path_analysis_results / "number_users.npz"), **results_dict)



Running combination of users  (0, 1, 2)


-- New Fold --
Train, test and validation arrays normalized to:
[1.], [1.0024], [0.9983]
Using Device:  cpu , dtype:  torch.float32

Raw data shape:  (66079, 1, 32) 
Labels shape:  (66079,) 
Shape of test set: (13216, 1, 32) 
Shape of train set: (44933, 1, 32) 
Shape of validation set: (7930, 1, 32) 
Unique labels:  [0 1 2] 
Fraction of test labels:  [0.36, 0.3, 0.34] 
Fraction of validation labels:  [0.37, 0.29, 0.34] 
Fraction of train labels:  [0.37, 0.3, 0.34] 
dtype of inputs:  torch.float32

 Start of training model:

End of epoch 1: loss_tr=0.595, loss_val=0.616, train=77.1%, val=76.1%
End of epoch 3: loss_tr=0.532, loss_val=0.550, train=79.0%, val=78.0%
End of epoch 5: loss_tr=0.477, loss_val=0.495, train=81.7%, val=81.1%
End of epoch 7: loss_tr=0.487, loss_val=0.507, train=80.4%, val=79.9%
End of epoch 9: loss_tr=0.539, loss_val=0.557, train=77.0%, val=75.9%
End of epoch 11: loss_tr=0.472, loss_val=0.488, train=81.0%, val=80.3%
End o

End of epoch 3: loss_tr=0.646, loss_val=0.661, train=70.5%, val=69.4%
End of epoch 5: loss_tr=0.622, loss_val=0.639, train=71.9%, val=71.1%
End of epoch 7: loss_tr=0.602, loss_val=0.621, train=73.2%, val=72.6%
End of epoch 9: loss_tr=0.620, loss_val=0.637, train=72.0%, val=71.0%
End of epoch 11: loss_tr=0.612, loss_val=0.633, train=72.4%, val=71.2%
End of epoch 13: loss_tr=0.593, loss_val=0.614, train=73.1%, val=72.2%
End of epoch 15: loss_tr=0.592, loss_val=0.607, train=73.4%, val=71.9%
End of epoch 17: loss_tr=0.555, loss_val=0.572, train=75.2%, val=74.1%
End of epoch 19: loss_tr=0.566, loss_val=0.584, train=74.3%, val=73.3%

Training Complete!
Loading best weights for lowest validation loss=0.572 ...

Average running time per epoch: 0.72 seconds
Total running time: 13.77 seconds

Test dataset metrics:
Overall Accuracy = 74.6%, Matthews Corr Coef = 0.62


              precision    recall  f1-score   support

           0       0.71      0.74      0.72      4729
           1       0.


Training Complete!
Loading best weights for lowest validation loss=0.550 ...

Average running time per epoch: 0.67 seconds
Total running time: 12.74 seconds

Test dataset metrics:
Overall Accuracy = 76.8%, Matthews Corr Coef = 0.65


              precision    recall  f1-score   support

           0       0.85      0.74      0.79      3910
           1       0.71      0.81      0.75      4478
           2       0.78      0.75      0.76      3629

    accuracy                           0.77     12017
   macro avg       0.78      0.77      0.77     12017
weighted avg       0.77      0.77      0.77     12017



Running combination of users  (2, 3, 4)


-- New Fold --
Train, test and validation arrays normalized to:
[1.], [1.0005], [0.999]
Using Device:  cpu , dtype:  torch.float32

Raw data shape:  (62660, 1, 32) 
Labels shape:  (62660,) 
Shape of test set: (12532, 1, 32) 
Shape of train set: (42608, 1, 32) 
Shape of validation set: (7520, 1, 32) 
Unique labels:  [0 1 2] 
Fraction of te

In [3]:
# Read the saved arrays eagerly into a plain dict so the .npz file handle is
# closed right away (an open handle can block re-saving the file on some platforms).
with np.load(str(path_analysis_results / "number_users.npz")) as npz_file:
    stored_results = {key: npz_file[key] for key in npz_file.files}

# Accuracy of every stored run, grouped by the number of users in its combination.
# Built purely from what is on disk, so this cell works without re-running training.
dict_plot = {}

for key, (actual, pred) in stored_results.items():
    print(key, " : ", len(stored_results[key]))
    print(sklearn.metrics.classification_report(actual, pred))
    # Keys are str() of a tuple of user ids; literal_eval parses them
    # without executing arbitrary code read from the file.
    dict_plot.setdefault(len(ast.literal_eval(key)), []).append(np.mean(actual == pred))

(0, 1, 2)  :  2
              precision    recall  f1-score   support

           0       0.78      0.90      0.83      4703
           1       0.87      0.85      0.86      4015
           2       0.85      0.72      0.78      4498

    accuracy                           0.83     13216
   macro avg       0.83      0.83      0.83     13216
weighted avg       0.83      0.83      0.82     13216

(0, 1, 3)  :  2
              precision    recall  f1-score   support

           0       0.85      0.91      0.88      4722
           1       0.84      0.76      0.80      3963
           2       0.79      0.80      0.80      4556

    accuracy                           0.83     13241
   macro avg       0.83      0.82      0.82     13241
weighted avg       0.83      0.83      0.83     13241

(0, 1, 4)  :  2
              precision    recall  f1-score   support

           0       0.81      0.79      0.80      4755
           1       0.90      0.88      0.89      3992
           2       0.69    

In [5]:
# For each entry of dict_plot, print its list of accuracies
# followed by their mean and standard deviation.
for group_size, accuracies in dict_plot.items():
    print(group_size, " : ", accuracies)
    summary = (np.mean(accuracies), np.std(accuracies))
    print(group_size, ": ", summary[0], " +/- ", summary[1])





3  :  [0.8261198547215496, 0.8274299524205121, 0.7991082286177543, 0.8058301831927887, 0.746147859922179, 0.768, 0.8065591564583656, 0.8437291527685123, 0.7680785553798785, 0.802026811362911]
3 :  0.799302975484445  +/-  0.028925728820840466
