# ToDo :

* ~~consolidate code from notebook into WISDM helper methods file~~
* rewrite kfolds process so that we have more precise control over the size of a fold (as oppose to just the number of folds)
* run experiment that directly compares model using ALL general data + active data to model using ONLY general data from nearest cluster + active data
* analyze/visualize clusters (is there a better algorithm? is there a better k for the k-means?) 
* perhaps compare with using the WORST cluster, or using ONLY the personal data
    * for each size of active data
        * for each algorithm (also ensemble algorithm?)
            * personal only
            * universal only
            * personal + ALL universal
            * personal + best cluster universal
            * personal + worst cluster universal


In [1]:
from wisdm import wisdm
import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit
from sklearn.cluster import KMeans
from scipy.stats import mode

In [2]:
import importlib
importlib.reload(wisdm)

<module 'wisdm.wisdm' from '/Users/scottcambo/Documents/grad_school/SegalProject/WISDM/wisdm/wisdm.py'>

# Test Universal Impersonal Model Pipeline

In [3]:
import warnings

In [13]:
rf_results = []

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for user_id in wisdm.user_ids:
        test_set = wisdm.get_user_set(user_id)
        test_set = wisdm.remove_all_nan(test_set)

        test_labels = np.array([t.decode("utf-8") for t in test_set['class'].as_matrix()])
        test_features = test_set.as_matrix(columns=[test_set.columns[1:-1]])

        #print("%s labels, %s features" % (len(test_labels), len(test_features)))
        # training features & labels
        training_set = wisdm.data_df[wisdm.data_df['user'] != user_id]
        training_set = wisdm.remove_all_nan(training_set)
        training_labels = np.array([t.decode("utf-8") for t in training_set['class'].as_matrix()])
        training_features = training_set.as_matrix(columns=[test_set.columns[1:-1]])

        # normalize features
        scaler = StandardScaler().fit(training_features)
        scaled_train_x = scaler.transform(training_features)
        scaled_test_x = scaler.transform(test_features)

        clf = wisdm.weka_RF()
        clf.fit(scaled_train_x, training_labels)
        predictions = clf.predict(scaled_test_x)

        score = accuracy_score(test_labels, predictions)
        print("Score for %s: %.3f" % (user_id, score))
        rf_results.append(score)

print("RF results : M=%.5f, SD=%.5f" % (np.mean(rf_results), np.std(rf_results)))

Score for 33: 0.80
Score for 17: 0.32
Score for 20: 0.82
Score for 29: 0.78
Score for 13: 0.85
Score for 15: 0.91
Score for 6: 0.91
Score for 27: 0.69
Score for 36: 0.75
Score for 18: 0.85
Score for 32: 0.80
Score for 35: 0.63
Score for 11: 0.79
Score for 16: 0.75
Score for 5: 0.86
Score for 10: 0.77
Score for 28: 0.90
Score for 26: 0.79
Score for 14: 0.73
Score for 24: 0.81
Score for 12: 0.85
Score for 23: 0.73
Score for 4: 0.67
Score for 30: 0.16
Score for 34: 0.83
Score for 8: 0.98
Score for 31: 0.84
Score for 21: 0.65
Score for 3: 0.79
Score for 22: 0.81
Score for 1: 0.78
Score for 9: 0.28
Score for 25: 0.92
Score for 2: 0.52
Score for 7: 0.85
Score for 19: 0.85
RF results : M=0.750052119285, SD=0.174189161602


# Cross-Validation with exact number of data points

### Experiment Attributes : 
* amount of training set from individual end user
* amount of training set from impersonal data (other users)
    * "ALL" all other data
    * "closest cluster" only data from the closest cluster
    * "furthest cluster" only data from the furthest cluster
    * "All - furthest cluster" all other data EXCEPT data from furthest cluster
* test user id
* algorithm
* algorithm parameters



In [4]:
def personal_model(active_features, active_labels, test_features, test_labels):
    scaler = StandardScaler().fit(active_features)
    scaled_train_x = scaler.transform(active_features)
    scaled_test_x = scaler.transform(test_features)

    rfc_clf = wisdm.weka_RF()
    rfc_clf.fit(scaled_train_x, active_labels)
    predictions = rfc_clf.predict(scaled_test_x)
    score = accuracy_score(test_labels, predictions)
    return score

In [5]:
def universal_model(universal_features, universal_labels, test_features, test_labels):
    scaler = StandardScaler().fit(universal_features)
    scaled_train_x = scaler.transform(universal_features)
    scaled_test_x = scaler.transform(test_features)

    rfc_clf = wisdm.weka_RF()
    rfc_clf.fit(scaled_train_x, universal_labels)
    predictions = rfc_clf.predict(scaled_test_x)
    score = accuracy_score(test_labels, predictions)

    return score

In [14]:
def universal_plus_personal_model(personal_features, personal_labels,
                                  universal_features, universal_labels,
                                  test_features, test_labels):
    personal_plus_universal_features = np.vstack((personal_features, universal_features))
    personal_plus_universal_labels = np.hstack((personal_labels, universal_labels))

    scaler = StandardScaler().fit(personal_plus_universal_features)
    scaled_train_x = scaler.transform(personal_plus_universal_features)
    scaled_test_x = scaler.transform(test_features)

    rfc_clf = wisdm.weka_RF()

    rfc_clf.fit(scaled_train_x, personal_plus_universal_labels)
    predictions = rfc_clf.predict(scaled_test_x)
    score = accuracy_score(test_labels, predictions)
    return score

In [18]:
def cluster_plus_personal_model(personal_features, personal_labels,
                                  universal_features, universal_labels,
                                  test_features, test_labels, KM):
    cluster_predictions = KM.predict(personal_features)
    closest_cluster = mode(cluster_predictions).mode[0]

    cluster_data_indeces = [i for i in range(len(clusters)) if clusters[i] == closest_cluster]
    cluster_features = universal_features[cluster_data_indeces]
    cluster_labels = universal_labels[cluster_data_indeces]

    training_features = np.vstack((personal_features, cluster_features))
    training_labels = np.hstack((personal_labels, cluster_labels))

    scaler = StandardScaler().fit(training_features)
    scaled_train_x = scaler.transform(training_features)
    scaled_test_x = scaler.transform(test_features)

    rfc_clf = wisdm.weka_RF()

    rfc_clf.fit(scaled_train_x, training_labels)
    predictions = rfc_clf.predict(scaled_test_x)
    score = accuracy_score(test_labels, predictions)
    return score

In [28]:
from collections import Counter

In [40]:
training_sizes = [10,20,30,40,50,60,70,80,90,100]

scores = []
err = None
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for ind, user_id in enumerate(wisdm.user_ids):
        print("Running user #%s: %s" % (ind, user_id))
        personal_set = wisdm.get_user_set(user_id)
        personal_set = wisdm.remove_all_nan(personal_set)

        personal_labels = np.array([t.decode("utf-8") for t in personal_set['class'].as_matrix()])
        personal_features = personal_set.as_matrix(columns=[personal_set.columns[1:-1]])
        
        # What is the distribution of labels for this participant?
        personal_labels_distribution = Counter(personal_labels)
        print("\tHas %s labels : " % len(personal_labels))
        for label_key, number in personal_labels_distribution.items():
            print("\t\t%s:%s" % (label_key, number))
        print("\n")
        # training features & labels
        universal_set = wisdm.data_df[wisdm.data_df['user'] != user_id]
        universal_set = wisdm.remove_all_nan(universal_set)
        universal_labels = np.array([t.decode("utf-8") for t in universal_set['class'].as_matrix()])
        universal_features = universal_set.as_matrix(columns=[universal_set.columns[1:-1]])
        
        # get k-means clusters
        number_of_clusters = 4 # the higher this number is, the smaller we should expect each cluster to be

        KM = KMeans(n_clusters=number_of_clusters)
        clusters = KM.fit_predict(universal_features)
        
        k = 10
        
        skf = StratifiedKFold(n_splits=k)
        
        k_run = 0
        for active_index, test_index in skf.split(personal_features, personal_labels):
            print("\tRunning Fold #%s\n" % k_run)
            # data set available for active labeling from the individual
            all_active_features = personal_features[active_index]
            all_active_labels = personal_labels[active_index]


            # held out test set from individual
            test_features = personal_features[test_index]
            test_labels = personal_labels[test_index]

            # iterate through size of training data
            for training_size in training_sizes:
                # initialize score holders
                personal_model_scores = []
                universal_model_scores = []
                personal_plus_all_scores = []
                personal_plus_cluster_scores = []

                # run universal model
                universal_model_score = universal_model(universal_features, universal_labels,
                                                        test_features, test_labels)
                universal_model_scores.append(universal_model_score)

                sss = StratifiedShuffleSplit(n_splits=5, train_size=training_size, test_size=6)

                splits = sss.split(all_active_features, all_active_labels)

                try:
                    for split_num, split_tup in enumerate(splits):
                        sampled_active_index, __ = split_tup
                        sampled_active_features = personal_features[sampled_active_index]
                        sampled_active_labels = personal_labels[sampled_active_index]

                        # run personal model
                        personal_score = personal_model(sampled_active_features, sampled_active_labels, test_features, test_labels)
                        personal_model_scores.append(personal_score)

                        # run personal + universal
                        personal_plus_all_score = universal_plus_personal_model(sampled_active_features, sampled_active_labels,
                                                                                universal_features, universal_labels,
                                                                                test_features, test_labels)
                        personal_plus_all_scores.append(personal_plus_all_score)

                        # run personal + cluster
                        personal_plus_cluster_score = cluster_plus_personal_model(sampled_active_features, sampled_active_labels,
                                                                                universal_features, universal_labels,
                                                                                test_features, test_labels, KM)
                        personal_plus_cluster_scores.append(personal_plus_cluster_score)
                except ValueError as ve:
                    print("Error with training size during split #%s" % split_num)
                    print("Message : %s" % ve.args[0])
                    err = ve
                    if "Reduce test_size and/or train_size" in ve.args[0]:
                        print("continuing...")
                        continue
                    elif "should be smaller than the number of samples" in ve.args[0]:
                        print("continuing...")
                        continue
                    elif "The least populated class in y has only 1 member" in ve.args[0]:
                        print("continuing...")
                        continue
                    else:
                        raise(ve)

                row = {"test user" : user_id,
                       "k-run" : k_run,
                   "classifier" : "RF with Wiki Parameters",
                   "personal training data" : training_size,
                   "personal score Mean" : np.mean(personal_model_scores),
                   "personal score STD" : np.std(personal_model_scores),
                   "impersonal score Mean" : np.mean(universal_model_scores),
                   "impersonal score STD" : np.std(universal_model_scores),
                   "personal + impersonal score Mean" : np.mean(personal_plus_all_scores),
                   "personal + impersonal score STD" : np.std(personal_plus_all_scores),
                   "personal + cluster score Mean" : np.mean(personal_plus_cluster_scores),
                   "personal + cluster score STD" : np.std(personal_plus_cluster_scores)
                   }
                print("\tamount of personal data : %s row" % training_size)
                print("\tpersonal model score : M=%.3f, SD=%.3f" % (row["personal score Mean"], row["personal score STD"]))
                print("\tuniversal model score : M=%.3f, SD=%.3f" % (row["impersonal score Mean"], row["impersonal score STD"]))
                print("\tpersonal + ALL universal : M=%.3f, SD=%.3f" % (row["personal + impersonal score Mean"], row["personal + impersonal score STD"]))
                print("\tpersonal + CLUSTER universal : M=%.3f, SD=%.3f" % (row["personal + cluster score Mean"], row["personal + cluster score STD"]))
                print("\n")
                scores.append(row)
            k_run += 1

scores_df = pd.DataFrame(scores)

Running user #0: 33
	Has 149 labels : 
		Jogging:15
		Walking:74
		Upstairs:12
		Downstairs:24
		Standing:7
		Sitting:17


	Running Fold #0

	amount of personal data : 10 row
	personal model score : M=0.533, SD=0.044
	universal model score : M=0.722, SD=0.000
	personal + ALL universal : M=0.778, SD=0.070
	personal + CLUSTER universal : M=0.800, SD=0.075


	amount of personal data : 20 row
	personal model score : M=0.700, SD=0.057
	universal model score : M=0.667, SD=0.000
	personal + ALL universal : M=0.700, SD=0.057
	personal + CLUSTER universal : M=0.744, SD=0.090


	amount of personal data : 30 row
	personal model score : M=0.700, SD=0.075
	universal model score : M=0.611, SD=0.000
	personal + ALL universal : M=0.711, SD=0.042
	personal + CLUSTER universal : M=0.789, SD=0.096


	amount of personal data : 40 row
	personal model score : M=0.756, SD=0.057
	universal model score : M=0.667, SD=0.000
	personal + ALL universal : M=0.811, SD=0.057
	personal + CLUSTER universal : M=0.800, SD

	amount of personal data : 80 row
	personal model score : M=0.847, SD=0.029
	universal model score : M=0.765, SD=0.000
	personal + ALL universal : M=0.929, SD=0.044
	personal + CLUSTER universal : M=0.965, SD=0.029


	amount of personal data : 90 row
	personal model score : M=0.847, SD=0.047
	universal model score : M=0.824, SD=0.000
	personal + ALL universal : M=0.953, SD=0.044
	personal + CLUSTER universal : M=0.953, SD=0.044


	amount of personal data : 100 row
	personal model score : M=0.871, SD=0.024
	universal model score : M=0.824, SD=0.000
	personal + ALL universal : M=0.976, SD=0.029
	personal + CLUSTER universal : M=0.988, SD=0.024


	Running Fold #4

	amount of personal data : 10 row
	personal model score : M=0.587, SD=0.050
	universal model score : M=0.867, SD=0.000
	personal + ALL universal : M=0.867, SD=0.042
	personal + CLUSTER universal : M=0.947, SD=0.050


	amount of personal data : 20 row
	personal model score : M=0.680, SD=0.088
	universal model score : M=0.867, SD=

	amount of personal data : 60 row
	personal model score : M=0.950, SD=0.041
	universal model score : M=0.750, SD=0.000
	personal + ALL universal : M=0.933, SD=0.062
	personal + CLUSTER universal : M=0.950, SD=0.041


	amount of personal data : 70 row
	personal model score : M=0.950, SD=0.041
	universal model score : M=0.750, SD=0.000
	personal + ALL universal : M=0.917, SD=0.000
	personal + CLUSTER universal : M=0.967, SD=0.041


	amount of personal data : 80 row
	personal model score : M=0.967, SD=0.041
	universal model score : M=0.833, SD=0.000
	personal + ALL universal : M=0.933, SD=0.033
	personal + CLUSTER universal : M=0.967, SD=0.041


	amount of personal data : 90 row
	personal model score : M=0.950, SD=0.041
	universal model score : M=0.833, SD=0.000
	personal + ALL universal : M=0.917, SD=0.053
	personal + CLUSTER universal : M=0.950, SD=0.041


	amount of personal data : 100 row
	personal model score : M=1.000, SD=0.000
	universal model score : M=0.750, SD=0.000
	personal + 

	amount of personal data : 40 row
	personal model score : M=1.000, SD=0.000
	universal model score : M=0.455, SD=0.000
	personal + ALL universal : M=0.982, SD=0.036
	personal + CLUSTER universal : M=0.982, SD=0.036


	amount of personal data : 50 row
	personal model score : M=1.000, SD=0.000
	universal model score : M=0.455, SD=0.000
	personal + ALL universal : M=0.945, SD=0.045
	personal + CLUSTER universal : M=0.945, SD=0.045


	amount of personal data : 60 row
	personal model score : M=1.000, SD=0.000
	universal model score : M=0.545, SD=0.000
	personal + ALL universal : M=0.945, SD=0.073
	personal + CLUSTER universal : M=0.964, SD=0.045


	amount of personal data : 70 row
	personal model score : M=1.000, SD=0.000
	universal model score : M=0.455, SD=0.000
	personal + ALL universal : M=1.000, SD=0.000
	personal + CLUSTER universal : M=1.000, SD=0.000


	amount of personal data : 80 row
	personal model score : M=1.000, SD=0.000
	universal model score : M=0.455, SD=0.000
	personal + A

	amount of personal data : 40 row
	personal model score : M=1.000, SD=0.000
	universal model score : M=0.400, SD=0.000
	personal + ALL universal : M=0.920, SD=0.075
	personal + CLUSTER universal : M=0.960, SD=0.049


	amount of personal data : 50 row
	personal model score : M=0.980, SD=0.040
	universal model score : M=0.400, SD=0.000
	personal + ALL universal : M=0.940, SD=0.080
	personal + CLUSTER universal : M=1.000, SD=0.000


	amount of personal data : 60 row
	personal model score : M=1.000, SD=0.000
	universal model score : M=0.300, SD=0.000
	personal + ALL universal : M=0.980, SD=0.040
	personal + CLUSTER universal : M=1.000, SD=0.000


	amount of personal data : 70 row
	personal model score : M=1.000, SD=0.000
	universal model score : M=0.400, SD=0.000
	personal + ALL universal : M=1.000, SD=0.000
	personal + CLUSTER universal : M=1.000, SD=0.000


	amount of personal data : 80 row
	personal model score : M=1.000, SD=0.000
	universal model score : M=0.300, SD=0.000
	personal + A

	amount of personal data : 40 row
	personal model score : M=0.844, SD=0.113
	universal model score : M=0.222, SD=0.000
	personal + ALL universal : M=0.556, SD=0.157
	personal + CLUSTER universal : M=0.689, SD=0.147


	amount of personal data : 50 row
	personal model score : M=0.822, SD=0.113
	universal model score : M=0.222, SD=0.000
	personal + ALL universal : M=0.733, SD=0.054
	personal + CLUSTER universal : M=0.711, SD=0.113


	amount of personal data : 60 row
	personal model score : M=0.867, SD=0.083
	universal model score : M=0.111, SD=0.000
	personal + ALL universal : M=0.667, SD=0.122
	personal + CLUSTER universal : M=0.778, SD=0.070


	amount of personal data : 70 row
	personal model score : M=0.889, SD=0.099
	universal model score : M=0.444, SD=0.000
	personal + ALL universal : M=0.844, SD=0.054
	personal + CLUSTER universal : M=0.822, SD=0.054


	amount of personal data : 80 row
	personal model score : M=0.911, SD=0.044
	universal model score : M=0.111, SD=0.000
	personal + A

	amount of personal data : 20 row
	personal model score : M=0.781, SD=0.049
	universal model score : M=0.857, SD=0.000
	personal + ALL universal : M=0.886, SD=0.023
	personal + CLUSTER universal : M=0.857, SD=0.052


	amount of personal data : 30 row
	personal model score : M=0.810, SD=0.052
	universal model score : M=0.857, SD=0.000
	personal + ALL universal : M=0.886, SD=0.023
	personal + CLUSTER universal : M=0.857, SD=0.067


	amount of personal data : 40 row
	personal model score : M=0.829, SD=0.038
	universal model score : M=0.810, SD=0.000
	personal + ALL universal : M=0.886, SD=0.023
	personal + CLUSTER universal : M=0.914, SD=0.036


	amount of personal data : 50 row
	personal model score : M=0.829, SD=0.038
	universal model score : M=0.857, SD=0.000
	personal + ALL universal : M=0.905, SD=0.030
	personal + CLUSTER universal : M=0.914, SD=0.036


	amount of personal data : 60 row
	personal model score : M=0.867, SD=0.056
	universal model score : M=0.905, SD=0.000
	personal + A

	amount of personal data : 100 row
	personal model score : M=0.894, SD=0.024
	universal model score : M=0.706, SD=0.000
	personal + ALL universal : M=0.882, SD=0.037
	personal + CLUSTER universal : M=0.871, SD=0.024


	Running Fold #7

	amount of personal data : 10 row
	personal model score : M=0.659, SD=0.114
	universal model score : M=0.882, SD=0.000
	personal + ALL universal : M=0.871, SD=0.024
	personal + CLUSTER universal : M=0.800, SD=0.047


	amount of personal data : 20 row
	personal model score : M=0.835, SD=0.094
	universal model score : M=0.882, SD=0.000
	personal + ALL universal : M=0.882, SD=0.037
	personal + CLUSTER universal : M=0.859, SD=0.060


	amount of personal data : 30 row
	personal model score : M=0.894, SD=0.078
	universal model score : M=0.882, SD=0.000
	personal + ALL universal : M=0.941, SD=0.037
	personal + CLUSTER universal : M=0.965, SD=0.047


	amount of personal data : 40 row
	personal model score : M=0.918, SD=0.071
	universal model score : M=0.882, SD=

	amount of personal data : 70 row
	personal model score : M=0.838, SD=0.049
	universal model score : M=0.714, SD=0.000
	personal + ALL universal : M=0.857, SD=0.030
	personal + CLUSTER universal : M=0.819, SD=0.047


	amount of personal data : 80 row
	personal model score : M=0.876, SD=0.038
	universal model score : M=0.714, SD=0.000
	personal + ALL universal : M=0.905, SD=0.067
	personal + CLUSTER universal : M=0.895, SD=0.070


	amount of personal data : 90 row
	personal model score : M=0.857, SD=0.030
	universal model score : M=0.714, SD=0.000
	personal + ALL universal : M=0.924, SD=0.023
	personal + CLUSTER universal : M=0.895, SD=0.036


	amount of personal data : 100 row
	personal model score : M=0.829, SD=0.038
	universal model score : M=0.714, SD=0.000
	personal + ALL universal : M=0.905, SD=0.052
	personal + CLUSTER universal : M=0.886, SD=0.065


	Running Fold #1

	amount of personal data : 10 row
	personal model score : M=0.620, SD=0.121
	universal model score : M=0.750, SD=

	amount of personal data : 50 row
	personal model score : M=0.811, SD=0.057
	universal model score : M=0.778, SD=0.000
	personal + ALL universal : M=0.856, SD=0.057
	personal + CLUSTER universal : M=0.778, SD=0.050


	amount of personal data : 60 row
	personal model score : M=0.833, SD=0.035
	universal model score : M=0.778, SD=0.000
	personal + ALL universal : M=0.844, SD=0.022
	personal + CLUSTER universal : M=0.789, SD=0.042


	amount of personal data : 70 row
	personal model score : M=0.822, SD=0.022
	universal model score : M=0.778, SD=0.000
	personal + ALL universal : M=0.856, SD=0.027
	personal + CLUSTER universal : M=0.811, SD=0.027


	amount of personal data : 80 row
	personal model score : M=0.878, SD=0.042
	universal model score : M=0.778, SD=0.000
	personal + ALL universal : M=0.922, SD=0.044
	personal + CLUSTER universal : M=0.867, SD=0.027


	amount of personal data : 90 row
	personal model score : M=0.933, SD=0.022
	universal model score : M=0.778, SD=0.000
	personal + A

	amount of personal data : 30 row
	personal model score : M=0.850, SD=0.031
	universal model score : M=0.812, SD=0.000
	personal + ALL universal : M=0.825, SD=0.047
	personal + CLUSTER universal : M=0.775, SD=0.031


	amount of personal data : 40 row
	personal model score : M=0.825, SD=0.047
	universal model score : M=0.750, SD=0.000
	personal + ALL universal : M=0.875, SD=0.040
	personal + CLUSTER universal : M=0.850, SD=0.050


	amount of personal data : 50 row
	personal model score : M=0.838, SD=0.031
	universal model score : M=0.812, SD=0.000
	personal + ALL universal : M=0.863, SD=0.025
	personal + CLUSTER universal : M=0.825, SD=0.047


	amount of personal data : 60 row
	personal model score : M=0.887, SD=0.047
	universal model score : M=0.750, SD=0.000
	personal + ALL universal : M=0.875, SD=0.056
	personal + CLUSTER universal : M=0.875, SD=0.040


	amount of personal data : 70 row
	personal model score : M=0.912, SD=0.031
	universal model score : M=0.750, SD=0.000
	personal + A

	amount of personal data : 100 row
	personal model score : M=0.920, SD=0.060
	universal model score : M=0.800, SD=0.000
	personal + ALL universal : M=0.920, SD=0.051
	personal + CLUSTER universal : M=0.880, SD=0.051


	Running Fold #2

	amount of personal data : 10 row
	personal model score : M=0.747, SD=0.039
	universal model score : M=0.947, SD=0.000
	personal + ALL universal : M=0.979, SD=0.026
	personal + CLUSTER universal : M=0.958, SD=0.039


	amount of personal data : 20 row
	personal model score : M=0.800, SD=0.021
	universal model score : M=1.000, SD=0.000
	personal + ALL universal : M=0.989, SD=0.021
	personal + CLUSTER universal : M=0.905, SD=0.061


	amount of personal data : 30 row
	personal model score : M=0.811, SD=0.054
	universal model score : M=1.000, SD=0.000
	personal + ALL universal : M=1.000, SD=0.000
	personal + CLUSTER universal : M=0.905, SD=0.052


	amount of personal data : 40 row
	personal model score : M=0.832, SD=0.039
	universal model score : M=0.947, SD=

	amount of personal data : 80 row
	personal model score : M=0.882, SD=0.074
	universal model score : M=0.882, SD=0.000
	personal + ALL universal : M=0.918, SD=0.047
	personal + CLUSTER universal : M=0.882, SD=0.074


	amount of personal data : 90 row
	personal model score : M=0.929, SD=0.024
	universal model score : M=0.941, SD=0.000
	personal + ALL universal : M=0.965, SD=0.029
	personal + CLUSTER universal : M=0.929, SD=0.044


	amount of personal data : 100 row
	personal model score : M=0.941, SD=0.000
	universal model score : M=0.882, SD=0.000
	personal + ALL universal : M=0.988, SD=0.024
	personal + CLUSTER universal : M=0.941, SD=0.000


	Running Fold #6

	amount of personal data : 10 row
	personal model score : M=0.750, SD=0.190
	universal model score : M=0.875, SD=0.000
	personal + ALL universal : M=0.938, SD=0.000
	personal + CLUSTER universal : M=0.900, SD=0.031


	amount of personal data : 20 row
	personal model score : M=0.875, SD=0.000
	universal model score : M=0.938, SD=

	amount of personal data : 60 row
	personal model score : M=0.773, SD=0.053
	universal model score : M=0.667, SD=0.000
	personal + ALL universal : M=0.720, SD=0.027
	personal + CLUSTER universal : M=0.773, SD=0.053


	amount of personal data : 70 row
	personal model score : M=0.773, SD=0.053
	universal model score : M=0.667, SD=0.000
	personal + ALL universal : M=0.747, SD=0.027
	personal + CLUSTER universal : M=0.760, SD=0.053


	amount of personal data : 80 row
	personal model score : M=0.813, SD=0.065
	universal model score : M=0.667, SD=0.000
	personal + ALL universal : M=0.747, SD=0.027
	personal + CLUSTER universal : M=0.813, SD=0.027


	amount of personal data : 90 row
	personal model score : M=0.800, SD=0.042
	universal model score : M=0.667, SD=0.000
	personal + ALL universal : M=0.733, SD=0.060
	personal + CLUSTER universal : M=0.787, SD=0.065


	amount of personal data : 100 row
	personal model score : M=0.760, SD=0.033
	universal model score : M=0.667, SD=0.000
	personal + 

	amount of personal data : 30 row
	personal model score : M=0.871, SD=0.029
	universal model score : M=1.000, SD=0.000
	personal + ALL universal : M=0.971, SD=0.035
	personal + CLUSTER universal : M=0.957, SD=0.035


	amount of personal data : 40 row
	personal model score : M=0.900, SD=0.035
	universal model score : M=0.929, SD=0.000
	personal + ALL universal : M=0.971, SD=0.035
	personal + CLUSTER universal : M=0.971, SD=0.035


	amount of personal data : 50 row
	personal model score : M=0.929, SD=0.045
	universal model score : M=1.000, SD=0.000
	personal + ALL universal : M=0.986, SD=0.029
	personal + CLUSTER universal : M=0.986, SD=0.029


	amount of personal data : 60 row
	personal model score : M=0.914, SD=0.029
	universal model score : M=0.929, SD=0.000
	personal + ALL universal : M=0.986, SD=0.029
	personal + CLUSTER universal : M=0.986, SD=0.029


	amount of personal data : 70 row
	personal model score : M=0.929, SD=0.000
	universal model score : M=0.929, SD=0.000
	personal + A

	amount of personal data : 10 row
	personal model score : M=0.833, SD=0.000
	universal model score : M=0.917, SD=0.000
	personal + ALL universal : M=0.933, SD=0.033
	personal + CLUSTER universal : M=0.900, SD=0.033


	amount of personal data : 20 row
	personal model score : M=0.833, SD=0.000
	universal model score : M=0.917, SD=0.000
	personal + ALL universal : M=0.900, SD=0.033
	personal + CLUSTER universal : M=0.883, SD=0.041


	amount of personal data : 30 row
	personal model score : M=0.933, SD=0.033
	universal model score : M=0.917, SD=0.000
	personal + ALL universal : M=0.917, SD=0.000
	personal + CLUSTER universal : M=0.900, SD=0.033


	amount of personal data : 40 row
	personal model score : M=0.917, SD=0.000
	universal model score : M=0.917, SD=0.000
	personal + ALL universal : M=0.983, SD=0.033
	personal + CLUSTER universal : M=0.950, SD=0.041


	amount of personal data : 50 row
	personal model score : M=0.917, SD=0.053
	universal model score : M=0.917, SD=0.000
	personal + A

	amount of personal data : 80 row
	personal model score : M=0.822, SD=0.022
	universal model score : M=0.889, SD=0.000
	personal + ALL universal : M=0.956, SD=0.042
	personal + CLUSTER universal : M=0.956, SD=0.042


	amount of personal data : 90 row
	personal model score : M=0.833, SD=0.035
	universal model score : M=0.889, SD=0.000
	personal + ALL universal : M=0.944, SD=0.035
	personal + CLUSTER universal : M=0.956, SD=0.022


	amount of personal data : 100 row
	personal model score : M=0.856, SD=0.044
	universal model score : M=0.833, SD=0.000
	personal + ALL universal : M=0.978, SD=0.027
	personal + CLUSTER universal : M=0.989, SD=0.022


	Running Fold #1

	amount of personal data : 10 row
	personal model score : M=0.688, SD=0.163
	universal model score : M=0.938, SD=0.000
	personal + ALL universal : M=0.925, SD=0.025
	personal + CLUSTER universal : M=0.925, SD=0.025


	amount of personal data : 20 row
	personal model score : M=0.775, SD=0.031
	universal model score : M=0.938, SD=

	amount of personal data : 60 row
	personal model score : M=0.907, SD=0.033
	universal model score : M=0.933, SD=0.000
	personal + ALL universal : M=0.960, SD=0.033
	personal + CLUSTER universal : M=0.960, SD=0.033


	amount of personal data : 70 row
	personal model score : M=0.907, SD=0.033
	universal model score : M=0.933, SD=0.000
	personal + ALL universal : M=0.987, SD=0.027
	personal + CLUSTER universal : M=0.987, SD=0.027


	amount of personal data : 80 row
	personal model score : M=0.920, SD=0.027
	universal model score : M=0.933, SD=0.000
	personal + ALL universal : M=1.000, SD=0.000
	personal + CLUSTER universal : M=1.000, SD=0.000


	amount of personal data : 90 row
	personal model score : M=0.920, SD=0.027
	universal model score : M=0.933, SD=0.000
	personal + ALL universal : M=0.987, SD=0.027
	personal + CLUSTER universal : M=0.987, SD=0.027


	amount of personal data : 100 row
	personal model score : M=0.920, SD=0.027
	universal model score : M=0.933, SD=0.000
	personal + 

	amount of personal data : 40 row
	personal model score : M=0.908, SD=0.031
	universal model score : M=0.923, SD=0.000
	personal + ALL universal : M=0.969, SD=0.038
	personal + CLUSTER universal : M=0.985, SD=0.031


	amount of personal data : 50 row
	personal model score : M=0.877, SD=0.038
	universal model score : M=0.923, SD=0.000
	personal + ALL universal : M=0.923, SD=0.000
	personal + CLUSTER universal : M=1.000, SD=0.000


	amount of personal data : 60 row
	personal model score : M=0.892, SD=0.038
	universal model score : M=0.923, SD=0.000
	personal + ALL universal : M=0.938, SD=0.031
	personal + CLUSTER universal : M=0.938, SD=0.031


	amount of personal data : 70 row
	personal model score : M=0.892, SD=0.038
	universal model score : M=0.923, SD=0.000
	personal + ALL universal : M=0.985, SD=0.031
	personal + CLUSTER universal : M=0.985, SD=0.031


	amount of personal data : 80 row
	personal model score : M=0.892, SD=0.038
	universal model score : M=0.923, SD=0.000
	personal + A

ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.