## Import libraries

In [1]:
import numpy as np
import pandas as pd
import scipy as sc
import matplotlib.pyplot as plt
%matplotlib inline

## Histogram

In [2]:
# Read the expert feature table of each dataset (WISDM first, then USC-HAD).
data_wisdm, data_uschad = (
    pd.read_csv(features_csv)
    for features_csv in (
        "../data/features/expert_wisdm.csv",
        "../data/features/expert_uschad.csv",
    )
)

In [3]:
# Read the score table of each dataset; the first CSV column becomes the row
# index, which the cells below use to select rows such as 'lr_all'.
results_wisdm_all, results_uschad_all = (
    pd.read_csv(results_csv, index_col=0)
    for results_csv in (
        "results/results_wisdm_all.csv",
        "results/results_uschad_all.csv",
    )
)

In [4]:
def get_classDistrubution(data):
    """
    Count how many objects belong to each class.

    Parameters
    ----------
    data : mapping or DataFrame
        Must provide an 'activity' entry holding one class label per object.

    Returns
    -------
    n_classes : int
        Number of distinct labels found in ``data['activity']``.
    classes_nb : numpy.ndarray
        Number of objects per class, ordered by sorted label value.

    Notes
    -----
    Counts are taken per distinct label, so labels do not have to be the
    integers ``0 .. n_classes - 1``. For labels that are exactly that range
    the result is the same as counting ``activity == i`` for each ``i``.
    """
    # np.unique returns the sorted distinct labels together with their counts,
    # which avoids assuming any particular label encoding.
    labels, classes_nb = np.unique(np.asarray(data['activity']),
                                   return_counts=True)

    return labels.shape[0], np.array(classes_nb)

In [5]:
def get_filename(data_type, method, feature_type):
    """
    Build the PNG file name for a histogram.

    The name has the form ``hist_<dataset>_<method>_<feature_type>.png``.

    Parameters
    ----------
    data_type : str
        "WISDM" or "USC-HAD".
    method : str
        "Logistic Regression", "Random Forest" or "SVM".
    feature_type : str
        Appended to the name unchanged.

    Returns
    -------
    str
        The file name, or "-" when ``data_type`` or ``method`` is not one of
        the recognised values.
    """
    dataset_tags = (("WISDM", "wisdm"), ("USC-HAD", "uschad"))
    method_tags = (("Logistic Regression", "lr"),
                   ("Random Forest", "rf"),
                   ("SVM", "svm"))

    parts = ["hist"]
    # The dataset is resolved before the method; the first unknown value
    # short-circuits to the "-" sentinel.
    for value, table in ((data_type, dataset_tags), (method, method_tags)):
        tag = next((short for full, short in table if value == full), None)
        if tag is None:
            return "-"
        parts.append(tag)
    parts.append(feature_type)

    return '_'.join(parts) + ".png"

In [6]:
def get_histogram(data, scores, method, data_type='WISDM', feature_type='all'):
    """
    Draw a stacked per-class bar chart for one classifier and save it as PNG.

    Each bar's total height is the number of objects of that class. The lower
    segment (colour #26d692) is ``ceil(score * count)`` and the upper segment
    (colour #d62663) is the remainder. The per-class score is printed above
    each bar as a percentage and the overall score appears in the title.
    The figure is saved to ``../doc/pics/<name>``, where ``<name>`` comes from
    ``get_filename``, and is then closed.

    Parameters
    ----------
    data : DataFrame
        Passed to ``get_classDistrubution``; must have an 'activity' column.
    scores : sequence of float
        ``scores[0]`` is the overall score shown in the title as "Accuracy";
        ``scores[1:]`` are the per-class scores, paired positionally with the
        per-class counts. Presumably fractions in [0, 1] -- verify with the
        code that produces the results files.
    method : str
        Classifier name; used in the title and in the output file name.
    data_type : str
        'WISDM' (5x5 inch figure) or 'USC-HAD' (10x5 inch figure).
    feature_type : str
        Forwarded to ``get_filename``.

    Returns
    -------
    0 when ``data_type`` is not recognised (a message is printed and nothing
    is drawn); otherwise None.
    """
    n_classes, classes_nb = get_classDistrubution(data)
    multi_score = scores[0]
    scores = scores[1:]
    labels = [('%.1f%%' % (100*label)) for label in scores]
    first_points = np.array([np.ceil(x*y) for x, y in zip(scores, classes_nb)])
    second_points = np.array([y-np.ceil(x*y) for x, y in zip(scores, classes_nb)])
    # range works on both Python 2 and 3 (xrange exists only on Python 2).
    x_ticks = [str(i+1) for i in range(n_classes)]
    ind = np.arange(n_classes)
    width = 0.8
    if data_type == 'WISDM':
        fig = plt.figure(figsize=(5, 5))
    elif data_type == 'USC-HAD':
        fig = plt.figure(figsize=(10, 5))
    else:
        print("Mode is not correct!")
        return 0
    # The tick positions (ind + width/2) and the x-limits below assume that
    # each bar starts at its index, so request edge alignment explicitly
    # instead of relying on the matplotlib default.
    p1 = plt.bar(ind, first_points, width, color='#26d692', align='edge')
    p2 = plt.bar(ind, second_points, width, color='#d62663',
                 bottom=first_points, align='edge')

    plt.ylabel('Objects number', fontsize=17)
    plt.xlabel('Class labels', fontsize=17)
    plt.title(method + "\n Accuracy: %.1f%%" % (100*multi_score), fontsize=19)
    plt.xticks(ind + width/2., x_ticks, fontsize=15)
    plt.yticks(fontsize=15)
    plt.xlim([-0.1, n_classes])
    # Leave 10% headroom above the tallest bar for the percentage labels.
    plt.ylim([0, (first_points+second_points).max()*1.1])

    def autolabel(rects1, rects2, labels):
        """
        Attach a text label above each stacked bar (lower + upper segment).
        """
        for rect1, rect2, label in zip(rects1, rects2, labels):
            height = rect1.get_height()+rect2.get_height()
            plt.text(rect1.get_x() + rect1.get_width()/2., 1.01*height,
                    label, ha='center', va='bottom', fontsize=15)

    autolabel(p1, p2, labels)
    fig.tight_layout()
    # NOTE(review): get_filename returns "-" for an unrecognised method, in
    # which case the figure is saved under that name.
    filename = get_filename(data_type, method, feature_type)
    fig.savefig("../doc/pics/" + filename)
    # Close this specific figure rather than whichever one is current.
    plt.close(fig)

In [7]:
# USC-HAD, Logistic Regression: plot the scores stored in the 'lr_all' row.
data, data_type = data_uschad.copy(), "USC-HAD"
method, feature_type = "Logistic Regression", "all"
scores = results_uschad_all.loc['lr_all', :].values

get_histogram(data, scores, method,
              data_type=data_type, feature_type=feature_type)

In [8]:
# USC-HAD, Random Forest: plot the scores stored in the 'rf_all' row.
data, data_type = data_uschad.copy(), "USC-HAD"
method, feature_type = "Random Forest", "all"
scores = results_uschad_all.loc['rf_all', :].values

get_histogram(data, scores, method,
              data_type=data_type, feature_type=feature_type)

In [9]:
# USC-HAD, SVM: plot the scores stored in the 'svm_all' row.
data, data_type = data_uschad.copy(), "USC-HAD"
method, feature_type = "SVM", "all"
scores = results_uschad_all.loc['svm_all', :].values

get_histogram(data, scores, method,
              data_type=data_type, feature_type=feature_type)

In [10]:
# WISDM, Logistic Regression: plot the scores stored in the 'lr_all' row.
data, data_type = data_wisdm.copy(), "WISDM"
method, feature_type = "Logistic Regression", "all"
scores = results_wisdm_all.loc['lr_all', :].values

get_histogram(data, scores, method,
              data_type=data_type, feature_type=feature_type)

In [11]:
# WISDM, Random Forest: plot the scores stored in the 'rf_all' row.
data, data_type = data_wisdm.copy(), "WISDM"
method, feature_type = "Random Forest", "all"
scores = results_wisdm_all.loc['rf_all', :].values

get_histogram(data, scores, method,
              data_type=data_type, feature_type=feature_type)

In [12]:
# WISDM, SVM: plot the scores stored in the 'svm_all' row.
data, data_type = data_wisdm.copy(), "WISDM"
method, feature_type = "SVM", "all"
scores = results_wisdm_all.loc['svm_all', :].values

get_histogram(data, scores, method,
              data_type=data_type, feature_type=feature_type)