In [0]:
import pandas as pd
import numpy as np
from scipy import stats

# Directory that holds the CSV files. Keep the trailing '/': the cells below
# build file names by plain string concatenation (path + 'file.csv').
path = 'Data/' #add the path of your data

In [0]:
#@title reading from drive (if you are using google collaboratory)
# Mount Google Drive only when the notebook really runs on Colab. Anywhere
# else the `google.colab` package is missing, so the import is guarded and the
# local `path` set in the first cell stays in effect (keeps Run All working).
try:
    from google.colab import drive
except ImportError:
    pass
else:
    drive.mount('/content/gdrive')

    # Relative to the working directory the notebook is started from.
    path = 'gdrive/My Drive/challenge-up/'

# Complete Base

In [0]:
# Read CompleteDataSet.csv from `path`; the first CSV line supplies the column names.
complete_base = pd.read_csv(path + 'CompleteDataSet.csv', header=[0])

In [0]:
# Keep only the timestamp and label columns, starting at row label 1.
# NOTE(review): row label 0 is skipped - presumably a second header/units
# line of the CSV; confirm against the raw file.
label_columns = ['TimeStamps', 'Subject', 'Activity', 'Trial', 'Tag']
complete_base = complete_base.loc[1:, label_columns]

In [0]:
# Number of rows whose TimeStamps value repeats an earlier row (0 = all unique).
# A single count replaces the former list of `True` values, which grew with
# the data and relied on an identity comparison (`x is True`) per element.
int(complete_base.duplicated('TimeStamps').sum())

In [0]:
# Show the column dtypes ahead of the explicit conversions in the next cell.
complete_base.dtypes

In [0]:
# Parse the timestamps and turn the four label columns into the smallest
# integer dtype that can hold their values.
integer_columns = ['Subject', 'Activity', 'Trial', 'Tag']
complete_base = complete_base.assign(
    TimeStamps=pd.to_datetime(complete_base['TimeStamps']),
    **{
        column: pd.to_numeric(complete_base[column], downcast='integer')
        for column in integer_columns
    }
)

In [0]:
# Show the column dtypes again, now that the conversions have run.
complete_base.dtypes

# Test base

In [0]:
# Read CompleteDataSet_testing_competition.csv from `path`; the first CSV line supplies the column names.
base_test = pd.read_csv(path + 'CompleteDataSet_testing_competition.csv', header=[0])

In [0]:
# Only the timestamps are needed from the test file; row label 0 is skipped
# in the same way as for the complete base.
base_test = base_test.loc[1:][['TimeStamps']]

In [0]:
# Number of rows whose TimeStamps value repeats an earlier row (0 = all unique).
# A single count replaces the former list of `True` values, which grew with
# the data and relied on an identity comparison (`x is True`) per element.
int(base_test.duplicated('TimeStamps').sum())

In [0]:
# Show the column dtypes ahead of the timestamp conversion in the next cell.
base_test.dtypes

In [0]:
# Parse the timestamp strings into datetime values.
base_test = base_test.assign(TimeStamps=pd.to_datetime(base_test['TimeStamps']))

In [0]:
# Show the column dtypes again, now that the timestamps are parsed.
base_test.dtypes

# Joining the bases

In [0]:
# Inner join on the exact timestamp: keeps the rows of the complete base
# whose TimeStamps value also occurs in the test base.
join_base = pd.merge(base_test, complete_base, how='inner', on='TimeStamps')

In [0]:
# List the distinct Subject values that survived the join.
join_base['Subject'].unique()

In [0]:
# Truncate every timestamp to the whole second. The vectorised `.dt.floor`
# replaces a Python lambda that was called once per row.
join_base['TimeStamps'] = join_base['TimeStamps'].dt.floor('s')

In [0]:
# From here on only the (second-resolution) timestamp and its tag are needed.
join_base = join_base[['TimeStamps', 'Tag']]

In [0]:
# Show the dtypes of the two remaining columns before aggregating.
join_base.dtypes

In [0]:
# One row per timestamp, holding the most frequent Tag among the rows that
# share it. The following cells filter out array-valued results.
tags_per_timestamp = join_base.groupby(by='TimeStamps', as_index=False)['Tag']
join_base = tags_per_timestamp.agg(pd.Series.mode)

In [0]:
# Show the dtypes after the aggregation.
join_base.dtypes

In [0]:
# Flag the rows whose Tag is a container (list / Series / ndarray) instead of
# a single value.
container_types = (list, pd.Series, np.ndarray)
join_base['IsArray'] = join_base['Tag'].map(lambda tag: isinstance(tag, container_types))

In [0]:
# Discard the flagged rows and the helper column in a single selection.
join_base = join_base.loc[~join_base['IsArray'], ['TimeStamps', 'Tag']]

In [0]:
# Convert Tag to the smallest integer dtype that can hold its values.
join_base = join_base.assign(Tag=pd.to_numeric(join_base['Tag'], downcast='integer'))

# Submission base

In [0]:
# Read test-results.csv from `path`; the first CSV line supplies the column names.
submission_base = pd.read_csv(path + 'test-results.csv', header=[0])

In [0]:
# Show the column dtypes of the submission file as loaded.
submission_base.dtypes

In [0]:
# Parse the submission timestamps, reading ambiguous dates day-first.
submission_base = submission_base.assign(
    timestamp=pd.to_datetime(submission_base['timestamp'], dayfirst=True)
)

In [0]:
# Align the column names with the reference table: the join key becomes
# 'TimeStamps' and the submitted class becomes 'Predicted'.
submission_base = submission_base.rename(
    columns={'timestamp': 'TimeStamps', 'class': 'Predicted'}
)

# Joining test and predicted

In [0]:
# Show the column dtypes after parsing and renaming.
submission_base.dtypes

In [0]:
# Inner join on the timestamp: one row per timestamp that has both a
# reference Tag and a submitted prediction.
result_base = pd.merge(join_base, submission_base, how='inner', on='TimeStamps')

In [0]:
# (rows, columns) of the joined table, i.e. how many timestamps will be scored.
result_base.shape

In [0]:
# Show the column dtypes of the joined table.
result_base.dtypes

# Verifying Score

In [0]:
from sklearn.metrics import confusion_matrix

# Reference and predicted class labels, aligned row by row.
y_test_category = result_base.Tag.values
y_test_predict_category = result_base.Predicted.values

# Class identifiers, in the row/column order used by every table below.
labels = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20]

# Fail early, with a readable message, when a class value is not listed in
# `labels`; otherwise it would be silently left out of the confusion matrix.
unexpected_labels = (
    set(np.unique(y_test_category)) | set(np.unique(y_test_predict_category))
) - set(labels)
if unexpected_labels:
    raise ValueError('Labels not listed in `labels`: %s' % sorted(unexpected_labels))

# Passing `labels` pins the matrix to len(labels) x len(labels) in exactly
# this order, even when some class never occurs in the data. Without it the
# matrix only covers the classes present and no longer lines up with the
# index/columns given to the DataFrame.
cnf_matrix2 = confusion_matrix(y_test_category, y_test_predict_category, labels=labels)
confusion_ma2 = pd.DataFrame(cnf_matrix2, columns=labels, index=labels)

# One-vs-rest counts per class (rows = true label, columns = predicted label).
tp = cnf_matrix2.diagonal().astype(float)
fp = cnf_matrix2.sum(axis=0) - tp
fn = cnf_matrix2.sum(axis=1) - tp
# True negatives are all samples that are neither TP, FP nor FN for the class
# (previously computed as "other diagonal entries", which under-counts them
# and distorts the specificity).
tn = cnf_matrix2.sum() - tp - fp - fn

metric_columns = ['tp', 'tn', 'fp', 'fn', 'precision', 'recall/sensitivity', 'specificity', 'f1-score']

# A zero denominator (e.g. a class that is never predicted) yields NaN, which
# the pandas means below skip; errstate only silences the numpy warning.
with np.errstate(divide='ignore', invalid='ignore'):
    metrics_table = pd.DataFrame(
        {
            'tp': tp,
            'tn': tn,
            'fp': fp,
            'fn': fn,
            'precision': tp / (tp + fp),
            'recall/sensitivity': tp / (tp + fn),
            'specificity': tn / (tn + fp),
            'f1-score': 2 * tp / (2 * tp + fp + fn),
        },
        index=labels,
        columns=metric_columns,
    )

# Macro (unweighted) averages over the classes.
u_precision = metrics_table['precision'].mean()
u_recall = metrics_table['recall/sensitivity'].mean()

# Summary row stored under index 99: the counts are left empty, and the F1 is
# the harmonic mean of the macro precision and macro recall.
metrics_table.loc[99] = [
    np.nan,
    np.nan,
    np.nan,
    np.nan,
    u_precision,
    u_recall,
    metrics_table['specificity'].mean(),
    2*((u_precision * u_recall) / (u_precision + u_recall))
]


metrics_table

In [0]:
# Display the confusion matrix with the class labels as row and column headers.
confusion_ma2

In [0]:
from sklearn.metrics import accuracy_score

# Fraction of rows whose predicted class equals the reference class.
overall_accuracy = accuracy_score(y_test_category, y_test_predict_category)
print(overall_accuracy)

In [0]:
# Macro precision over the per-class rows only. The summary row stored under
# index 99 already holds this average, so it is excluded from the mean rather
# than being averaged together with the values it summarises.
average_precision = metrics_table.loc[labels, 'precision'].mean()
average_precision

In [0]:
# Macro recall over the per-class rows only. The summary row stored under
# index 99 already holds this average, so it is excluded from the mean rather
# than being averaged together with the values it summarises.
average_recall = metrics_table.loc[labels, 'recall/sensitivity'].mean()
average_recall

In [0]:
# Global F1: harmonic mean of the macro precision and the macro recall.
half_harmonic_mean = (average_precision * average_recall) / (average_precision + average_recall)
global_f1 = 2 * half_harmonic_mean
global_f1

In [0]:
import matplotlib.pyplot as plt
from sklearn.utils.multiclass import unique_labels

# Default size (inches) and resolution for every figure drawn below.
plt.rcParams.update({
    'figure.figsize': [15, 10],
    'figure.dpi': 150,
})

def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues,
                          labels=None):
    """
    Print and plot the confusion matrix of `y_pred` against `y_true`.

    Parameters
    ----------
    y_true, y_pred : array-like
        Reference and predicted class labels, aligned element by element.
    classes : sequence
        Tick labels, one per row/column of the matrix and in matrix order.
    normalize : bool, default False
        When True every row is divided by its sum, so each cell shows the
        share of that true class. Rows without any sample stay at 0.
    title : str, optional
        Plot title. Any falsy value (None or '') selects a default title that
        depends on `normalize`.
    cmap : matplotlib colormap, default plt.cm.Blues
        Colormap used to draw the cells.
    labels : sequence, optional
        Label values forwarded to `confusion_matrix` to fix which classes
        appear and in which order. When omitted, the labels that occur in the
        data are used, as before.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the matrix was drawn on.

    Raises
    ------
    ValueError
        If the number of `classes` differs from the size of the matrix, which
        would put the tick labels on the wrong rows/columns.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    # A class missing from the data shrinks the matrix when `labels` is not
    # given; refuse to draw rather than attach the wrong names to the ticks.
    if len(classes) != cm.shape[0]:
        raise ValueError(
            'Got %d class names for a %dx%d confusion matrix; pass `labels` '
            'to fix the matrix to the expected classes.'
            % (len(classes), cm.shape[0], cm.shape[1])
        )

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row has samples; empty rows stay at 0 instead
        # of turning into NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell with its value; cells darker than half the maximum
    # get white text so the number stays readable.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax

  
# Echo the reference and the predicted label arrays before plotting.
print(y_test_category)
print(y_test_predict_category)

# Short activity names, one per class. They are replaced by the numeric class
# ids right below, so the plot is labelled with the ids.
class_names = [
    'fHand', 'fKne', 'fBak', 'fSitCh', 'fSid', 'wak',
    'stand', 'sit', 'pick', 'jump', 'ly', 'kne',
]
class_names = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20]

# Plot non-normalized confusion matrix (an empty title selects the default one).
plot_confusion_matrix(
    y_test_category,
    y_test_predict_category,
    classes=class_names,
    title='',
)