In [27]:
%matplotlib inline

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import warnings
warnings.filterwarnings(action='ignore')

In [34]:
from General import *
from ReadingTheDataUtils import *
from Classifiers import *
from ExtaSensoryClassifiers import *

# Make the notebook automatically reload external python modules
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
from pathlib import Path

# Base locations, relative to the notebook's working directory.
ROOT_PATH = Path('..')
SRC_PATH = Path('.')
DATA_PATH = ROOT_PATH.joinpath('data')

# Per-user feature/label CSVs: directory and file-name suffix.
CSV_PATH = DATA_PATH.joinpath('ExtraSensory.per_uuid_features_labels')
CSV_SUFFIX = '.features_labels.csv'

# Per-user original-label CSVs: directory and file-name suffix.
ORIGINAL_LABLES_CSV_PATH = DATA_PATH.joinpath('ExtraSensory.per_uuid_original_labels')
ORIGINAL_LABLES_CSV_SUFFIX = '.original_labels.csv'

# Directory holding the cross-validation fold definitions.
FOLD_PATH = DATA_PATH.joinpath('cv_5_folds')

# Data Exploration

In [4]:
# Load the combined dataset, using the `uuid` column as the row index.
data = pd.read_csv(
    DATA_PATH.joinpath('dataset.csv'),
    index_col='uuid',
)

In [5]:
# Preview the first rows to sanity-check the loaded columns and labels.
data.head()

Unnamed: 0_level_0,timestamp,raw_acc:magnitude_stats:mean,raw_acc:magnitude_stats:std,raw_acc:magnitude_stats:moment3,raw_acc:magnitude_stats:moment4,raw_acc:magnitude_stats:percentile25,raw_acc:magnitude_stats:percentile50,raw_acc:magnitude_stats:percentile75,raw_acc:magnitude_stats:value_entropy,raw_acc:magnitude_stats:time_entropy,...,discrete:time_of_day:between0and6,discrete:time_of_day:between3and9,discrete:time_of_day:between6and12,discrete:time_of_day:between9and15,discrete:time_of_day:between12and18,discrete:time_of_day:between15and21,discrete:time_of_day:between18and24,discrete:time_of_day:between21and3,label,label_name
uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00EABED2-271D-49D8-B599-1D4A09240601,1444079161,0.996815,0.003529,-0.002786,0.006496,0.995203,0.996825,0.998502,1.748756,6.684605,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1,original_label:SITTING
00EABED2-271D-49D8-B599-1D4A09240601,1444079221,0.996864,0.004172,-0.00311,0.00705,0.994957,0.996981,0.998766,1.935573,6.684603,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1,original_label:SITTING
00EABED2-271D-49D8-B599-1D4A09240601,1444079281,0.996825,0.003667,0.003094,0.006076,0.994797,0.996614,0.998704,2.03178,6.684605,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1,original_label:SITTING
00EABED2-271D-49D8-B599-1D4A09240601,1444079341,0.996874,0.003541,0.000626,0.006059,0.99505,0.996907,0.99869,1.865318,6.684605,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1,original_label:SITTING
00EABED2-271D-49D8-B599-1D4A09240601,1444079431,0.997371,0.037653,0.043389,0.102332,0.995548,0.99686,0.998205,0.460806,6.683904,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1,original_label:SITTING


In [6]:
# Full per-column summary: dtype and non-null count for every column.
# `show_counts` replaces the `null_counts` keyword, which pandas deprecated and
# later removed.
data.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
Index: 377346 entries, 00EABED2-271D-49D8-B599-1D4A09240601 to FDAA70A1-42A3-4E3F-9AE3-3FDA412E03BF
Data columns (total 228 columns):
timestamp                                                                  377346 non-null int64
raw_acc:magnitude_stats:mean                                               377056 non-null float64
raw_acc:magnitude_stats:std                                                377056 non-null float64
raw_acc:magnitude_stats:moment3                                            377056 non-null float64
raw_acc:magnitude_stats:moment4                                            377056 non-null float64
raw_acc:magnitude_stats:percentile25                                       377056 non-null float64
raw_acc:magnitude_stats:percentile50                                       377056 non-null float64
raw_acc:magnitude_stats:percentile75                                       377056 non-null float64
raw_acc:magnitude_stats:value_entropy 

In [7]:
# Treat the target as a categorical variable.
data['label'] = data['label'].astype('category')

# Cast the indicator features to categorical as well. Their column names start
# with 'discrete' (e.g. 'discrete:time_of_day:between0and6'); the previous
# prefix 'dicrete' was a typo that matched no column, so nothing was converted.
# NOTE(review): these columns now really become `category` dtype -- confirm the
# downstream scaling / null-handling helpers accept that.
for col in data.columns:
    if col.startswith('discrete'):
        data[col] = data[col].astype('category')

# Model 

### Train all classifiers on each fold

In [8]:
# Read the train / test fold definitions from FOLD_PATH.
# Judging by the output below, each is a list of per-fold lists of user UUIDs.
train_folds_list, test_folds_list = get_folds_list(FOLD_PATH)

# Both bare expressions are echoed because the setup cell sets
# `ast_node_interactivity = "all"`.
train_folds_list
test_folds_list

[['33A85C34-CFE4-4732-9E73-0A7AC861B27A',
  '40E170A7-607B-4578-AF04-F021C3B0384A',
  '481F4DD2-7689-43B9-A2AA-C8772227162B',
  '4E98F91F-4654-42EF-B908-A3389443F2E7',
  '59818CD2-24D7-4D32-B133-24C2FE3801E5',
  '59EEFAE0-DEB0-4FFF-9250-54D2A03D0CF2',
  '61359772-D8D8-480D-B623-7C636EAD0C81',
  '665514DE-49DC-421F-8DCB-145D0B2609AD',
  '74B86067-5D4B-43CF-82CF-341B76BEA0F4',
  '797D145F-3858-4A7F-A7C2-A4EB721E133C',
  '806289BC-AD52-4CC1-806C-0CDB14D65EB6',
  '81536B0A-8DBF-4D8A-AC24-9543E2E4C8E0',
  '86A4F379-B305-473D-9D83-FC7D800180EF',
  '96A358A0-FFF2-4239-B93E-C7425B901B47',
  '99B204C0-DD5C-4BB7-83E8-A37281B8D769',
  'A5A30F76-581E-4757-97A2-957553A2C6AA',
  'A7599A50-24AE-46A6-8EA6-2576F1011D81',
  'B7F9D634-263E-4A97-87F9-6FFB4DDCB36C',
  'BEF6C611-50DA-4971-A040-87FB979F3FC1',
  'CCAF77F0-FABB-4F2F-9E24-D56AD0C5A82F',
  'F50235E0-DD67-4F2A-B00B-1F31ADA998B9',
  '2C32C23E-E30C-498A-8DD2-0EFB9150A02E',
  '3600D531-0C55-44A7-AE95-A7A38519464E',
  '4FC32141-E888-4BFF-8804-12559A4

[['0BFC35E2-4817-4865-BFA7-764742302A2D',
  '0E6184E1-90C0-48EE-B25A-F1ECB7B9714E',
  '1155FF54-63D3-4AB2-9863-8385D0BD0A13',
  '1DBB0F6F-1F81-4A50-9DF4-CD62ACFA4842',
  '27E04243-B138-4F40-A164-F40B60165CF3',
  '00EABED2-271D-49D8-B599-1D4A09240601',
  '098A72A5-E3E5-4F54-A152-BBDA0DF7B694',
  '0A986513-7828-4D53-AA1F-E02D6DF9561B',
  '11B5EC4D-4133-4289-B475-4E737182A406',
  '136562B6-95B2-483D-88DC-065F28409FD2',
  '1538C99F-BA1E-4EFB-A949-6C7C47701B20',
  '24E40C4C-A349-4F9F-93AB-01D00FB994AF'],
 ['33A85C34-CFE4-4732-9E73-0A7AC861B27A',
  '40E170A7-607B-4578-AF04-F021C3B0384A',
  '481F4DD2-7689-43B9-A2AA-C8772227162B',
  '4E98F91F-4654-42EF-B908-A3389443F2E7',
  '59818CD2-24D7-4D32-B133-24C2FE3801E5',
  '2C32C23E-E30C-498A-8DD2-0EFB9150A02E',
  '3600D531-0C55-44A7-AE95-A7A38519464E',
  '4FC32141-E888-4BFF-8804-12559A491D8C',
  '5119D0F8-FCA8-4184-A4EB-19421A40DE0D',
  '5152A2DF-FAF3-4BA8-9CA9-E66B32671A53',
  '5EF64122-B513-46AE-BCF1-E62AAC285D2C',
  '61976C24-1C50-4355-9C49-AAE44A

In [None]:
# Train the four classifier variants once per cross-validation fold.
# `classifiers` maps a classifier name to a list with one entry per fold, in
# fold order.
classifiers = dict()

for train_fold_lst, test_fold_lst in zip(train_folds_list, test_folds_list):
    # Preprocess the data: select this fold's rows, split them into features and
    # labels, then scale the features.
    train_fold_df, test_fold_df = get_folds_train_and_test(data, train_fold_lst, test_fold_lst)
    X_fold_train, X_fold_test, y_fold_train, y_fold_test = \
                split_fold_data_to_features_and_lables(train_fold_df, test_fold_df)
    standard_X_train, standard_X_test = standard_data_scaling(X_fold_train, X_fold_test)
    
    # NOTE(review): the return value is discarded, so this presumably fills the
    # nulls in place -- confirm against the helper's implementation.
    handle_nulls_in_X(standard_X_train, standard_X_test)
    
    # Learn all models (each is fitted on this fold's training split only).
    # NOTE(review): `standard_X_test` and `y_fold_test` are not used anywhere in
    # this cell, so no per-fold evaluation happens here.
    single_sensor_classifier = get_single_sensor_classifier(standard_X_train, y_fold_train)
    early_fusion_classifier =  get_early_fusion_classifier(standard_X_train, y_fold_train)
    late_fusion_average_classifier = get_late_fusion_average_classifier(standard_X_train, y_fold_train)
    late_fusion_learned_classifier = get_late_fusion_learned_classifier(standard_X_train, y_fold_train)
    
    # Put each fold result in the classifiers dict (appended only after all four
    # models for the fold have been trained).
    classifiers.setdefault('single_sensor_classifier', []).append(single_sensor_classifier)
    classifiers.setdefault('early_fusion_classifier', []).append(early_fusion_classifier)
    classifiers.setdefault('late_fusion_average_classifier', []).append(late_fusion_average_classifier)
    classifiers.setdefault('late_fusion_learned_classifier', []).append(late_fusion_learned_classifier)

get_single_sensor_classifier


In [None]:
# Display the per-fold results collected by the training loop.
classifiers

In [45]:
# Test: ad-hoc re-run of the four training calls outside the fold loop.
# NOTE(review): this relies on `standard_X_train` / `y_fold_train` left over in
# the kernel from the fold-loop cell (whichever fold ran last), so it fails on a
# fresh kernel unless that cell has run first. The saved output of this cell
# ends in a KeyboardInterrupt, i.e. the run was never completed.
single_sensor_classifier = get_single_sensor_classifier(standard_X_train, y_fold_train)
early_fusion_classifier =  get_early_fusion_classifier(standard_X_train, y_fold_train)
late_fusion_average_classifier = get_late_fusion_average_classifier(standard_X_train, y_fold_train)
late_fusion_learned_classifier = get_late_fusion_learned_classifier(standard_X_train, y_fold_train)

get_single_sensor_classifier
y_train:
0    0.488469
1    0.368229
3    0.085158
4    0.055870
2    0.019791
6    0.015519
5    0.004025
Name: label, dtype: float64
(209232,)
y_validation:
0    0.488483
1    0.368226
3    0.085156
4    0.055871
2    0.019795
6    0.015518
5    0.004025
Name: label, dtype: float64
(103055,)


KeyboardInterrupt: 