In [2]:
"""ALGORITHM STEPS:

1) The first (standing up detection) RF classifier prediction output sets the 'avg_stand' 
feature in the dataframe passed to (2)

2) The main (position state) RF classifier runs and outputs predicted states

3) The "reconcile function" looks through the predicted states and for OCG and YMOUNT looks for any stand up
motions detected in the vicinity. It corrects accordingly

4) The HMM smooths the results to create clean sequences


** In future, step 1 could be replicated with other isolated movements (e.g. bridge, shrimp, sit up)

THOUGHT: is it possible to attempt to detect stand ups using a different time window?
"""

In [144]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import os
from hmmlearn import hmm

from sklearn.ensemble import RandomForestClassifier
from sklearn import cross_validation
from sklearn.cross_validation import KFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.grid_search import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import PolynomialFeatures

from utilities import convert_to_words, print_full, get_position_stats, combine_csv, resolve_acc_gyro, blank_filter, concat_data

TIME_SEQUENCE_LENGTH = 50
DIR = os.path.dirname(os.path.realpath('__file__'))
polynomial_features = PolynomialFeatures(interaction_only=False, include_bias=True, degree=1)

In [185]:
def trial(df_train, test_data):
    """
    Test 1: 1s followed by 3s
    """
    my_test_data = test_data.drop(['avg_stand'], axis=1)
    y = df_train['state'].values
    X = df_train.drop(['avg_stand', 'stand', 'state', 'index'], axis=1)
    if X.isnull().values.any() == False: 

        rf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                max_depth=None, max_features='auto', max_leaf_nodes=None,
                min_samples_leaf=8, min_samples_split=4,
                min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1,
                oob_score=False, random_state=None, verbose=0,
                warm_start=False)

        X = polynomial_features.fit_transform(X)

        X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.1)

    else: 
        print "Found NaN values"

    rf.fit(X_train, y_train)

    p_test_data = polynomial_features.fit_transform(my_test_data)
    rf_pred2 = rf.predict(p_test_data)
    print rf_pred2
    test_data['state'] = rf_pred2
    final_prediction = convert_to_words(rf_pred2)
    print_full(final_prediction)
    get_position_stats(final_prediction)
    return test_data
    #print 'parameter list: {}'.format(polynomial_features.get_params())

In [180]:
def trial_standup(df_train, test_data):
    """
    Test 1: 1s followed by 3s
    """
    y = df_train['avg_stand'].values
    X = df_train.drop(['avg_stand', 'stand', 'state', 'index'], axis=1)
    if X.isnull().values.any() == False: 

        rf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                max_depth=None, max_features='auto', max_leaf_nodes=None,
                min_samples_leaf=8, min_samples_split=4,
                min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1,
                oob_score=False, random_state=None, verbose=0,
                warm_start=False)

        X = polynomial_features.fit_transform(X)

        X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.1)

    else: 
        print "Found NaN values"

    rf.fit(X_train, y_train)

    p_test_data = polynomial_features.fit_transform(test_data)
    print p_test_data
    rf_pred2 = rf.predict(p_test_data)
    print rf_pred2
    
    # Now we have the estimated stand_up values, we use them to create a new feature
    # in the original df
    #rf_pred3 = rf_pred2.astype(int)
    
    test_data['avg_stand'] = rf_pred2
    return test_data

In [339]:
fixed = reconciler(bar)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


     avg_stand  state  shift-8  shift-7  shift-6  shift-5  shift-4  shift-3  \
459          1      7        0        0        7        7        5        5   
499          1      5        0        0        0        0        7        7   

     shift-2  shift-1  shift0  shift1  shift2  shift3  shift4  shift5  shift6  \
459        5        5       7       7       5       5       5       5       5   
499        5        5       5       5       7       7       5       5       5   

     shift7  shift8  
459       5       7  
499       5       5  
[[7, 0, 7, 7, 5, 5, 5, 5, 7, 7, 5, 5, 5, 5, 5, 5, 7], [5, 0, 0, 0, 7, 7, 5, 5, 5, 5, 7, 7, 5, 5, 5, 5, 5]]
[459, 499]
ymount in vicinity
[7, 0, 7, 7, 5, 5, 5, 5, 7, 7, 5, 5, 5, 5, 5, 5, 7]
new values
[7, 5, 7, 7, 5, 5, 5, 5, 7, 7, 5, 5, 5, 5, 5, 5, 7]
index
0
        tiltx     tilty   ACCEL_X   ACCEL_Y   ACCEL_Z     GYRO_X     GYRO_Y  \
19  -1.312797  0.204198 -1.057500  0.171132  0.037447  -0.744553  12.915579   
39  -1.139110  0.133806 -1.007342 

In [337]:
def reconciler(df):
    relevant_df = df[['avg_stand', 'state']]
    #print relevant_df.state
    #print relevant_df.state.shift(0) # does nothing
    #print relevant_df.state.shift(1) # moves forward 1
    #print relevant_df.state.shift(-1) # moves back 1
    # find any instance of a standup
    # return a list of all the states around that standup
    # check if any of those states are "ymount"
    # if they are, change them to "ocg"
    for i in range(-8, 9):
        title = 'shift{}'.format(i)
        relevant_df[title] = relevant_df.state.shift(i).fillna(False)
        
    detected_standup_df = relevant_df.loc[relevant_df['avg_stand'] == 1].apply(lambda x: x.tolist(), axis=1)
    #print detected_standup_df
    
    surrounding_states = []
    surrounding_indexes = []

    for row in detected_standup_df.iterrows():
        index, data = row
        data_list = data.astype(int).tolist()
        data_list.pop(0) # first value is the 'avg_stand' feature which is not what we want
        data_list.pop(1) # 2nd value is the state which is not what we want
        surrounding_states.append(data_list)
        surrounding_indexes.append(index.tolist())
        
    print surrounding_states
    print surrounding_indexes
    
    # Now we check the surrounding states to see if there are any "ymount" values we
    # should convert to "ocg"
    new_values = []
    for index, sequence in enumerate(surrounding_states):
        for values in sequence:
            if (values == 0):
                print 'ymount in vicinity'
                print sequence
                new_values = [5 if x == 0 else x for x in sequence]
                print 'new values'
                print new_values
                print 'index'
                print index
                actual_index = surrounding_indexes[index]
                relevant_df = update_df(df, actual_index, new_values)
                
    return relevant_df

In [338]:
def update_df(df, index, new_values, reach=8):
    
    #print new_values # This is the value at the index (i.e. the row when the stand_up event was 1)
    
    # need to take this list and lay it over the values in the df at that index
    # TODO: catch indexing error
    lower5 = index - (5*20) # 5 because we shift 5 either way, 20 because we're sampling every 40 rows with 50% overlap
    upper5 = index + (5*20)
    #print df.ix[lower:upper]
    
    
    
    for x in range(0,reach):
        amount = reach - x
        i = index - (amount*20)
        df.loc[i, 'state'] = new_values[x]
    
    for y in range(0,reach):
        amount = reach - y
        i = index + (amount*20)
        df.loc[i, 'state'] = new_values[y+reach]
    
    print df
    return df
    #for new_vals, old_vals in zip(new_values, df.ix[lower:upper].state):
    #    df.ix[]

In [148]:
def root_sum_square(x, y, z):
        sum = ((x**2)+(y**2)+(z**2))
        rss = math.sqrt(sum)
        return rss

def root_mean_square(x, y, z):
        mean = ((x**2)+(y**2)+(z**2))/3
        rss = math.sqrt(mean)
        return rss

def tiltx(x, y, z):
    try:
        prep = (x/(math.sqrt((y**2)+(z**2))))
        tilt = math.atan(prep)
    except ZeroDivisionError:
        tilt = 0
    return tilt

def tilty(x, y, z):
    try:
        prep = (y/(math.sqrt((x**2)+(z**2))))
        tilt = math.atan(prep)
    except ZeroDivisionError:
        tilt = 0
    return tilt
    
def max_min_diff(max, min):
    diff = max - min
    return diff

def magnitude(x, y, z):
    magnitude = x + y + z
    return magnitude

def create_features(df, _window=50, test=False):
    accel_x = df['ACCEL_X'].astype(float)
    accel_y = df['ACCEL_Y'].astype(float)
    accel_z = df['ACCEL_Z'].astype(float)
    gyro_x = df['GYRO_X'].astype(float)
    gyro_y = df['GYRO_Y'].astype(float)
    gyro_z = df['GYRO_Z'].astype(float)
    
    df2 = pd.DataFrame()
    
    # capture tilt here, then average later
    
    df2['tiltx'] = df.apply(lambda x: tiltx(x['ACCEL_X'], x['ACCEL_Y'], x['ACCEL_Z']), axis=1)
    df2['tilty'] = df.apply(lambda x: tilty(x['ACCEL_X'], x['ACCEL_Y'], x['ACCEL_Z']), axis=1)
    
    # Capture stand state here, then average later
    
    if (test==False):
        df2['stand'] = df['stand'].astype(float)
    
    TIME_SEQUENCE_LENGTH = _window
    
    # Basics
    
    df2['ACCEL_X'] = pd.rolling_mean(accel_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['ACCEL_Y'] = pd.rolling_mean(accel_y, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['ACCEL_Z'] = pd.rolling_mean(accel_z, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['GYRO_X'] = pd.rolling_mean(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['GYRO_Y'] = pd.rolling_mean(gyro_y, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['GYRO_Z'] = pd.rolling_mean(gyro_z, TIME_SEQUENCE_LENGTH-2, center=True)
    
    # rolling median

    df2['rolling_median_x'] = pd.rolling_median(accel_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_median_y'] = pd.rolling_median(accel_y, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_median_z'] = pd.rolling_median(accel_z, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_median_gx'] = pd.rolling_median(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_median_gy'] = pd.rolling_median(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_median_gz'] = pd.rolling_median(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    
    # rolling max
    
    df2['rolling_max_x'] = pd.rolling_max(accel_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_max_y'] = pd.rolling_max(accel_y, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_max_z'] = pd.rolling_max(accel_z, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_max_gx'] = pd.rolling_max(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_max_gy'] = pd.rolling_max(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_max_gz'] = pd.rolling_max(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    
    # rolling min
    
    df2['rolling_min_x'] = pd.rolling_min(accel_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_min_y'] = pd.rolling_min(accel_y, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_min_z'] = pd.rolling_min(accel_z, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_min_gx'] = pd.rolling_min(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_min_gy'] = pd.rolling_min(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_min_gz'] = pd.rolling_min(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    
    # rolling sum
    
    df2['rolling_sum_x'] = pd.rolling_sum(accel_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_sum_y'] = pd.rolling_sum(accel_y, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_sum_z'] = pd.rolling_sum(accel_z, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_sum_gx'] = pd.rolling_sum(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_sum_gy'] = pd.rolling_sum(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_sum_gz'] = pd.rolling_sum(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    
    # standard deviation
    
    df2['rolling_std_x'] = pd.rolling_std(accel_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_std_y'] = pd.rolling_std(accel_y, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_std_z'] = pd.rolling_std(accel_z, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_std_gx'] = pd.rolling_std(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_std_gy'] = pd.rolling_std(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    df2['rolling_std_gz'] = pd.rolling_std(gyro_x, TIME_SEQUENCE_LENGTH-2, center=True)
    
    # Tilt
    df2['avg_tiltx'] = pd.rolling_mean(df2['tiltx'], TIME_SEQUENCE_LENGTH-2, center=True)
    df2['avg_tilty'] = pd.rolling_mean(df2['tilty'], TIME_SEQUENCE_LENGTH-2, center=True)
    
    
    if (test==False):
        # standing up detection
        df2['avg_stand'] = pd.rolling_mean(df2['stand'], TIME_SEQUENCE_LENGTH-2, center=True)
        print df2['avg_stand']

        # round standing up as we need it to be either '0' or '1' for training later
        df2['avg_stand'] = df2['avg_stand'].apply(lambda x: math.ceil(x))

    ol_upper = _window/2
    ol_lower = ol_upper-1
        
    new_df = df2[ol_lower::ol_upper] # 50% overlap with 30
    
    new_df['max_min_x'] = df2.apply(lambda x: max_min_diff(x['rolling_max_x'], x['rolling_min_x']), axis=1)
    new_df['max_min_y'] = df2.apply(lambda x: max_min_diff(x['rolling_max_y'], x['rolling_min_y']), axis=1)
    new_df['max_min_z'] = df2.apply(lambda x: max_min_diff(x['rolling_max_z'], x['rolling_min_z']), axis=1)
    new_df['max_min_gx'] = df2.apply(lambda x: max_min_diff(x['rolling_max_gx'], x['rolling_min_gx']), axis=1)
    new_df['max_min_gy'] = df2.apply(lambda x: max_min_diff(x['rolling_max_gy'], x['rolling_min_gy']), axis=1)
    new_df['max_min_gz'] = df2.apply(lambda x: max_min_diff(x['rolling_max_gz'], x['rolling_min_gz']), axis=1)
                                                                       
    new_df['acc_rss'] = df2.apply(lambda x: root_sum_square(x['ACCEL_X'], x['ACCEL_Y'], x['ACCEL_Z']), axis=1)
    new_df['gyro_rss'] = df2.apply(lambda x: root_sum_square(x['GYRO_X'], x['GYRO_Y'], x['GYRO_Z']), axis=1)
    
    new_df['acc_rms'] = df2.apply(lambda x: root_mean_square(x['ACCEL_X'], x['ACCEL_Y'], x['ACCEL_Z']), axis=1)
    new_df['gyro_rms'] = df2.apply(lambda x: root_mean_square(x['GYRO_X'], x['GYRO_Y'], x['GYRO_Z']), axis=1)
    
    new_df['acc_magnitude'] = df2.apply(lambda x: magnitude(x['ACCEL_X'], x['ACCEL_Y'], x['ACCEL_Z']), axis=1)
    new_df['gyro_magnitude'] = df2.apply(lambda x: magnitude(x['GYRO_X'], x['GYRO_Y'], x['GYRO_Z']), axis=1)
        
    return new_df


# Test method:
# data = np.array([np.mean(training_data.ACCEL_X[0:30]), np.mean(training_data.ACCEL_X[30:45]), np.mean(training_data.ACCEL_X[30:60])])
# desired_df = pd.DataFrame(data, columns=columns)
# print desired_df


In [149]:
def set_state(df, state):
    """set the state for training"""

    if state == 'your_mount':
        df['state'] = 0
    elif state == 'your_side_control':
        df['state'] = 1
    elif state =='your_closed_guard':
        df['state'] = 2
    elif state =='your_back_control':
        df['state'] = 3
    elif state =='opponent_mount_or_sc':
        df['state'] = 4
    elif state =='opponent_closed_guard':
        df['state'] = 5
    elif state == 'opponent_back_control':
        df['state'] = 6
    elif state =='non_jj':
        df['state'] = 7
        
    return df

In [150]:
def set_stand_state(df, stand_state):
    if (stand_state == 1):
        df['stand'] = 1
    else:
        df['stand'] = 0
    
    print df
    return df

In [151]:
def combine_setState_createFeatures(directory, state, window=50, stand=0):
    """
    convenience method to combine three steps in one function:
    (1) combine multiple csv files, (2) set their movement state for training,
    (3) combine to create time sequences and add features
    """
    combined_data = combine_csv(directory)
    combined_data_updated = set_state(combined_data, state)
    combined_data_updated2 = set_stand_state(combined_data_updated, stand)
    feature_training_data = create_features(combined_data_updated2, window)
    ready_training_data = set_state(feature_training_data, state)
    return ready_training_data

In [152]:
def prep(window=30):
    """prepare the raw sensor data"""

    #1 Your mount
    ymount_td = combine_setState_createFeatures('your_mount_raw_data', 'your_mount', window, 0)
    #2 Your side control
    ysc_td = combine_setState_createFeatures('your_side_control_raw_data', 'your_side_control', window, 0)
    #3 Your closed guard
    ycg_td = combine_setState_createFeatures('your_closed_guard_raw_data', 'your_closed_guard', window, 0)
    #4 Your back control
    ybc_td = combine_setState_createFeatures('your_back_control_raw_data', 'your_back_control', window, 0)
    #5 Opponent mount or opponent side control
    omountsc_td = combine_setState_createFeatures('opponent_mount_and_opponent_side_control_raw_data', 'opponent_mount_or_sc', window, 0)
    #6 Opponent closed guard
    ocg_td = combine_setState_createFeatures('opponent_closed_guard_raw_data', 'opponent_closed_guard', window, 0)
    #7 Opponent back control
    obc_td = combine_setState_createFeatures('opponent_back_control_raw_data', 'opponent_back_control', window, 0)
    #8 "Non jiu-jitsu" motion
    nonjj_td = combine_setState_createFeatures('non_jj_raw_data', 'non_jj', window, 0)
    #9 "stand up" motion
    stand_up_td = combine_setState_createFeatures('standing_up_raw_data', 'opponent_closed_guard', window, 1)

    training_data = concat_data([ymount_td, ysc_td, ycg_td, ybc_td, omountsc_td, ocg_td, obc_td, nonjj_td, stand_up_td])
    # remove NaN
    training_data = blank_filter(training_data)
    return training_data

In [160]:
def prep_test(el_file):
    el_file = 'data/test_cases/' + el_file
    df = pd.DataFrame()
    df = pd.read_csv(el_file, index_col=None, header=0)
    df = resolve_acc_gyro(df)
    df = create_features(df, _window=40, test=True)
    test_data = blank_filter(df)

    return test_data

In [111]:
def test_model_stand(df_train):
    """check model accuracy"""

    y = df_train['avg_stand'].values
    X = df_train.drop(['avg_stand', 'stand', 'state', 'index'], axis=1)
    
    if X.isnull().values.any() == False: 

        rf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                max_depth=None, max_features='auto', max_leaf_nodes=None,
                min_samples_leaf=8, min_samples_split=4,
                min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1,
                oob_score=False, random_state=None, verbose=0,
                warm_start=False)
        
        X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.1)
        
    rf.fit(X_train, y_train)
    rf_pred = rf.predict(X_test)
    rf_scores = cross_validation.cross_val_score(
    rf, X, df_train.state, cv=10, scoring='accuracy')
    
    print 'rf prediction: {}'.format(accuracy_score(y_test, rf_pred))
    print("Random Forest Accuracy: %0.2f (+/- %0.2f)" % (rf_scores.mean(), rf_scores.std() * 2))
    
    importances = rf.feature_importances_
    std = np.std([tree.feature_importances_ for tree in rf.estimators_],
             axis=0)
    indices = np.argsort(importances)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    for f in range(X.shape[1]):
        print("%d. feature %s (%f)" % (f + 1, X.columns[indices[f]], importances[indices[f]]))
    

In [112]:
def test_model(df_train):
    """check model accuracy"""

    y = df_train['state'].values
    X = df_train.drop(['state', 'index'], axis=1)
    
    if X.isnull().values.any() == False: 

        rf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                max_depth=None, max_features='auto', max_leaf_nodes=None,
                min_samples_leaf=8, min_samples_split=4,
                min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1,
                oob_score=False, random_state=None, verbose=0,
                warm_start=False)
        
        X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.1)
        
    rf.fit(X_train, y_train)
    rf_pred = rf.predict(X_test)
    rf_scores = cross_validation.cross_val_score(
    rf, X, df_train.state, cv=10, scoring='accuracy')
    
    print 'rf prediction: {}'.format(accuracy_score(y_test, rf_pred))
    print("Random Forest Accuracy: %0.2f (+/- %0.2f)" % (rf_scores.mean(), rf_scores.std() * 2))
    
    importances = rf.feature_importances_
    std = np.std([tree.feature_importances_ for tree in rf.estimators_],
             axis=0)
    indices = np.argsort(importances)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    for f in range(X.shape[1]):
        print("%d. feature %s (%f)" % (f + 1, X.columns[indices[f]], importances[indices[f]]))

In [59]:
training_data30 = prep(30)

['/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/DIO_YMOUNT.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/GL_ymount_CS.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/GL_ymount_UrsWearing.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/ymount.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/ymountUrs.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/ymountUrs2.csv']
       index             timestamp_x  ACCEL_X  ACCEL_Y  ACCEL_Z  \
0          0  2016-03-26 13:33:12.67   -0.358    0.636    0.721   
1          1  2016-03-26 13:33:12.67   -0.071   -0.115   -1.280   
2          2  2016-03-26 13:33:12.67    0.333    0.322   -0.766   
3          3  2016-03-26 13:33:12.67    0.015   -0.092   -0.526   
4          4  2016-03-26 13:33:12

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


['/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/DIO_YSC.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/GL_ysc_UrsWearing.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/ysc1.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/ysc2.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/yscUrs.csv']
      index             timestamp_x  ACCEL_X  ACCEL_Y  ACCEL_Z  \
0         0  2016-03-26 13:29:58.62   -0.646   -0.181   -0.770   
1         1  2016-03-26 13:29:58.62   -0.841   -0.167   -0.888   
2         2  2016-03-26 13:29:58.62   -0.788   -0.104   -0.893   
3         3  2016-03-26 13:29:58.62   -0.623   -0.141   -0.765   
4         4  2016-03-26 13:29:58.62   -0.512   -0.071   -0.807   
5         5  2016-03-26 13:29:58.62   -0.574   -0.069   -0.8

In [15]:
#print training_data50.stand.describe()

In [84]:
training_data = prep(30)
training_data10 = prep(10)
training_data20 = prep(20)
training_data26 = prep(26) # need numbers that divide into 2 easily
training_data36 = prep(36) # need numbers that divide into 2 easily
training_data40 = prep(40)
training_data50 = prep(50)
training_data56 = prep(56)
training_data60 = prep(60)
training_data64 = prep(64)
training_data70 = prep(70)


['/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/DIO_YMOUNT.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/GL_ymount_CS.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/GL_ymount_UrsWearing.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/ymount.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/ymountUrs.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/ymountUrs2.csv']
       index             timestamp_x  ACCEL_X  ACCEL_Y  ACCEL_Z  \
0          0  2016-03-26 13:33:12.67   -0.358    0.636    0.721   
1          1  2016-03-26 13:33:12.67   -0.071   -0.115   -1.280   
2          2  2016-03-26 13:33:12.67    0.333    0.322   -0.766   
3          3  2016-03-26 13:33:12.67    0.015   -0.092   -0.526   
4          4  2016-03-26 13:33:12

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


['/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/DIO_YSC.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/GL_ysc_UrsWearing.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/ysc1.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/ysc2.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/yscUrs.csv']
      index             timestamp_x  ACCEL_X  ACCEL_Y  ACCEL_Z  \
0         0  2016-03-26 13:29:58.62   -0.646   -0.181   -0.770   
1         1  2016-03-26 13:29:58.62   -0.841   -0.167   -0.888   
2         2  2016-03-26 13:29:58.62   -0.788   -0.104   -0.893   
3         3  2016-03-26 13:29:58.62   -0.623   -0.141   -0.765   
4         4  2016-03-26 13:29:58.62   -0.512   -0.071   -0.807   
5         5  2016-03-26 13:29:58.62   -0.574   -0.069   -0.8

ERROR: Internal Python error in the inspect module.
Below is the traceback from this internal error.


Unfortunately, your original traceback can not be constructed.



TypeError: 'NoneType' object is not iterable

In [86]:
print training_data20

       index     tiltx     tilty  stand   ACCEL_X   ACCEL_Y   ACCEL_Z  \
0          9 -0.374351 -0.205611      0 -0.240000 -0.051056 -0.857222   
1         19 -0.260666  0.357542      0 -0.413722  0.051000 -0.900833   
2         29 -0.328777 -0.155572      0 -0.328000  0.050278 -0.947389   
3         39 -0.187004  0.056524      0 -0.300667  0.159222 -0.949611   
4         49  0.078648  0.178678      0 -0.219056  0.186778 -0.940833   
5         59 -0.190364  0.252252      0 -0.237444  0.053333 -0.942222   
6         69 -0.335886  0.218027      0 -0.302222  0.022389 -0.965222   
7         79 -0.275254  0.042607      0 -0.145333 -0.003611 -0.958667   
8         89 -0.233521  0.056673      0 -0.074389  0.172833 -0.937556   
9         99  0.442236  0.669933      0 -0.023722  0.341556 -0.902556   
10       109 -0.779327 -0.243355      0 -0.111889  0.210389 -0.981500   
11       119 -0.253330  0.365558      0 -0.007111  0.187278 -0.959500   
12       129  0.277978 -0.091599      0  0.092111  

In [18]:
#1 Your mount
ymount_td = combine_setState_createFeatures('your_mount_raw_data', 'your_mount')
#2 Your side control
ysc_td = combine_setState_createFeatures('your_side_control_raw_data', 'your_side_control')
#3 Your closed guard
ycg_td = combine_setState_createFeatures('your_closed_guard_raw_data', 'your_closed_guard')
#4 Your back control
ybc_td = combine_setState_createFeatures('your_back_control_raw_data', 'your_back_control')
#5 Opponent mount or opponent side control
omountsc_td = combine_setState_createFeatures('opponent_mount_and_opponent_side_control_raw_data', 'opponent_mount_or_sc')
#6 Opponent closed guard
ocg_td = combine_setState_createFeatures('opponent_closed_guard_raw_data', 'opponent_closed_guard')
#7 Opponent back control
obc_td = combine_setState_createFeatures('opponent_back_control_raw_data', 'opponent_back_control')
#8 "Non jiu-jitsu" motion
nonjj_td = combine_setState_createFeatures('non_jj_raw_data', 'non_jj')

['/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/DIO_YMOUNT.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/GL_ymount_CS.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/GL_ymount_UrsWearing.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/ymount.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/ymountUrs.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_mount_raw_data/ymountUrs2.csv']
       index             timestamp_x  ACCEL_X  ACCEL_Y  ACCEL_Z  \
0          0  2016-03-26 13:33:12.67   -0.358    0.636    0.721   
1          1  2016-03-26 13:33:12.67   -0.071   -0.115   -1.280   
2          2  2016-03-26 13:33:12.67    0.333    0.322   -0.766   
3          3  2016-03-26 13:33:12.67    0.015   -0.092   -0.526   
4          4  2016-03-26 13:33:12

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


['/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/DIO_YSC.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/GL_ysc_UrsWearing.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/ysc1.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/ysc2.csv', '/Users/christophersamiullah/repos/sensor_readings/ML_Sandbox/data/your_side_control_raw_data/yscUrs.csv']
      index             timestamp_x  ACCEL_X  ACCEL_Y  ACCEL_Z  \
0         0  2016-03-26 13:29:58.62   -0.646   -0.181   -0.770   
1         1  2016-03-26 13:29:58.62   -0.841   -0.167   -0.888   
2         2  2016-03-26 13:29:58.62   -0.788   -0.104   -0.893   
3         3  2016-03-26 13:29:58.62   -0.623   -0.141   -0.765   
4         4  2016-03-26 13:29:58.62   -0.512   -0.071   -0.807   
5         5  2016-03-26 13:29:58.62   -0.574   -0.069   -0.8

In [171]:
test_data1 = prep_test('test1_ymount_ycg.csv')
test_data4 = prep_test('GL_TEST1_CS.csv')
test_data5 = prep_test('GL_TEST2_CS.csv')
test_data6 = prep_test('GL_TEST3_CS_very_still.csv')
test_data7 = prep_test('GL_TEST1_UrsWearing.csv')
test_data8 = prep_test('DIO_YCG_YMOUNT_YSC.csv')
test_data9 = prep_test('DIO_OCG_YCG_YMOUNT_YBC.csv')

test_data100 = prep_test('CS_OCG_STAND_OCG.csv')
test_data101 = prep_test('URS_OCG_STAND_OCG1.csv')
test_data102 = prep_test('CS_OCG_STAND_OCG2.csv')
test_data103 = prep_test('URS_OCG_STAND_OCG2.csv')
test_data104 = prep_test('CS_OCG_STAND_OCG3.csv')
test_data105 = prep_test('URS_OCG_STAND_OCG3.csv')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Removed 2 NaN rows
Removed 2 NaN rows
Removed 1 NaN rows
Removed 4 NaN rows
Removed 1 NaN rows
Removed 0 NaN rows
Removed 1 NaN rows
Removed 1 NaN rows
Removed 1 NaN rows
Removed 2 NaN rows
Removed 1 NaN rows
Removed 1 NaN rows
Removed 2 NaN rows


In [24]:
print training_data.columns
print test_data1.columns

Index([u'index', u'tiltx', u'tilty', u'stand', u'ACCEL_X', u'ACCEL_Y',
       u'ACCEL_Z', u'GYRO_X', u'GYRO_Y', u'GYRO_Z', u'rolling_median_x',
       u'rolling_median_y', u'rolling_median_z', u'rolling_median_gx',
       u'rolling_median_gy', u'rolling_median_gz', u'rolling_max_x',
       u'rolling_max_y', u'rolling_max_z', u'rolling_max_gx',
       u'rolling_max_gy', u'rolling_max_gz', u'rolling_min_x',
       u'rolling_min_y', u'rolling_min_z', u'rolling_min_gx',
       u'rolling_min_gy', u'rolling_min_gz', u'rolling_sum_x',
       u'rolling_sum_y', u'rolling_sum_z', u'rolling_sum_gx',
       u'rolling_sum_gy', u'rolling_sum_gz', u'rolling_std_x',
       u'rolling_std_y', u'rolling_std_z', u'rolling_std_gx',
       u'rolling_std_gy', u'rolling_std_gz', u'avg_tiltx', u'avg_tilty',
       u'avg_stand', u'max_min_x', u'max_min_y', u'max_min_z', u'max_min_gx',
       u'max_min_gy', u'max_min_gz', u'acc_rss', u'gyro_rss', u'acc_rms',
       u'gyro_rms', u'acc_magnitude', u'gyro_magnitude

In [35]:
pre_smooth = trial(training_data, test_data1)

KeyboardInterrupt: 

In [340]:
# This is a full sequence
# step 1
test_data100 = prep_test('CS_OCG_STAND_OCG.csv')
foo = trial_standup(training_data40, test_data101)
print foo

# step 2
bar = trial(training_data40, foo)
print bar

# step 3

fixed = reconciler(bar)
print fixed

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Removed 1 NaN rows
[[  1.00000000e+00  -1.31279714e+00   2.04198322e-01 ...,   8.68894565e+00
   -8.48921053e-01   4.48163158e+00]
 [  1.00000000e+00  -1.13911007e+00   1.33805537e-01 ...,   9.43267717e+00
   -1.00513158e+00   2.29991579e+01]
 [  1.00000000e+00  -1.05687718e+00   5.13801631e-01 ...,   6.47864336e+00
   -7.91157895e-01  -2.76623684e+00]
 ..., 
 [  1.00000000e+00  -1.01923408e+00   3.77077830e-01 ...,   6.14529498e+00
   -9.90736842e-01  -5.08497368e+00]
 [  1.00000000e+00  -1.43059515e+00   1.32475552e-01 ...,   7.03569237e+00
   -1.07563158e+00  -9.72236842e+00]
 [  1.00000000e+00  -1.02209441e+00   9.03828296e-03 ...,   3.53437385e+00
   -1.27592105e+00  -9.32236842e-01]]
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  1.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.]
        tiltx     tilty   ACCEL_X   ACCEL_Y   ACCEL_Z     GYRO_X     GYRO_Y  \
19  -1.312797  0.204198 -1.057500  0.171132  0.037447  -0.744553  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


        tiltx     tilty   ACCEL_X   ACCEL_Y   ACCEL_Z     GYRO_X     GYRO_Y  \
19  -1.312797  0.204198 -1.057500  0.171132  0.037447  -0.744553  12.915579   
39  -1.139110  0.133806 -1.007342  0.216895 -0.214684   0.417237  13.717947   
59  -1.056877  0.513802 -0.987658  0.345421 -0.148921  -1.891789  -8.246105   
79  -1.469734  0.058936 -0.999184  0.176895 -0.229868  -6.288579  15.564921   
99  -1.182664  0.139387 -0.954605  0.240368 -0.323684  -3.823868   1.566184   
119 -1.059996  0.460293 -0.945658  0.368947 -0.277237   7.594632  -6.928868   
139 -1.292107  0.180298 -1.011763  0.141421 -0.252316 -11.035053  -1.859842   
159 -1.333681  0.231458 -0.966263  0.228132 -0.254763   4.948605   3.514132   
179 -0.971726  0.406607 -0.938211  0.337421 -0.309711   7.594737   0.025711   
199 -1.520475  0.049277 -1.054763  0.182684 -0.077868  -4.136632 -18.997105   
219 -1.373731 -0.155270 -1.062842  0.004368 -0.088263  -1.249974  16.612605   
239 -0.625393  0.516926 -0.708816  0.211895 -0.45826

        tiltx     tilty   ACCEL_X   ACCEL_Y   ACCEL_Z     GYRO_X     GYRO_Y  \
19  -1.312797  0.204198 -1.057500  0.171132  0.037447  -0.744553  12.915579   
39  -1.139110  0.133806 -1.007342  0.216895 -0.214684   0.417237  13.717947   
59  -1.056877  0.513802 -0.987658  0.345421 -0.148921  -1.891789  -8.246105   
79  -1.469734  0.058936 -0.999184  0.176895 -0.229868  -6.288579  15.564921   
99  -1.182664  0.139387 -0.954605  0.240368 -0.323684  -3.823868   1.566184   
119 -1.059996  0.460293 -0.945658  0.368947 -0.277237   7.594632  -6.928868   
139 -1.292107  0.180298 -1.011763  0.141421 -0.252316 -11.035053  -1.859842   
159 -1.333681  0.231458 -0.966263  0.228132 -0.254763   4.948605   3.514132   
179 -0.971726  0.406607 -0.938211  0.337421 -0.309711   7.594737   0.025711   
199 -1.520475  0.049277 -1.054763  0.182684 -0.077868  -4.136632 -18.997105   
219 -1.373731 -0.155270 -1.062842  0.004368 -0.088263  -1.249974  16.612605   
239 -0.625393  0.516926 -0.708816  0.211895 -0.45826

In [None]:
bar = trial_standup(training_data50, test_data101)

In [None]:
test_model_stand(training_data50) # newest with tilt

In [129]:
print '*** 30 rows ***'
test_model(training_data)

print '*** 10 rows ***'
test_model(training_data10)

print '*** 20 rows ***'
test_model(training_data20)

print '*** 26 rows ***'
test_model(training_data26)

print '*** 36 rows ***'
test_model(training_data36)

print '*** 40 rows ***'
test_model(training_data40)

print '*** 50 rows ***'
test_model(training_data50)

print '*** 60 rows ***'
test_model(training_data60)

print '*** 56 rows ***'
test_model(training_data56)

print '*** 64 rows ***'
test_model(training_data64)

print '*** 70 rows ***'
test_model(training_data70)

*** 30 rows ***
rf prediction: 0.842741935484
Random Forest Accuracy: 0.72 (+/- 0.23)
Feature ranking:
1. feature rolling_max_z (0.065786)
2. feature ACCEL_Z (0.065241)
3. feature rolling_sum_z (0.061869)
4. feature rolling_median_z (0.056797)
5. feature rolling_min_z (0.055261)
6. feature ACCEL_X (0.039785)
7. feature rolling_sum_x (0.036704)
8. feature avg_tiltx (0.034615)
9. feature rolling_max_x (0.031817)
10. feature rolling_min_x (0.031338)
11. feature acc_magnitude (0.030131)
12. feature stand (0.029087)
13. feature rolling_std_x (0.026984)
14. feature max_min_x (0.026543)
15. feature rolling_median_x (0.026307)
16. feature avg_stand (0.026199)
17. feature max_min_y (0.023757)
18. feature acc_rss (0.020202)
19. feature acc_rms (0.018855)
20. feature rolling_std_y (0.018715)
21. feature tiltx (0.015950)
22. feature avg_tilty (0.015802)
23. feature rolling_sum_y (0.015252)
24. feature ACCEL_Y (0.014943)
25. feature max_min_z (0.014838)
26. feature rolling_max_y (0.014547)
27. feat

In [None]:
print training_data70

In [None]:
pre_smooth2 = trial(training_data, test_data4)

In [None]:
#pre_smooth3 = trial(training_data, test_data5)

In [None]:
#pre_smooth4 = trial(training_data, test_data6)

In [None]:
#pre_smooth5 = trial(training_data, test_data7)

In [None]:
print pre_smooth
pre_smooth_words = convert_to_words(pre_smooth)
pre_smooth_words2 = convert_to_words(pre_smooth2)
#pre_smooth_words3 = convert_to_words(pre_smooth3)
#pre_smooth_words4 = convert_to_words(pre_smooth4)
#pre_smooth_words5 = convert_to_words(pre_smooth5)
print pre_smooth_words

In [None]:
n_components = 8 # ('ybc', 'ymount', 'ysc', 'ycg', 'ocg', 'osc_mount', 'obc', 'other')
# n_components = 3
startprob = np.array([0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.65,]) # users will probably turn on sensor standing
# startprob = np.array([0.34, 0.33, 0.33])

In [None]:
# transmat = np.array([[0.34, 0.33, 0.33], [0.9, 0.05, 0.05], [0.9, 0.05, 0.05]])

"""
probability of these positions given current state:

your_mount' if v == 0 
else 'your_side_control' if v == 1
else 'your_closed_guard' if v == 2
else 'your_back_control' if v == 3
else 'opponent_mount_or_sc' if v == 4
else 'opponent_closed_guard' if v == 5
else 'opponent_back_control' if v == 6
else 'OTHER' if v == 7

transition_probability = {
        'ymt' : {'ymount': 0.800, 'ysc': 0.050, 'ycg': 0.010, 'ybc': 0.050, 'osc_mount': 0.001, 'ocg': 0.050, 'obc': 0.001, 'other': 0.038},
        'ysc' : {'ymount': 0.100, 'ysc': 0.800, 'ycg': 0.010, 'ybc': 0.010, 'osc_mount': 0.001, 'ocg': 0.050, 'obc': 0.001, 'other': 0.028},
        'ycg' : {'ymount': 0.010, 'ysc': 0.050, 'ycg': 0.800, 'ybc': 0.010, 'osc_mount': 0.050, 'ocg': 0.001, 'obc': 0.001, 'other': 0.078},
        'ybc' : {'ymount': 0.050, 'ysc': 0.010, 'ycg': 0.050, 'ybc': 0.800, 'osc_mount': 0.001, 'ocg': 0.010, 'obc': 0.001, 'other': 0.078},
        'omt' : {'ymount': 0.001, 'ysc': 0.050, 'ycg': 0.010, 'ybc': 0.001, 'osc_mount': 0.800, 'ocg': 0.050, 'obc': 0.050, 'other': 0.038},
        'ocg' : {'ymount': 0.100, 'ysc': 0.050, 'ycg': 0.010, 'ybc': 0.010, 'osc_mount': 0.001, 'ocg': 0.800, 'obc': 0.001, 'other': 0.028},
        'obc' : {'ymount': 0.010, 'ysc': 0.050, 'ycg': 0.001, 'ybc': 0.010, 'osc_mount': 0.050, 'ocg': 0.001, 'obc': 0.800, 'other': 0.078},
        'oth' : {'ymount': 0.050, 'ysc': 0.010, 'ycg': 0.050, 'ybc': 0.078, 'osc_mount': 0.001, 'ocg': 0.010, 'obc': 0.001, 'other': 0.800}
     }
"""

transmat = np.array([
                    [0.800, 0.050, 0.010, 0.050, 0.001, 0.050, 0.001, 0.038], 
                    [0.100, 0.800, 0.010, 0.010, 0.001, 0.050, 0.001, 0.028], 
                    [0.010, 0.050, 0.800, 0.010, 0.050, 0.001, 0.001, 0.078], 
                    [0.050, 0.010, 0.050, 0.800, 0.001, 0.010, 0.001, 0.078],
                    [0.001, 0.050, 0.010, 0.001, 0.800, 0.050, 0.050, 0.038],
                    [0.100, 0.050, 0.010, 0.010, 0.001, 0.800, 0.001, 0.028],
                    [0.010, 0.050, 0.001, 0.010, 0.050, 0.001, 0.800, 0.078],
                    [0.050, 0.010, 0.050, 0.078, 0.001, 0.010, 0.001, 0.800],
                    ])

In [None]:
# emissionprob = np.array([[0.34, 0.33, 0.33], [0.4, 0.55, 0.05], [0.05, 0.55, 0.4]])

"""

probability of these positions given current state:

your_mount' if v == 0 
else 'your_side_control' if v == 1
else 'your_closed_guard' if v == 2
else 'your_back_control' if v == 3
else 'opponent_mount_or_sc' if v == 4
else 'opponent_closed_guard' if v == 5
else 'opponent_back_control' if v == 6
else 'OTHER' if v == 7

emission_probability = {
        'ymt' : {'ymount': 0.500, 'ysc': 0.050, 'ycg': 0.010, 'ybc': 0.050, 'osc_mount': 0.001, 'ocg': 0.350, 'obc': 0.001, 'other': 0.038},
        'ysc' : {'ymount': 0.100, 'ysc': 0.800, 'ycg': 0.010, 'ybc': 0.010, 'osc_mount': 0.001, 'ocg': 0.050, 'obc': 0.001, 'other': 0.028},
        'ycg' : {'ymount': 0.010, 'ysc': 0.050, 'ycg': 0.400, 'ybc': 0.010, 'osc_mount': 0.500, 'ocg': 0.001, 'obc': 0.001, 'other': 0.078},
        'ybc' : {'ymount': 0.050, 'ysc': 0.010, 'ycg': 0.050, 'ybc': 0.600, 'osc_mount': 0.001, 'ocg': 0.010, 'obc': 0.201, 'other': 0.078},
        'omt' : {'ymount': 0.001, 'ysc': 0.050, 'ycg': 0.210, 'ybc': 0.050, 'osc_mount': 0.600, 'ocg': 0.050, 'obc': 0.001, 'other': 0.038},
        'ocg' : {'ymount': 0.400, 'ysc': 0.050, 'ycg': 0.010, 'ybc': 0.010, 'osc_mount': 0.001, 'ocg': 0.400, 'obc': 0.001, 'other': 0.028},
        'obc' : {'ymount': 0.010, 'ysc': 0.050, 'ycg': 0.001, 'ybc': 0.110, 'osc_mount': 0.050, 'ocg': 0.001, 'obc': 0.700, 'other': 0.078},
        'oth' : {'ymount': 0.050, 'ysc': 0.010, 'ycg': 0.050, 'ybc': 0.078, 'osc_mount': 0.001, 'ocg': 0.010, 'obc': 0.001, 'other': 0.800}
     }
"""

emissionprob = np.array([
                        [0.500, 0.050, 0.010, 0.050, 0.001, 0.350, 0.001, 0.038], 
                        [0.100, 0.800, 0.010, 0.010, 0.001, 0.050, 0.001, 0.028], 
                        [0.010, 0.050, 0.350, 0.010, 0.500, 0.001, 0.001, 0.078], 
                        [0.050, 0.010, 0.050, 0.700, 0.001, 0.010, 0.101, 0.078],
                        [0.001, 0.050, 0.210, 0.050, 0.600, 0.050, 0.001, 0.038],
                        [0.400, 0.050, 0.010, 0.010, 0.001, 0.400, 0.001, 0.028],
                        [0.010, 0.050, 0.001, 0.110, 0.050, 0.001, 0.700, 0.078],
                        [0.050, 0.010, 0.050, 0.078, 0.001, 0.010, 0.001, 0.800],
                        ])


In [None]:
# Hidden Markov Model with multinomial (discrete) emissions
model = hmm.MultinomialHMM(n_components=n_components,
                           n_iter=10,
                           verbose=False)

model.startprob_ = startprob
model.transmat_ = transmat
model.emissionprob_ = emissionprob
# model.n_features = 8

In [None]:
# observations = np.array([1, 1, 2, 2, 1, 0, 1, 2, 2, 0])
observations = np.array(pre_smooth)
observations2 = np.array(pre_smooth2)
#observations3 = np.array(pre_smooth3)
#observations4 = np.array(pre_smooth4)
#observations5 = np.array(pre_smooth5)
a,b = model.sample(5)
print a,b
print '=========='

n_samples = len(observations)
data = observations.reshape((n_samples, -1))
print data

n_samples2 = len(observations2)
data2 = observations2.reshape((n_samples2, -1))

#n_samples3 = len(observations3)
#data3 = observations3.reshape((n_samples3, -1))

#n_samples4 = len(observations4)
#data4 = observations4.reshape((n_samples4, -1))

#n_samples5 = len(observations5)
#data5 = observations5.reshape((n_samples5, -1))

In [None]:
# decode(X, lengths=None, algorithm=None)[source]
# Find most likely state sequence corresponding to X.
# Will work best for organic tests

"""correct sequence
your_mount' if v == 0 
else 'your_side_control' if v == 1
else 'your_closed_guard' if v == 2
else 'your_back_control' if v == 3
else 'opponent_mount_or_sc' if v == 4
else 'opponent_closed_guard' if v == 5
else 'opponent_back_control' if v == 6
else 'OTHER' if v == 7


[3, 0, 1, 2, 5, 4, 6]

"""

In [None]:
print 'TEST 1'

result = model.decode(data, algorithm='viterbi')
print 'pre smooth: {}'.format(pre_smooth)
print 'result accuracy {}%'.format(result[0])
print 'final result: {}'.format(result[1])

result_words = convert_to_words(result[1])
print '====================='
print 'pre smooth words: {}'.format(pre_smooth_words)
print '====================='
print 'result words: {}'.format(result_words)

print '\n'
print "pre smooth stats"
print get_position_stats(pre_smooth_words)

print '\n'

print 'result stats'
print get_position_stats(result_words)

print '******************'




In [None]:
print 'TEST2'
result2 = model.decode(data2, algorithm='viterbi')
print 'pre smooth: {}'.format(pre_smooth2)
print 'result accuracy {}%'.format(result2[0])
print 'final result: {}'.format(result2[1])

result_words2 = convert_to_words(result2[1])
print '====================='
print 'pre smooth words: {}'.format(pre_smooth_words2)
print '====================='
print 'result words: {}'.format(result_words2)

print '\n'
print "pre smooth stats"
print get_position_stats(pre_smooth_words2)

print '\n'

print 'result stats'
print get_position_stats(result_words2)

print '******************'

In [None]:
"""
print 'TEST3'
result3 = model.decode(data3, algorithm='viterbi')
print 'pre smooth: {}'.format(pre_smooth3)
print 'result accuracy {}%'.format(result3[0])
print 'final result: {}'.format(result3[1])

result_words3 = convert_to_words(result3[1])
print '====================='
print 'pre smooth words: {}'.format(pre_smooth_words3)
print '====================='
print 'result words: {}'.format(result_words3)

print '\n'
print "pre smooth stats"
print get_position_stats(pre_smooth_words3)

print '\n'

print 'result stats'
print get_position_stats(result_words3)

print '******************'
"""

In [None]:
"""
print 'TEST4'
result4 = model.decode(data4, algorithm='viterbi')
print 'pre smooth: {}'.format(pre_smooth4)
print 'result accuracy {}%'.format(result4[0])
print 'final result: {}'.format(result4[1])

result_words4 = convert_to_words(result4[1])
print '====================='
print 'pre smooth words: {}'.format(pre_smooth_words4)
print '====================='
print 'result words: {}'.format(result_words4)

print '\n'
print "pre smooth stats"
print get_position_stats(pre_smooth_words4)

print '\n'

print 'result stats'
print get_position_stats(result_words4)

print '******************'
"""

In [None]:
"""
print 'TEST5'
result5 = model.decode(data5, algorithm='viterbi')
print 'pre smooth: {}'.format(pre_smooth5)
print 'result accuracy {}%'.format(result5[0])
print 'final result: {}'.format(result5[1])

result_words5 = convert_to_words(result5[1])
print '====================='
print 'pre smooth words: {}'.format(pre_smooth_words5)
print '====================='
print 'result words: {}'.format(result_words5)

print '\n'
print "pre smooth stats"
print get_position_stats(pre_smooth_words5)

print '\n'

print 'result stats'
print get_position_stats(result_words5)
"""