# Initial Imports & Data labeling

In [1]:
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# ===== Data ===== #
# Raw recordings, keyed '<subject>_<session>'. Keys ending in '_cp' load the
# 'bem_comportadas_*' files and keys ending in '_li' load the 'livres_*' files
# (presumably "well-behaved" vs "free" sessions -- TODO confirm the naming).
DFS = {
    key: pd.read_csv(path)
    for key, path in [
        ('lag_cp', 'data/bem_comportadas_laguardia.csv'),
        ('lag_li', 'data/livres_laguardia.csv'),
        ('van_cp', 'data/bem_comportadas_vanessa.csv'),
        ('van_li', 'data/livres_vanessa.csv'),
        ('bru_cp', 'data/bem_comportadas_bruno.csv'),
        ('bru_li', 'data/livres_bruno.csv'),
        ('alm_cp', 'data/bem_comportadas_almir.csv'),
    ]
}

# Feature tables: every column of each recording except the 'pose' label.
DFS_X = {key: frame.drop(columns='pose') for key, frame in DFS.items()}

# Label vectors: the 'pose' column of each recording, cast to pandas' string dtype
# so that labels can be compared against string codes such as '0' or '12'.
DFS_Y = {key: frame['pose'].astype('string') for key, frame in DFS.items()}

In [2]:
def recategorize_y(y_vec):
    """Collapse raw pose codes into coarse posture categories.

    Parameters
    ----------
    y_vec : pandas.Series
        Pose codes stored as strings (for example ``'0'`` or ``'12'``).

    Returns
    -------
    pandas.Series
        A copy of ``y_vec`` in which every recognised code is replaced by the
        name of its category. Values that match none of the listed codes are
        left as they are. ``y_vec`` itself is not modified.
    """
    codes_by_category = {
        'Not Sitting': ['0'],
        'Sitting Correctly': ['1', '2', '12'],
        'Leaning Forward': ['3', '6'],
        'Leaning Backward': ['7'],
        'Unbalanced': ['4', '5', '8', '9', '10', '11'],
    }

    relabeled = y_vec.copy()
    for category, codes in codes_by_category.items():
        # Masks are computed on the untouched input, so a replacement made for
        # one category can never influence the matching of another.
        relabeled[y_vec.isin(codes)] = category

    return relabeled

# Analyzing mean pose

In [3]:
def plot_mean_poses(nick):
    """Display a grid of bar charts with the mean reading of every column per pose.

    Looks up ``DFS[nick + '_cp']``, discards its ``p07`` column and, for each
    pose id from 0 to 12, averages the rows whose ``pose`` value equals that id.
    Each pose gets its own subplot in a 4x4 grid (filled row by row) with the
    y axis fixed to ``[0, 2000]``. The figure is shown with ``fig.show()``;
    nothing is returned.

    Parameters
    ----------
    nick : str
        Prefix of the recording key in ``DFS`` (for example ``'van'``).
    """
    pose_ids = range(13)
    # 1-based (row, col) grid position of each pose's subplot, row-major over 4 columns.
    grid_cells = [(pose_id // 4 + 1, pose_id % 4 + 1) for pose_id in pose_ids]

    readings = DFS[nick+'_cp'].drop(columns=['p07'])
    fig = make_subplots(
        rows=4,
        cols=4,
        subplot_titles=[f'Pose {pose_id}' for pose_id in pose_ids],
    )

    for pose_id, (row, col) in zip(pose_ids, grid_cells):
        # Column-wise mean over the rows of this pose; the label column itself is not plotted.
        pose_mean = readings.loc[readings['pose'] == pose_id].mean().drop(labels=['pose'])
        bars = go.Bar(
            x=pose_mean.index,
            y=pose_mean.values,
            name=f'Pose {pose_id}',
            marker_color='royalblue',
        )
        fig.add_trace(bars, row=row, col=col)

    fig.update_layout(height=800, width=1000, title_text=f'Average pose for {nick}')
    # Same y range on every populated subplot so bar heights are comparable across poses.
    for row, col in grid_cells:
        fig.update_yaxes(range=[0, 2000], row=row, col=col)
    fig.show()

In [4]:
# Mean reading per pose for the 'van' recording (reads DFS['van_cp']).
plot_mean_poses('van')

In [5]:
# Mean reading per pose for the 'alm' recording (reads DFS['alm_cp']).
plot_mean_poses('alm')

# Training the model

In [6]:
# Training pool: the '_cp' recordings of van and bru, with labels collapsed
# into posture categories.
X_train = pd.concat([DFS_X[key] for key in ('van_cp', 'bru_cp')])
y_train = recategorize_y(pd.concat([DFS_Y[key] for key in ('van_cp', 'bru_cp')]))

# Test pool: the '_li' recordings of the same two subjects.
X_test = pd.concat([DFS_X[key] for key in ('van_li', 'bru_li')])
y_test = recategorize_y(pd.concat([DFS_Y[key] for key in ('van_li', 'bru_li')]))

# Extra evaluation set: the 'alm_cp' recording, which is not part of the training pool.
X_almir, y_almir = DFS_X['alm_cp'], recategorize_y(DFS_Y['alm_cp'])

In [7]:
# Fit a 100-tree random forest with a fixed seed on the current training pool,
# then report accuracy on the training pool, the test pool and the Almir set.
clf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

print('Accuracy on train set: ', accuracy_score(y_true=y_train, y_pred=clf.predict(X_train)))
print('Accuracy on test set: ', accuracy_score(y_true=y_test, y_pred=clf.predict(X_test)))
print('Accuracy on Almir set: ', accuracy_score(y_true=y_almir, y_pred=clf.predict(X_almir)))

Accuracy on train set:  0.9996884541092903
Accuracy on test set:  0.9048131631554667
Accuracy on Almir set:  0.752814873003404


# Training the model with a small glimpse of the user's data

In [8]:
# Split the 'alm_cp' recording: test_size=0.9 holds out 90% of its rows for
# evaluation, so only the remaining 10% is added to the training pool.
almir_train_X, almir_test_X, almir_train_y, almir_test_y = train_test_split(
    DFS_X['alm_cp'],
    DFS_Y['alm_cp'],
    test_size=0.9,
    random_state=42,
)

# Training pool: the '_cp' recordings of van and bru plus the small Almir slice.
X_train = pd.concat([DFS_X['van_cp'], DFS_X['bru_cp'], almir_train_X])
y_train = recategorize_y(pd.concat([DFS_Y['van_cp'], DFS_Y['bru_cp'], almir_train_y]))

# Test pool: the '_li' recordings of van and bru.
X_test = pd.concat([DFS_X[key] for key in ('van_li', 'bru_li')])
y_test = recategorize_y(pd.concat([DFS_Y[key] for key in ('van_li', 'bru_li')]))

# Almir evaluation set: only the held-out rows, with labels collapsed into categories.
X_almir, y_almir = almir_test_X, recategorize_y(almir_test_y)

In [9]:
# Refit a fresh 100-tree random forest (fixed seed) on the training pool that
# now includes a slice of Almir's rows, then report accuracy on each set.
clf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

print('Accuracy on train set: ', accuracy_score(y_true=y_train, y_pred=clf.predict(X_train)))
print('Accuracy on test set: ', accuracy_score(y_true=y_test, y_pred=clf.predict(X_test)))
print('Accuracy on Almir set: ', accuracy_score(y_true=y_almir, y_pred=clf.predict(X_almir)))

Accuracy on train set:  0.9996348143639684
Accuracy on test set:  0.8745194525603568
Accuracy on Almir set:  0.9886561954624782


In [10]:
# Split the 'lag_cp' recording: test_size=0.9 holds out 90% of its rows for
# evaluation, so only the remaining 10% is added to the training pool.
lag_train_X, lag_test_X, lag_train_y, lag_test_y = train_test_split(
    DFS_X['lag_cp'],
    DFS_Y['lag_cp'],
    test_size=0.9,
    random_state=42,
)

# Training pool: the '_cp' recordings of van and bru plus the small 'lag_cp' slice.
X_train = pd.concat([DFS_X['van_cp'], DFS_X['bru_cp'], lag_train_X])
y_train = recategorize_y(pd.concat([DFS_Y['van_cp'], DFS_Y['bru_cp'], lag_train_y]))

# Test pool: the '_li' recordings of van and bru.
X_test = pd.concat([DFS_X[key] for key in ('van_li', 'bru_li')])
y_test = recategorize_y(pd.concat([DFS_Y[key] for key in ('van_li', 'bru_li')]))

# 'lag_cp' evaluation set: only the held-out rows, with labels collapsed into categories.
X_lag, y_lag = lag_test_X, recategorize_y(lag_test_y)

In [11]:
# Refit a fresh 100-tree random forest (fixed seed) on the training pool that
# now includes a slice of the 'lag_cp' rows, then report accuracy on each set.
clf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

print('Accuracy on train set: ', accuracy_score(y_true=y_train, y_pred=clf.predict(X_train)))
print('Accuracy on test set: ', accuracy_score(y_true=y_test, y_pred=clf.predict(X_test)))
print('Accuracy on Lag Bem Comportado set: ', accuracy_score(y_true=y_lag, y_pred=clf.predict(X_lag)))

Accuracy on train set:  0.9995849146110057
Accuracy on test set:  0.8954328771336306
Accuracy on Lag Bem Comportado set:  0.9873279738383977


In [12]:
# Score the most recently fitted classifier on the 'lag_li' recording, none of
# whose rows were used for training.
y_real = recategorize_y(DFS_Y['lag_li'])
print('Accuracy on Lag Livre set: ', accuracy_score(y_true=y_real, y_pred=clf.predict(DFS_X['lag_li'])))

Accuracy on Lag Livre set:  0.6990728476821192


# Testing with retraining Random Forest
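- It did not work: the previous random forest was completely overwritten as soon as `fit` was called a second time.
- The cells below experiment with `MLPClassifier.partial_fit` as an incremental alternative.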

In [41]:
# importing multilayer perceptron classifier from sklearn
from sklearn.neural_network import MLPClassifier

In [60]:
# Tiny multilayer perceptron (hidden layers of 5 and 2 units, fixed seed) used
# as a toy model for the incremental-training experiment.
model = MLPClassifier(
    solver='adam',
    alpha=1e-5,
    hidden_layer_sizes=(5, 2),
    random_state=1,
)

# Toy data: points [k, k] for k = 0..4; the first two are class 0, the rest class 1.
X_dummy = [[k, k] for k in range(5)]
y_dummy = [0] * 2 + [1] * 3

model.fit(X_dummy, y_dummy)


Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.



In [61]:
# Print the predicted class for each point [i, i], i = 0..5, after the initial fit.
for i in range(6):
    x = [i, i]
    print(x, model.predict([x]), sep=' >>> ')

[0, 0] >>> [0]
[1, 1] >>> [0]
[2, 2] >>> [1]
[3, 3] >>> [1]
[4, 4] >>> [1]
[5, 5] >>> [1]


In [78]:
# Call partial_fit ten times on the single sample [2, 2] labelled as class 0
# (the same point is labelled 1 in y_dummy), updating the already-fitted
# network in place instead of refitting it from scratch.
for i in range(10):
    model.partial_fit([[2, 2]], [0])

In [79]:
# Print the predicted class for each point [i, i], i = 0..5, after the
# partial_fit updates, for comparison with the predictions made before them.
for i in range(6):
    x = [i, i]
    print(x, model.predict([x]), sep=' >>> ')

[0, 0] >>> [0]
[1, 1] >>> [0]
[2, 2] >>> [0]
[3, 3] >>> [1]
[4, 4] >>> [1]
[5, 5] >>> [1]
