In [2]:
from sklearn.decomposition import PCA 
import numpy as np
import pandas as pd
import plotly.express as px
from datetime import datetime
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Load the six CSV files: for each of Laguardia, Vanessa and Bruno there is one
# "bem_comportadas" file (suffix _cp) and one "livres" file (suffix _li).
df_lag_cp, df_lag_li = map(pd.read_csv, (
    'data/bem_comportadas_laguardia.csv',
    'data/livres_laguardia.csv',
))
df_van_cp, df_van_li = map(pd.read_csv, (
    'data/bem_comportadas_vanessa.csv',
    'data/livres_vanessa.csv',
))
df_bru_cp, df_bru_li = map(pd.read_csv, (
    'data/bem_comportadas_bruno.csv',
    'data/livres_bruno.csv',
))

In [3]:
# Print the (rows, columns) shape of the "_cp" and "_li" frames for each source.
print('Tamanho dos dados:')
for label, frame_cp, frame_li in (
    ('Laguardia: ', df_lag_cp, df_lag_li),
    ('Vanessa: ', df_van_cp, df_van_li),
    ('Bruno: ', df_bru_cp, df_bru_li),
):
    print(label, frame_cp.shape, frame_li.shape)

Tamanho dos dados:
Laguardia:  (8154, 13) (3775, 13)
Vanessa:  (7160, 13) (3761, 13)
Bruno:  (8889, 13) (2742, 13)


In [88]:
# Split every frame into X (all columns except 'pose') and Y (the 'pose'
# column cast to pandas' string dtype).
def _split_features_labels(frame):
    """Return `(features, labels)` for `frame`.

    `features` is `frame` without the 'pose' column; `labels` is the 'pose'
    column converted with `.astype('string')`.
    """
    features = frame.drop(columns='pose')
    labels = frame['pose'].astype('string')
    return features, labels


X_lag_cp, Y_lag_cp = _split_features_labels(df_lag_cp)
X_lag_li, Y_lag_li = _split_features_labels(df_lag_li)
X_van_cp, Y_van_cp = _split_features_labels(df_van_cp)
X_van_li, Y_van_li = _split_features_labels(df_van_li)
X_bru_cp, Y_bru_cp = _split_features_labels(df_bru_cp)
X_bru_li, Y_bru_li = _split_features_labels(df_bru_li)

In [89]:
# Display the Laguardia "_cp" labels to inspect the result of the string cast.
Y_lag_cp

0       0
1       0
2       0
3       1
4       1
       ..
8149    0
8150    0
8151    0
8152    0
8153    0
Name: pose, Length: 8154, dtype: string

# Aplicando PCA para analisar padrões

In [90]:
# Laguardia: fit a 2-component PCA on the "_cp" features, then project the
# "_li" features with the same fitted model.
pca = PCA(n_components=2)
lag_cp_pca = pca.fit_transform(X_lag_cp)
lag_li_pca = pca.transform(X_lag_li)

# One scatter plot per projection, coloured by the pose label.
for projected, pose_labels, plot_title in (
    (lag_cp_pca, Y_lag_cp, 'Laguardia - Comportadas'),
    (lag_li_pca, Y_lag_li, 'Laguardia - Livres'),
):
    fig = px.scatter(x=projected[:, 0], y=projected[:, 1], color=pose_labels, title=plot_title)
    fig.show()


In [91]:
# Vanessa: fit a 2-component PCA on the "_cp" features, then project the
# "_li" features with the same fitted model.
pca = PCA(n_components=2)
van_cp_pca = pca.fit_transform(X_van_cp)
van_li_pca = pca.transform(X_van_li)

# One scatter plot per projection, coloured by the pose label.
for projected, pose_labels, plot_title in (
    (van_cp_pca, Y_van_cp, 'Vanessa - Comportadas'),
    (van_li_pca, Y_van_li, 'Vanessa - Livres'),
):
    fig = px.scatter(x=projected[:, 0], y=projected[:, 1], color=pose_labels, title=plot_title)
    fig.show()

In [92]:
# Bruno: fit a 2-component PCA on the "_cp" features, then project the
# "_li" features with the same fitted model.
pca = PCA(n_components=2)
bru_cp_pca = pca.fit_transform(X_bru_cp)
bru_li_pca = pca.transform(X_bru_li)

# One scatter plot per projection, coloured by the pose label.
for projected, pose_labels, plot_title in (
    (bru_cp_pca, Y_bru_cp, 'Bruno - Comportadas'),
    (bru_li_pca, Y_bru_li, 'Bruno - Livres'),
):
    fig = px.scatter(x=projected[:, 0], y=projected[:, 1], color=pose_labels, title=plot_title)
    fig.show()

# Treinando RFs nas poses comportadas

In [93]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [94]:
# Laguardia: train a random forest on the "_cp" set and evaluate it on the
# "_li" set.
# random_state is pinned so the scores and the confusion matrix below are
# reproducible across kernel restarts; without it the forest's bootstrap
# samples and feature choices differ on every run.
rf_lag = RandomForestClassifier(n_estimators=100, random_state=42)
rf_lag.fit(X_lag_cp, Y_lag_cp)

print('Laguardia:')
print('Score no treino: ', rf_lag.score(X_lag_cp, Y_lag_cp))
print('Score no teste: ', rf_lag.score(X_lag_li, Y_lag_li))

# Confusion matrix on the "_li" set: rows are the true labels, columns the
# predicted ones. Kept as the last expression so the figure is displayed.
px.imshow(
    confusion_matrix(Y_lag_li, rf_lag.predict(X_lag_li)),
    text_auto=True,
    title='Laguardia',
    color_continuous_scale='deep',
    labels=dict(x='Predição', y='Real', color='Quantidade'),
)

Laguardia:
Score no treino:  0.9992641648270787
Score no teste:  0.48


In [95]:
# Vanessa: train a random forest on the "_cp" set and evaluate it on the
# "_li" set.
# random_state is pinned so the scores and the confusion matrix below are
# reproducible across kernel restarts; without it the forest's bootstrap
# samples and feature choices differ on every run.
rf_van = RandomForestClassifier(n_estimators=100, random_state=42)
rf_van.fit(X_van_cp, Y_van_cp)

print('Vanessa:')
print('Score no treino: ', rf_van.score(X_van_cp, Y_van_cp))
print('Score no teste: ', rf_van.score(X_van_li, Y_van_li))

# Confusion matrix on the "_li" set: rows are the true labels, columns the
# predicted ones. Kept as the last expression so the figure is displayed.
px.imshow(
    confusion_matrix(Y_van_li, rf_van.predict(X_van_li)),
    text_auto=True,
    title='Vanessa',
    color_continuous_scale='deep',
    labels=dict(x='Predição', y='Real', color='Quantidade'),
)

Vanessa:
Score no treino:  1.0
Score no teste:  0.8162722680138261


In [96]:
# Bruno: train a random forest on the "_cp" set and evaluate it on the
# "_li" set.
# random_state is pinned so the scores and the confusion matrix below are
# reproducible across kernel restarts; without it the forest's bootstrap
# samples and feature choices differ on every run.
rf_bru = RandomForestClassifier(n_estimators=100, random_state=42)
rf_bru.fit(X_bru_cp, Y_bru_cp)

print('Bruno:')
print('Score no treino: ', rf_bru.score(X_bru_cp, Y_bru_cp))
print('Score no teste: ', rf_bru.score(X_bru_li, Y_bru_li))

# Confusion matrix on the "_li" set: rows are the true labels, columns the
# predicted ones. Kept as the last expression so the figure is displayed.
px.imshow(
    confusion_matrix(Y_bru_li, rf_bru.predict(X_bru_li)),
    text_auto=True,
    title='Bruno',
    color_continuous_scale='deep',
    labels=dict(x='Predição', y='Real', color='Quantidade'),
)

Bruno:
Score no treino:  0.9995500056249297
Score no teste:  0.35412107950401167


In [97]:
# Laguardia: true vs. predicted pose for every "_li" sample.
# reset_index() keeps the previous index as an 'index' column.
lag_preds = pd.DataFrame({
    'real': Y_lag_li,
    'preds': rf_lag.predict(X_lag_li),
}).reset_index()

# Two overlaid line traces sharing the sample index on the x axis.
fig = go.Figure(data=[
    go.Scatter(x=lag_preds.index, y=lag_preds['real'], name='Real'),
    go.Scatter(x=lag_preds.index, y=lag_preds['preds'], name='Predição'),
])
fig.update_layout(title='Laguardia', xaxis_title='Amostra', yaxis_title='Pose')

In [98]:
# Bruno: true vs. predicted pose for every "_li" sample.
# reset_index() keeps the previous index as an 'index' column.
bru_preds = pd.DataFrame({
    'real': Y_bru_li,
    'preds': rf_bru.predict(X_bru_li),
}).reset_index()

# Two overlaid line traces sharing the sample index on the x axis.
fig = go.Figure(data=[
    go.Scatter(x=bru_preds.index, y=bru_preds['real'], name='Real'),
    go.Scatter(x=bru_preds.index, y=bru_preds['preds'], name='Predição'),
])
fig.update_layout(title='Bruno', xaxis_title='Amostra', yaxis_title='Pose')

In [99]:
# Vanessa: true vs. predicted pose for every "_li" sample.
# reset_index() keeps the previous index as an 'index' column.
van_preds = pd.DataFrame({
    'real': Y_van_li,
    'preds': rf_van.predict(X_van_li),
}).reset_index()

# Two overlaid line traces sharing the sample index on the x axis.
fig = go.Figure(data=[
    go.Scatter(x=van_preds.index, y=van_preds['real'], name='Real'),
    go.Scatter(x=van_preds.index, y=van_preds['preds'], name='Predição'),
])
fig.update_layout(title='Vanessa', xaxis_title='Amostra', yaxis_title='Pose')