# TP - Reconnaissance de configurations manuelles Python et Scikit-Learn

## Import

In [16]:
from glob import glob
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers.core import Dense
from keras.optimizers import SGD

#Require a folder with all csv files
PATH = './PositionHandJoints'

## Functions

In [2]:
#For Windows only
def identify_classes(path):
    classes = set()
    files = [f.split('\\')[1].split('.')[0] for f in glob(path+'/*.csv')]
    for file in files:
        classes.add(file.split('_')[-1])
    return list(classes)

def construct_dataset(path):
    classes = identify_classes(path)
    dataset = {}
    for classe in classes:
        file_classe = glob(path+f'/*{classe}.csv')
        values = []
        for fc in file_classe:
            values.extend(pd.read_csv(fc, sep=' ').fillna(0).values)
        dataset[classe] = [list(v) for v in values]
    return dataset

def flatten(l):
    return [v for s in l for v in s]

## Construction

In [3]:
dataset = construct_dataset(PATH)
keys = list(dataset.keys())

#data / target are like: [0.2 ... -0.8] --> 1 which means each frame correspond to a class
data = flatten([dataset[key] for key in keys])
target = flatten([[keys.index(key)] * len(dataset[key]) for key in keys])

## Preprocessing

In [4]:
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2)
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

scaler = StandardScaler().fit(data)
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Y_train = to_categorical(y_train)
Y_test = to_categorical(y_test)

## Workflow

### SVC

In [9]:
svc = SVC(gamma='auto')
svc.fit(X_train, y_train)
svc.score(X_test, y_test)

0.5760973030142782

### LogisticRegression

In [13]:
lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)
lr.score(X_test, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


0.439555790586991

### KNN

In [5]:
neigh = KNeighborsClassifier(n_neighbors=Y_train.shape[1])
neigh.fit(X_train, y_train)
neigh.score(X_test, y_test)

0.5118984664198837

### GaussianNB

In [15]:
gnb = GaussianNB()
gnb.fit(X_train, y_train)
gnb.score(X_test, y_test)

0.15631940772078265

### MLP

In [6]:
model = Sequential()
model.add(Dense(X_train.shape[1], activation='relu'))
model.add(Dense(X_train.shape[1] * 2, activation='relu'))
model.add(Dense(Y_train.shape[1], activation='softmax'))

sgd = SGD(lr=0.01, decay=1e-15, momentum=0.99, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.fit(X_train, Y_train,epochs=20, batch_size=256)

score = model.evaluate(X_test, Y_test, verbose=0)
print(model.metrics_names, '-->', score)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
['loss', 'accuracy'] --> [0.5835664868354797, 0.612057089805603]


From classifiers we observe that SVC seems to be the best.
MLP is better than any classifier.
The results are still not satisfiying.