# Treinando os modelos

In [1]:
import pandas as pd
import numpy as np
from sklearnex import patch_sklearn
patch_sklearn()
from sklearn.model_selection import train_test_split
import glob
import joblib
from sklearn.svm import SVC


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
scm_licoes = pd.read_csv('downloads/imagens/SCMs-licoes.csv')
licoes_utilizadas = scm_licoes.loc[scm_licoes["presente"] == 1, 'scm']
tamanho_janela = 24 # tamanho em pixels

In [3]:
for s in licoes_utilizadas.tolist():
    X, y = np.load(f'pre-process/data/data-{s}.npy'), np.load(f'pre-process/data/target-{s}.npy')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30,random_state=13, stratify=y)

    data, target = [], []

    # print(X_train.shape)
    # print(np.count_nonzero(y_train))

    img_gray = X_train[y_train == 1][:, :tamanho_janela ** 2].reshape(-1, tamanho_janela, tamanho_janela)
    img_hog = X_train[y_train == 1][:, tamanho_janela ** 2:2 * tamanho_janela ** 2].reshape(-1, tamanho_janela, tamanho_janela)
    img_lbp = X_train[y_train == 1][:, 2 * tamanho_janela ** 2:].reshape(-1, tamanho_janela, tamanho_janela)

    img = np.stack([
            img_gray.reshape(-1, tamanho_janela ** 2),
            img_hog.reshape(-1, tamanho_janela ** 2),
            img_lbp.reshape(-1, tamanho_janela ** 2)
        ], axis=1).reshape(-1, 3 * (tamanho_janela ** 2))


    for r in ['rotaciona 0', 'rotaciona 90', 'rotaciona 180', 'rotaciona 270']:
        
        # flip Up/Down

        img_flipud = np.stack([
            np.flip(img_gray, axis=1).reshape(-1, tamanho_janela ** 2),
            np.flip(img_hog, axis=1).reshape(-1, tamanho_janela ** 2),
            np.flip(img_lbp, axis=1).reshape(-1, tamanho_janela ** 2)
        ], axis=1).reshape(-1, 3 * (tamanho_janela ** 2))

        # flip Left/Right

        img_fliplr = np.concatenate([
            np.flip(img_gray, axis=2).reshape(-1, tamanho_janela ** 2),
            np.flip(img_hog, axis=2).reshape(-1, tamanho_janela ** 2),
            np.flip(img_lbp, axis=2).reshape(-1, tamanho_janela ** 2)
        ], axis=1).reshape(-1, 3 * (tamanho_janela ** 2))

        # flip

        img_flip = np.concatenate([
            np.flip(np.flip(img_gray, axis=1), axis=2).reshape(-1, tamanho_janela ** 2),
            np.flip(np.flip(img_hog, axis=1), axis=2).reshape(-1, tamanho_janela ** 2),
            np.flip(np.flip(img_lbp, axis=1), axis=2).reshape(-1, tamanho_janela ** 2)
        ], axis=1).reshape(-1, 3 * (tamanho_janela ** 2))

        for fl_img in (img, img_flipud, img_fliplr, img_flip):
        # for fl_img in (img):
            # print(fl_img.shape)
            data.append(fl_img.reshape(-1, 3 * (tamanho_janela ** 2)))
            # target.append(1)


        img_gray, img_hog, img_lbp = np.rot90(img_gray, axes=(1,2)), np.rot90(img_hog, axes=(1,2)), np.rot90(img_lbp, axes=(1,2))

    data = np.concatenate(data)
    X_train = np.concatenate([X_train[y_train == 0], data])
    y_data = np.ones(data.shape[0])
    y_train = np.concatenate([y_train[y_train == 0], y_data])

    np.save(f'pre-process/data/data-{s}-train.npy', X_train)
    np.save(f'pre-process/data/data-{s}-test.npy', X_test)
    np.save(f'pre-process/data/target-{s}-train.npy', y_train)
    np.save(f'pre-process/data/target-{s}-test.npy', y_test)

    # print(f'SCM: {s}')

del X_train, X_test, y_train, y_test

X_train =  np.concatenate([np.load(f'pre-process/data/data-{s}-train.npy') for s in licoes_utilizadas.tolist()])
y_train =  np.concatenate([np.load(f'pre-process/data/target-{s}-train.npy') for s in licoes_utilizadas.tolist()])

In [4]:
X_train.shape, y_train.shape

((270653, 1728), (270653,))

In [5]:
np.count_nonzero(y_train)

39872

## Reduzindo a dimensionalidade

In [6]:
from sklearn.decomposition import PCA

In [7]:
pca = PCA(n_components=0.95)

In [8]:
X_test =  np.concatenate([np.load(f'pre-process/data/data-{s}-test.npy') for s in licoes_utilizadas.tolist()])

In [9]:
X = np.concatenate([X_train, X_test])

In [10]:
pca.fit(X)

PCA(n_components=0.95)

In [11]:
del X

In [12]:
del X_test

In [13]:
X_train = pca.transform(X_train)

In [14]:
X_train.shape

(270653, 438)

In [15]:
joblib.dump(pca, 'resultados/modelos-treinados/PCA-faixas-pedestre.pkl') 

['resultados/modelos-treinados/PCA-faixas-pedestre.pkl']

## Treinando modelo

In [16]:
# from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
# from sklearn.kernel_approximation import RBFSampler
# from sklearn.linear_model import SGDClassifier
# from sklearn.svm import SVC, LinearSVC
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.linear_model import LogisticRegression
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.neural_network import MLPClassifier
# from sklearnex import patch_sklearn
# patch_sklearn()

# rbf_feature = RBFSampler(gamma=1, random_state=1)
# X_train = rbf_feature.fit_transform(X_train)
# clf = SGDClassifier(n_jobs=7, verbose=6, max_iter=5000)

# clf = MLPClassifier(max_iter=300, activation='relu', verbose=True, learning_rate='adaptive') # 46 minutos, acuracia de 94%, recall acima de 94%
clf = SVC(gamma='auto', probability=False, verbose=True)# Reduzindo a dimensionalidade demorou 72 minutos, com acc. 
# clf = LinearSVC(verbose=9)
# clf = DecisionTreeClassifier()# ruim, acuracia abaixo de 82%
# clf = KNeighborsClassifier(n_jobs=7) # 1 min <-- .95,7 com recall de .93
# clf = KNeighborsClassifier(n_jobs=7, weights='distance')
# clf = AdaBoostClassifier() # 
# clf = RandomForestClassifier(n_estimators=100, n_jobs=7, verbose=6) # Acur. de 89%, ruim
# clf = SGDClassifier(n_jobs=7, verbose=6, max_iter=5000)# ruim, acc. abaixo de 80%

In [17]:
X_train.shape

(270653, 438)

In [18]:
clf.fit(X_train, y_train)

SVC(gamma='auto', verbose=True)

In [19]:
clf

SVC(gamma='auto', verbose=True)

In [20]:
# clf.score(X_train, y_train)

In [21]:
del X_train

In [22]:
# X_test =  np.concatenate([np.load(f'pre-process/data/data-{s}.npy') for s in scm_test.tolist()])
# y_test =  np.concatenate([np.load(f'pre-process/data/target-{s}.npy') for s in scm_test.tolist()])

In [23]:
X_test =  np.concatenate([np.load(f'pre-process/data/data-{s}-test.npy') for s in licoes_utilizadas.tolist()])
y_test =  np.concatenate([np.load(f'pre-process/data/target-{s}-test.npy') for s in licoes_utilizadas.tolist()])

In [24]:
X_test = pca.transform(X_test)

In [25]:
y_pred = clf.predict(X_test)

In [26]:
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [27]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.9954790958191638


In [28]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     98912
           1       0.79      0.79      0.79      1068

    accuracy                           1.00     99980
   macro avg       0.89      0.89      0.89     99980
weighted avg       1.00      1.00      1.00     99980



In [29]:
print(confusion_matrix(y_test, y_pred))

[[98682   230]
 [  222   846]]


## Salvando o modelo para usa-lo nas inferências

In [30]:
joblib.dump(clf, 'resultados/modelos-treinados/SVC-faixas-pedestre.pkl')
# joblib.dump(clf, 'resultados/modelos-treinados/KNeighbors-faixas-pedestre.pkl')


['resultados/modelos-treinados/SVC-faixas-pedestre.pkl']