# Treinando os modelos

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import glob

In [2]:
scm_licoes = pd.read_csv('downloads/imagens/SCMs-licoes.csv')
licoes_utilizadas = scm_licoes.loc[scm_licoes["presente"] == 1, 'scm']
tamanho_janela = 24 # tamanho em pixels

In [3]:
for s in licoes_utilizadas.tolist():
    X, y = np.load(f'pre-process/data/data-{s}.npy'), np.load(f'pre-process/data/target-{s}.npy')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30,random_state=13, stratify=y)

    data, target = [], []

    # print(X_train.shape)
    # print(np.count_nonzero(y_train))

    img_gray = X_train[y_train == 1][:, :tamanho_janela ** 2].reshape(-1, tamanho_janela, tamanho_janela)
    img_hog = X_train[y_train == 1][:, tamanho_janela ** 2:2 * tamanho_janela ** 2].reshape(-1, tamanho_janela, tamanho_janela)
    img_lbp = X_train[y_train == 1][:, 2 * tamanho_janela ** 2:].reshape(-1, tamanho_janela, tamanho_janela)

    img = np.stack([
            img_gray.reshape(-1, tamanho_janela ** 2),
            img_hog.reshape(-1, tamanho_janela ** 2),
            img_lbp.reshape(-1, tamanho_janela ** 2)
        ], axis=1).reshape(-1, 3 * (tamanho_janela ** 2))


    for r in ['rotaciona 0', 'rotaciona 90', 'rotaciona 180', 'rotaciona 270']:
        
        # flip Up/Down

        img_flipud = np.stack([
            np.flip(img_gray, axis=1).reshape(-1, tamanho_janela ** 2),
            np.flip(img_hog, axis=1).reshape(-1, tamanho_janela ** 2),
            np.flip(img_lbp, axis=1).reshape(-1, tamanho_janela ** 2)
        ], axis=1).reshape(-1, 3 * (tamanho_janela ** 2))

        # flip Left/Right

        img_fliplr = np.concatenate([
            np.flip(img_gray, axis=2).reshape(-1, tamanho_janela ** 2),
            np.flip(img_hog, axis=2).reshape(-1, tamanho_janela ** 2),
            np.flip(img_lbp, axis=2).reshape(-1, tamanho_janela ** 2)
        ], axis=1).reshape(-1, 3 * (tamanho_janela ** 2))

        # flip

        img_flip = np.concatenate([
            np.flip(np.flip(img_gray, axis=1), axis=2).reshape(-1, tamanho_janela ** 2),
            np.flip(np.flip(img_hog, axis=1), axis=2).reshape(-1, tamanho_janela ** 2),
            np.flip(np.flip(img_lbp, axis=1), axis=2).reshape(-1, tamanho_janela ** 2)
        ], axis=1).reshape(-1, 3 * (tamanho_janela ** 2))

        for fl_img in (img, img_flipud, img_fliplr, img_flip):
        # for fl_img in (img):
            # print(fl_img.shape)
            data.append(fl_img.reshape(-1, 3 * (tamanho_janela ** 2)))
            # target.append(1)


        img_gray, img_hog, img_lbp = np.rot90(img_gray, axes=(1,2)), np.rot90(img_hog, axes=(1,2)), np.rot90(img_lbp, axes=(1,2))

    data = np.concatenate(data)
    X_train = np.concatenate([X_train[y_train == 0], data])
    y_data = np.ones(data.shape[0])
    y_train = np.concatenate([y_train[y_train == 0], y_data])

    np.save(f'pre-process/data/data-{s}-train.npy', X_train)
    np.save(f'pre-process/data/data-{s}-test.npy', X_test)
    np.save(f'pre-process/data/target-{s}-train.npy', y_train)
    np.save(f'pre-process/data/target-{s}-test.npy', y_test)

    # print(f'SCM: {s}')

del X_train, X_test, y_train, y_test

X_train =  np.concatenate([np.load(f'pre-process/data/data-{s}-train.npy') for s in licoes_utilizadas.tolist()])
y_train =  np.concatenate([np.load(f'pre-process/data/target-{s}-train.npy') for s in licoes_utilizadas.tolist()])

In [4]:
X_train.shape, y_train.shape

((218751, 1728), (218751,))

In [5]:
np.count_nonzero(y_train)

160896

In [6]:
## Opção utilizando um percentual de SCMs como treinamento e outro como teste

# scm_train = licoes_utilizadas.sample(frac=0.5, random_state=13)
# scm_test = licoes_utilizadas[~licoes_utilizadas.isin(scm_train)]

# # scm_train.tolist()

# X_train =  np.concatenate([np.load(f'pre-process/data/data-{s}.npy') for s in scm_train.tolist()])
# y_train =  np.concatenate([np.load(f'pre-process/data/target-{s}.npy') for s in scm_train.tolist()])

## Treinando modelo Random Forest

In [7]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearnex import patch_sklearn
patch_sklearn()

# rbf_feature = RBFSampler(gamma=1, random_state=1)
# X_train = rbf_feature.fit_transform(X_train)
# clf = SGDClassifier(n_jobs=7, verbose=6, max_iter=5000)

# clf = MLPClassifier(max_iter=300, activation='relu', verbose=True, learning_rate='adaptive') # 46 minutos, acuracia de 94%, recall acima de 94%
# clf = SVC(gamma='auto', probability=False, verbose=True)
# clf = LinearSVC(verbose=9)
# clf = DecisionTreeClassifier()# ruim, acuracia abaixo de 82%
# clf = KNeighborsClassifier(n_jobs=7) # 1 min <-- .95,7 com recall de .93
clf = KNeighborsClassifier(n_jobs=7, weights='distance')
# clf = AdaBoostClassifier() # 
# clf = RandomForestClassifier(n_estimators=100, n_jobs=7, verbose=6) # Acur. de 89%, ruim
# clf = SGDClassifier(n_jobs=7, verbose=6, max_iter=5000)# ruim, acc. abaixo de 80%

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [8]:
X_train.shape

(218751, 1728)

In [9]:
clf.fit(X_train, y_train)

KNeighborsClassifier(n_jobs=7, weights='distance')

In [10]:
clf

KNeighborsClassifier(n_jobs=7, weights='distance')

In [11]:
# clf.score(X_train, y_train)

In [12]:
del X_train

In [13]:
# X_test =  np.concatenate([np.load(f'pre-process/data/data-{s}.npy') for s in scm_test.tolist()])
# y_test =  np.concatenate([np.load(f'pre-process/data/target-{s}.npy') for s in scm_test.tolist()])

In [14]:
X_test =  np.concatenate([np.load(f'pre-process/data/data-{s}-test.npy') for s in licoes_utilizadas.tolist()])
y_test =  np.concatenate([np.load(f'pre-process/data/target-{s}-test.npy') for s in licoes_utilizadas.tolist()])

In [None]:
y_pred = clf.predict(X_test)

In [None]:
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [None]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.9579898323715307


In [None]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.96      0.98     24801
           1       0.81      0.93      0.87      4311

    accuracy                           0.96     29112
   macro avg       0.90      0.95      0.92     29112
weighted avg       0.96      0.96      0.96     29112



In [None]:
print(confusion_matrix(y_test, y_pred))

[[23880   921]
 [  302  4009]]


## Salvando o modelo para usa-lo nas inferências

In [None]:
import joblib

In [None]:
joblib.dump(clf, 'resultados/modelos-treinados/KNC-faixas-pedestre.pkl')

NameError: name 'clf' is not defined