# Preparando as lições para o treinamento por aprendizado de máquina

Neste notebook, a partir de parâmetros estabelecidos, as lições são preparadas para o treinamento do modelo de detecção de faixas de pedestre; em seguida são realizadas a validação e a inferência com o modelo treinado.

## Selecionando as quadrículas SCM das lições

In [None]:
import geopandas as gpd
import pandas as pd

In [None]:
# Load the SCM orthophoto tile grid directly from the zipped shapefile.
gdf_scm = gpd.read_file(
    'zip://downloads/SIRGAS_SHP_quadriculaortofoto2017.zip'
    '!/SIRGAS_SHP_quadriculaortofoto2017'
    '/SIRGAS_SHP_quadriculaortofoto2017.shp'
)

In [None]:
# Table of SCM tiles; its 'presente' and 'scm' columns are used to pick the tiles in use.
scm_licoes = pd.read_csv('downloads/imagens/SCMs-licoes.csv')

In [None]:
# Keep only the SCM codes of the rows flagged with presente == 1.
licoes_utilizadas = scm_licoes.loc[scm_licoes["presente"].eq(1), 'scm']

In [None]:
# Grid tiles whose code is among the lessons in use (displayed only, not stored).
gdf_scm.loc[gdf_scm['qo2010_cod'].isin(licoes_utilizadas)]

## Gerando as Labels de faixas de pedestre

In [None]:
# Lesson geometries for all tiles; they are filtered by their `scm` column below.
licoes = gpd.read_file('licoes/licoes-por-scm.gpkg')

In [None]:
# Tile (SCM code) processed in this walkthrough; keep only its lessons.
scm = '3313-414'
licoes = licoes[licoes['scm'] == scm]

In [None]:
# Number of lesson rows left after filtering by tile.
len(licoes)

In [None]:
import rasterio
from rasterio import features
from matplotlib import pyplot
from rasterio.plot import show
# from geocube.api.core import make_geocube

In [None]:
# Open the orthophoto that belongs to the tile selected in `scm`.
# The path is derived from `scm` so the image and the lesson filter cannot
# drift apart when another tile is chosen.
img = f"downloads/imagens/ORTO_{scm}_RGB_1000_v2.jp2"
dataset = rasterio.open(img)

In [None]:
# Raster size as (width, height).
dataset.width, dataset.height

In [None]:
# Map each band index of the raster to its dtype.
dict(zip(dataset.indexes, dataset.dtypes))

In [None]:
# Bounding box of the raster, as reported by rasterio.
dataset.bounds

In [None]:
# Burn every lesson polygon into a mask aligned with the orthophoto grid:
# 255 inside a lesson geometry, 0 (the rasterize default fill) elsewhere.
labels = features.rasterize(
    [(geom, 255) for geom in licoes.geometry.explode()],
    out_shape=dataset.shape,
    transform=dataset.transform,
)

In [None]:
# Sanity check: the label mask must match the raster's (height, width).
# dataset.shape is used so no band has to be decoded just to read its shape.
labels.shape, dataset.shape

In [None]:
# Preview the rasterized lesson mask.
pyplot.imshow(labels)
pyplot.show()

In [None]:
# with rasterio.open(
#         'rasterized-results.tif', 'w',
#         driver='GTiff',
#         dtype=rasterio.uint8,
#         count=1,
#         width=dataset.width,
#         height=dataset.height,
#         transform=dataset.transform) as dst:
#     dst.write(image, indexes=1)

In [None]:
# Preview the first band of the orthophoto.
pyplot.imshow(dataset.read(1))
pyplot.show()

In [None]:
# Plot the raster with rasterio's plotting helper.
show(dataset)

## Gerando máscaras de quadras

In [None]:
# Load the city-block polygons directly from the zipped shapefile.
gdf_quadras = gpd.read_file(
    'zip://downloads/SIRGAS_SHP_quadraMDSF.zip'
    '!/SIRGAS_SHP_quadraMDSF'
    '/SIRGAS_SHP_quadraMDSF.shp'
)

In [None]:
from shapely.geometry import box
# Rectangle covering the raster bounds, wrapped in a GeoSeries for display.
gpd.GeoSeries(box(*dataset.bounds))

In [None]:
# Clip the block polygons to the raster extent and dissolve them into a
# single geometry. The bounding polygon is passed directly as the mask:
# wrapping it in a GeoSeries without a CRS makes geopandas emit a
# CRS-mismatch warning against gdf_quadras.
quadras_scm = gpd.clip(gdf_quadras, box(*dataset.bounds)).unary_union

In [None]:
# print(quadras_scm)

In [None]:
# Burn the dissolved block geometry into a mask aligned with the orthophoto:
# 255 inside a block, 0 elsewhere.
# quadras_scm is one geometry (Polygon or MultiPolygon), so it is handed to
# rasterize as a single shape. Iterating it fails for a lone Polygon and is
# not supported for multi-part geometries in Shapely 2.
quadras = features.rasterize(
    [(quadras_scm, 255)],
    out_shape=dataset.shape,
    transform=dataset.transform,
)

In [None]:
# Sanity check: both masks should have the same shape as the raster.
quadras.shape, labels.shape, dataset.shape

In [None]:
# Preview the rasterized block mask.
pyplot.imshow(quadras)

## Preparando as imagens

### Gerando GrayScale



In [None]:
# Weighted sum of bands 1-3 (presumably R, G, B, going by the file name).
# The weights add up to 1, so the result stays in the input value range.
grayscale = sum(
    weight * dataset.read(band)
    for band, weight in ((1, 0.2125), (2, 0.7154), (3, 0.0721))
)

In [None]:
# Inspect the grayscale image cast to 8-bit (result is not stored).
grayscale.astype('uint8')

In [None]:
# Preview the 8-bit grayscale image.
pyplot.imshow(grayscale.astype('uint8'), cmap='gray')

In [None]:
import os

# rasterio does not create missing directories, so make sure tmp/ exists.
os.makedirs('tmp', exist_ok=True)

# Save the 8-bit grayscale image as a single-band GeoTIFF on the same grid
# as the source raster. The CRS is copied along with the transform;
# without it the output file has pixel geometry but no coordinate system.
with rasterio.open(
        'tmp/grayscale-results.tif', 'w',
        driver='GTiff',
        dtype=rasterio.uint8,
        count=1,
        width=dataset.width,
        height=dataset.height,
        crs=dataset.crs,
        transform=dataset.transform) as dst:
    dst.write(grayscale.astype('uint8'), indexes=1)

### Gerando HOG e Local Binary Pattern (LBPH)

In [None]:
from skimage.feature import hog

In [None]:
# HOG over the whole grayscale tile: 8 orientation bins, 8x8-pixel cells and
# 1x1-cell blocks. `fd` is the descriptor, `hog_image` its visualization.
fd, hog_image = hog(
    grayscale,
    orientations=8,
    pixels_per_cell=(8, 8),
    cells_per_block=(1, 1),
    visualize=True,
)

In [None]:
# Preview the HOG visualization image.
pyplot.imshow(hog_image)

In [None]:
from skimage.feature import local_binary_pattern

In [None]:
# Local Binary Pattern parameters: neighbourhood radius and number of
# sampling points (8 per unit of radius).
radius = 3
n_points = 8 * radius

In [None]:
# Local Binary Pattern of the grayscale tile.
# The image is cast to 8-bit first: grayscale is a float array at this point,
# and scikit-image recommends integer input because tiny floating-point
# differences between neighbouring pixels can flip the binary comparisons.
lbp = local_binary_pattern(grayscale.astype('uint8'), n_points, radius)

In [None]:
# Preview the LBP image.
pyplot.imshow(lbp)

## Reescalonando

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [None]:
# Scaler used for the grayscale image (zero mean, unit variance).
scaler = StandardScaler()

In [None]:
# Standardize the grayscale image with ONE mean/std for the whole image.
# scikit-learn scalers treat each column of a 2-D array as a separate
# feature, so passing the image as-is would normalize every pixel column
# independently. Flattening to a single column gives a global scaling.
# NOTE(review): the fitted scaler now has a single feature -- confirm that
# no later code relies on the previous per-column fit.
grayscale = scaler.fit_transform(grayscale.reshape(-1, 1)).reshape(grayscale.shape)

In [None]:
# Scaler used for the HOG and LBP images; maps values to the range [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))

In [None]:
# Rescale the HOG image to [-1, 1] using the global min/max of the image.
# The array is flattened to one column because scikit-learn scalers work
# per column, which would otherwise rescale every pixel column on its own.
# NOTE(review): the fitted scaler now has a single feature -- confirm that
# no later code relies on the previous per-column fit.
hog_image = scaler.fit_transform(hog_image.reshape(-1, 1)).reshape(hog_image.shape)

In [None]:
# Rescale the LBP image to [-1, 1] using the global min/max of the image.
# The array is flattened to one column because scikit-learn scalers work
# per column, which would otherwise rescale every pixel column on its own.
# Note that fit_transform refits `scaler`, discarding any earlier fit.
# NOTE(review): the fitted scaler now has a single feature -- confirm that
# no later code relies on the previous per-column fit.
lbp = scaler.fit_transform(lbp.reshape(-1, 1)).reshape(lbp.shape)

## Gerando as lições 

Uma janela é considerada lição se atender às condições:

* mais de 80% dos pixels dentro da lição (label) da faixa de pedestre
* apenas 5% ou menos dos pixels dentro de uma quadra (isto é, fora do leito viário)
* 100% dos pixels dentro do SCM

### Data Augmentation das lições das faixas de pedestre

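* Rotação de 0, 90, 180 e 270 graus
* Para cada rotação: imagem original, flip horizontal (H), flip vertical (V) e flip em ambos os eixos
* 16 possibilidades de Augmentation (4 rotações × 4 variações; apenas 8 delas são distintas, pois combinações de rotação e flip se repetem)
* (Talvez uma rotação a cada 15 graus, 6 possibilidades, aumentando o total para 96 possibilidades)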

In [None]:
tamanho_janela = 24  # window side length, in pixels
sobreposicao_de_janelas = 16  # overlap between windows, in pixels

In [None]:
# grayscale = grayscale.astype('uint8')

In [None]:
import numpy as np

In [None]:
# Preview one sample window of the grayscale image, starting at
# row 4864, column 920.
pyplot.imshow(grayscale[4864:4864+tamanho_janela, 920:920+tamanho_janela], cmap='gray')

In [None]:
# Same window after np.flip with no axis argument, i.e. reversed along both axes.
pyplot.imshow(np.flip(grayscale[4864:4864+tamanho_janela, 920:920+tamanho_janela]), cmap='gray')

In [None]:
# pyplot.imshow(np.flipud(grayscale[4864:4864+tamanho_janela, 920:920+tamanho_janela]), cmap='gray')
# pyplot.imshow(np.fliplr(grayscale[4864:4864+tamanho_janela, 920:920+tamanho_janela]), cmap='gray')

In [None]:
# Same sample window, taken from the HOG image.
pyplot.imshow(hog_image[4864:4864+tamanho_janela, 920:920+tamanho_janela], cmap='gray')

In [None]:
# Same sample window, taken from the LBP image.
pyplot.imshow(lbp[4864:4864+tamanho_janela, 920:920+tamanho_janela], cmap='gray')

In [None]:
# Shape check for one feature row: the grayscale, HOG and LBP windows are
# stacked and flattened into a single row of 3 * tamanho_janela**2 values.
# The row length is derived from tamanho_janela so the check keeps working
# when the window size is changed.
np.concatenate([grayscale[4864:4864+tamanho_janela, 920:920+tamanho_janela],
                hog_image[4864:4864+tamanho_janela, 920:920+tamanho_janela],
                lbp[4864:4864+tamanho_janela, 920:920+tamanho_janela]]).reshape(1, 3 * (tamanho_janela ** 2)).shape

In [None]:
# Uninitialized array with the shape of one feature row (displayed only).
np.empty((1, 3 * (tamanho_janela ** 2)))

In [None]:
scaler_filenameem dentro de um label de faixa de pedestre
            img_gray = grayscale[i:i+tamanho_janela, j:j+tamanho_janela]
            img_hog = hog_image[i:i+tamanho_janela, j:j+tamanho_janela]
            img_lbp = lbp[i:i+tamanho_janela, j:j+tamanho_janela]

            # Data Augmentation
            
            img = np.concatenate([
                img_gray,
                img_hog,
                img_lbp
            ]).reshape(1, 3 * (tamanho_janela ** 2))
            
            if np.count_nonzero(labels[i:i+tamanho_janela, j:j+tamanho_janela] == 255) > 0.80 * (tamanho_janela ** 2):
                faixa += 16
                

                for r in ['rotaciona 0', 'rotaciona 90', 'rotaciona 180', 'rotaciona 270']:
                    
                    # flip Up/Down

                    img_flipud = np.concatenate([
                        np.flipud(img_gray),
                        np.flipud(img_hog),
                        np.flipud(img_lbp)
                    ]).reshape(1, 3 * (tamanho_janela ** 2))

                    # flip Left/Right
scaler_filename
                    ]).reshape(1, 3 * (tamanho_janela ** 2))

                    # flip

                    img_flip = np.concatenate([
                        np.flip(img_gray),
                        np.flip(img_hog),
                        np.flip(img_lbp)
                    ]).reshape(1, 3 * (tamanho_janela ** 2))

                    for fl_img in (img, img_flipud, img_fliplr, img_flip):
                    # for fl_img in (img):
                        data.append(fl_img.reshape(1, 3 * (tamanho_janela ** 2)))
                        target.append(1)


                    img_gray, img_hog, img_lbp = np.rot90(img_gray), np.rot90(img_hog), np.rot90(img_lbp)

            else:
                rua += 1
                data.append(img.reshape(1, 3 * (tamanho_janela ** 2)))
                target.append(0)



print(f"{rua} ruas e {faixa} faixas")

In [None]:
# Stack the collected feature rows into one (n_samples, n_features) matrix.
nd_data = np.array(data).reshape(
    len(data),
    3 * tamanho_janela ** 2,
)

In [None]:
# Shape of the feature matrix.
nd_data.shape

In [None]:
# Class labels as an array, one entry per feature row.
nd_target = np.asarray(target)

In [None]:
# Number of samples with a non-zero label.
np.count_nonzero(nd_target)

## Dividindo o conjunto de dados em treino e teste

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the samples for testing, keeping the class proportions
# (stratified) and using a fixed seed.
# NOTE(review): augmented variants of a window are appended to `data` before
# this split, so variants of the same window may land in both train and
# test -- consider a group-aware split.
X_train, X_test, y_train, y_test = train_test_split(
    nd_data,
    nd_target,
    test_size=0.3,
    random_state=13,
    stratify=nd_target,
)

In [None]:
# Share of the non-zero-label samples that ended up in the training set.
np.count_nonzero(y_train) / np.count_nonzero(nd_target)

## Iniciando o treinamento do modelo

In [None]:
from sklearn.svm import LinearSVC

# Linear SVM (an alternative would be svm.SVC(kernel='linear')).
# Seeded with the same value as the train/test split so runs are repeatable.
clf = LinearSVC(random_state=13)

In [None]:
# Train the current classifier on the training split.
clf.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test split.
y_pred = clf.predict(X_test)

In [None]:
# Inspect the predicted labels.
y_pred

## Validando o modelo

In [None]:
from sklearn import metrics

# Fraction of test samples whose predicted label matches the true label.
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

## Testando outros modelos

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# k-nearest-neighbours classifier with scikit-learn's default settings.
clf = KNeighborsClassifier()

In [None]:
# Train the current classifier on the training split.
clf.fit(X_train, y_train)

In [None]:
# Predict labels for the test split (stored in `predicted`, not `y_pred`).
predicted = clf.predict(X_test)

In [None]:
# Accuracy of the predictions stored in `predicted` on the test split.
print("Accuracy:",metrics.accuracy_score(y_test, predicted))

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest with 100 trees.
# Seeded with the same value as the train/test split so runs are repeatable.
clf = RandomForestClassifier(n_estimators=100, random_state=13)

In [None]:
# Train the current classifier on the training split.
clf.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test split (overwrites the previous y_pred).
y_pred = clf.predict(X_test)

In [None]:
# Accuracy of the current y_pred on the test split.
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

In [None]:
from sklearn.linear_model import SGDClassifier

In [None]:
# Linear classifier trained with stochastic gradient descent.
# Seeded with the same value as the train/test split so runs are repeatable.
clf = SGDClassifier(random_state=13)

In [None]:
# Train the current classifier on the training split.
clf.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test split (overwrites the previous y_pred).
y_pred = clf.predict(X_test)

In [None]:
# Accuracy of the current y_pred on the test split.
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix for the current y_pred: rows are true labels, columns are
# predicted labels.
confusion_matrix(y_test, y_pred)

In [None]:
# Inspect the true labels of the test split.
y_test

In [None]:
# Inspect the predicted labels for the test split.
y_pred

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class precision, recall and F1 for the current y_pred.
print(classification_report(y_test, y_pred))

## Iterando sobre todos os SCMs e todas as lições