# Pré-Processamento dos dados

In [None]:
import geopandas as gpd
import pandas as pd

In [None]:
# Load the SCMs-licoes.csv table into a DataFrame; its "presente" and "scm"
# columns are the ones read by the selection that follows.
scm_licoes = pd.read_csv('downloads/imagens/SCMs-licoes.csv')

In [None]:
# Keep the `scm` value of every row whose `presente` flag equals 1.
licoes_utilizadas = scm_licoes.loc[scm_licoes["presente"].eq(1), "scm"]

In [None]:
# Read the licoes-por-scm GeoPackage into a GeoDataFrame; its `scm` column is
# used to filter the rows right after this.
licoes = gpd.read_file('licoes/licoes-por-scm.gpkg')

In [None]:
# Restrict the layer to rows whose `scm` appears in `licoes_utilizadas`.
licoes = licoes.loc[licoes["scm"].isin(licoes_utilizadas)]

In [None]:
# Read the SIRGAS_SHP_quadraMDSF shapefile straight out of the zip archive,
# using the zip://<archive>!/<member> path form.
# NOTE(review): `gdf_quadras` is not referenced in the visible part of this file.
gdf_quadras = gpd.read_file('zip://downloads/SIRGAS_SHP_quadraMDSF.zip!/SIRGAS_SHP_quadraMDSF/SIRGAS_SHP_quadraMDSF.shp')

In [None]:
import rasterio
import numpy as np
from rasterio import features
from matplotlib import pyplot
from shapely.geometry import box
from skimage.feature import hog
from skimage.feature import local_binary_pattern
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import joblib
# from rasterio.plot import show

In [None]:
import glob
import re

## Re-Escalando e Normalizando os dados

In [None]:
# Local Binary Pattern parameters: both are passed to local_binary_pattern()
# when the texture features are computed further down.
radius = 3
n_points = 8 * radius  # number of LBP sampling points, scaled with the radius
# Sliding-window settings; not referenced in the visible part of this file.
tamanho_janela = 24 # window size in pixels
sobreposicao_de_janelas = 16 # window overlap in pixels

In [None]:
# Parallel per-image accumulators: index k refers to the same image in every
# list. `hogs` and `lbps` are only filled by a later cell.
scms, shapes, transforms, grayscales, hogs, lbps = [], [], [], [], [], []

# glob returns files in arbitrary order; sorting makes each run process (and
# index) the images in the same order.
for f in sorted(glob.glob("downloads/imagens/*_RGB*.jp2")):

    # The SCM code is the text between an underscore and "_RGB" in the path.
    scm_match = re.search(r'_(.*)_RGB', f)
    if scm_match is None:
        # Fail with the offending path instead of an opaque AttributeError.
        raise ValueError(f'Could not extract the SCM code from file name: {f}')
    scm = scm_match.group(1)

    print(f'Processando SCM: {scm}')

    # The context manager closes the raster handle once the bands are read;
    # previously one handle per image stayed open.
    with rasterio.open(f) as dataset:
        transform = dataset.transform
        shape = dataset.shape
        red, green, blue = dataset.read([1, 2, 3])

    # Weighted sum of the three bands, truncated to 8-bit.
    # NOTE(review): assumes the band values fit in 0-255 - confirm for the
    # source imagery.
    grayscale = (0.2125 * red + 0.7154 * green + 0.0721 * blue).astype('uint8')

    # Append everything only after the image was read successfully, so a
    # failure part-way through cannot leave the lists misaligned.
    scms.append(scm)
    transforms.append(transform)
    shapes.append(shape)
    grayscales.append(grayscale)


In [None]:
# Fit a StandardScaler on the grey levels of all images, treating every pixel
# as one sample of a single feature.
scaler = StandardScaler()

if not grayscales:
    # The previous np.concatenate-based fit also raised ValueError on an empty
    # list; keep failing loudly rather than leaving an unfitted scaler behind.
    raise ValueError('No grayscale images available to fit the scaler')

# Feed one image at a time instead of concatenating every pixel of every image
# into a single array (which would then be upcast to float64 as a whole). The
# accumulated mean/variance match a single fit() up to floating-point rounding.
for _gray in grayscales:
    scaler.partial_fit(_gray.reshape(-1, 1))

In [None]:
# Serialize the scaler fitted on the grayscale pixels to disk with joblib.
# NOTE(review): assumes the pre-process/ directory already exists.
joblib.dump(scaler, 'pre-process/grayscale.scaler.save') 

In [None]:
# Scale each grayscale image with the fitted scaler and write it out as a
# single-band float64 raster that keeps the source image's size and transform.
for k, s in enumerate(scms):
    n_rows, n_cols = shapes[k][0], shapes[k][1]

    # Single-column layout, the same form the data had when the scaler was fit.
    pixel_column = grayscales[k].reshape(-1, 1)
    grayscale = scaler.transform(pixel_column).reshape(grayscales[k].shape)

    raster_profile = dict(
        driver='GTiff',
        dtype=rasterio.float64,
        count=1,
        width=n_cols,
        height=n_rows,
        transform=transforms[k],
        crs=rasterio.crs.CRS.from_string('EPSG:31983'),
    )

    # NOTE(review): the file name ends in .jp2 although the GTiff driver is
    # used - confirm whether downstream readers depend on this name.
    with rasterio.open(f'pre-process/grayscales/gray-scale-{s}.jp2', 'w', **raster_profile) as dst:
        dst.write(grayscale, indexes=1)

    print(k, s)

In [None]:
# Start from empty lists so that re-running this cell does not append a second
# copy of every image's features.
hogs, lbps = [], []

# Compute the HOG visualisation image and the LBP image of every grayscale
# image, in the same order as `scms`.
for k, s in enumerate(scms):

    print(f'Processando SCM: {s}')

    # With visualize=True, hog() returns (descriptor, image); only the image
    # is stored, the descriptor `fd` is not used afterwards in this cell.
    fd, hog_image = hog(grayscales[k], orientations=9, visualize=True,
                        pixels_per_cell=(24, 24),
                        cells_per_block=(3, 3))
    hogs.append(hog_image)

    # Features are computed on the 8-bit grayscale, not on the standardized one.
    lbp = local_binary_pattern(grayscales[k], n_points, radius)
    lbps.append(lbp)

In [None]:
# for k, s in enumerate(scms):
#     # grayscale = scaler.transform(grayscales[k].reshape(-1, 1)).reshape(grayscales[k].shape)
#     with rasterio.open(
#         f'pre-process/hogs/hog-{s}.jp2', 'w',
#         driver='GTiff',
#         dtype=rasterio.float64,
#         count=1,
#         width=shapes[k][1],
#         height=shapes[k][0],
#         transform=transforms[k],
#         crs = rasterio.crs.CRS.from_string('EPSG:31983')) as dst:
#         dst.write(hogs[k], indexes=1)
#     with rasterio.open(
#         f'pre-process/lbps/lbp-{s}.jp2', 'w',
#         driver='GTiff',
#         dtype=rasterio.float64,
#         count=1,
#         width=shapes[k][1],
#         height=shapes[k][0],
#         transform=transforms[k],
#         crs = rasterio.crs.CRS.from_string('EPSG:31983')) as dst:
#         dst.write(lbps[k], indexes=1)
#     print(k, s)

    # break

In [None]:
# Rebind `scaler` to a fresh min-max scaler mapping values into [-1, 1]; this
# replaces the scaler object previously held under the same name.
scaler = MinMaxScaler(feature_range=(-1, 1))

In [None]:
# Fit the scaler on the values of all HOG images, each value being one sample
# of a single feature.
if not hogs:
    # The previous np.concatenate-based fit also raised ValueError here.
    raise ValueError('No HOG images available to fit the scaler')

# fit() on the first image discards any earlier state; partial_fit() on the
# rest only widens the running min/max. The result is identical to fitting on
# the concatenation, without holding a second copy of every image in memory.
scaler.fit(hogs[0].reshape(-1, 1))
for _hog in hogs[1:]:
    scaler.partial_fit(_hog.reshape(-1, 1))

In [None]:
# Serialize the scaler currently bound to `scaler` (fitted on the HOG images
# just above) to disk with joblib.
# NOTE(review): assumes the pre-process/ directory already exists.
joblib.dump(scaler, 'pre-process/hog.scaler.save') 

In [None]:
# Rescale every HOG image with the fitted scaler and store the result as
# pre-process/hogs/hog-<scm>.npy, keeping the image's original shape.
for k, s in enumerate(scms):
    hog_column = hogs[k].reshape(-1, 1)
    hog_scaled = scaler.transform(hog_column).reshape(hogs[k].shape)
    np.save(f'pre-process/hogs/hog-{s}.npy', hog_scaled)
    print(s)

In [None]:
# Rebind `scaler` once more to a fresh min-max scaler mapping values into
# [-1, 1]; the HOG scaler held under this name is replaced.
scaler = MinMaxScaler(feature_range=(-1, 1))

In [None]:
# Fit the scaler on the values of all LBP images, each value being one sample
# of a single feature.
if not lbps:
    # The previous np.concatenate-based fit also raised ValueError here.
    raise ValueError('No LBP images available to fit the scaler')

# fit() on the first image discards any earlier state; partial_fit() on the
# rest only widens the running min/max. The result is identical to fitting on
# the concatenation, without holding a second copy of every image in memory.
scaler.fit(lbps[0].reshape(-1, 1))
for _lbp in lbps[1:]:
    scaler.partial_fit(_lbp.reshape(-1, 1))

In [None]:
# Serialize the scaler currently bound to `scaler` (fitted on the LBP images
# just above) to disk with joblib.
# NOTE(review): assumes the pre-process/ directory already exists.
joblib.dump(scaler, 'pre-process/lbp.scaler.save')

In [None]:
# Rescale every LBP image with the fitted scaler and store the result as
# pre-process/lbps/lbp-<scm>.npy, keeping the image's original shape.
for k, s in enumerate(scms):
    lbp_column = lbps[k].reshape(-1, 1)
    lbp_scaled = scaler.transform(lbp_column).reshape(lbps[k].shape)
    np.save(f'pre-process/lbps/lbp-{s}.npy', lbp_scaled)
    print(s)