# Pré-Processamento dos dados

In [1]:
import geopandas as gpd
import pandas as pd

In [2]:
scm_licoes = pd.read_csv('downloads/imagens/SCMs-licoes.csv')

In [3]:
licoes_utilizadas = scm_licoes.loc[scm_licoes["presente"] == 1, 'scm']

In [4]:
licoes = gpd.read_file('licoes/licoes-por-scm.gpkg')

In [5]:
licoes = licoes[licoes.scm.isin(licoes_utilizadas)]

In [6]:
gdf_quadras = gpd.read_file('zip://downloads/SIRGAS_SHP_quadraMDSF.zip!/SIRGAS_SHP_quadraMDSF/SIRGAS_SHP_quadraMDSF.shp')

In [7]:
import rasterio
import numpy as np
from rasterio import features
from matplotlib import pyplot
from shapely.geometry import box
# from skimage.feature import hog
from skimage import filters
from skimage.feature import local_binary_pattern
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import joblib
# from rasterio.plot import show

In [8]:
import glob
import re

## Re-Escalando e Normalizando os dados

In [9]:
radius = 3
n_points = 8 * radius
tamanho_janela = 24 # tamanho em pixels
sobreposicao_de_janelas = 12 # sobreposição das janelas em pixels

In [10]:
scms, shapes, transforms, grayscales, sobels, lbps = [], [], [], [], [], []

for f in glob.glob("downloads/imagens/*_RGB*.jp2"):

    scm = re.search('_(.*)_RGB', f).group(1)
    scms.append(scm)
    
    print(f'Processando SCM: {scm}')
    
    dataset = rasterio.open(f)
    transforms.append(dataset.transform)
    shapes.append(dataset.shape) 
    
    grayscale = 0.2125 * dataset.read(1) + \
            0.7154 * dataset.read(2) + \
            0.0721 * dataset.read(3)
    grayscale = grayscale.astype('uint8')
    grayscales.append(grayscale)

#     break


Processando SCM: 3325-334
Processando SCM: 3313-412
Processando SCM: 3315-211
Processando SCM: 3325-332
Processando SCM: 3313-414
Processando SCM: 3325-164
Processando SCM: 3313-432
Processando SCM: 3326-424
Processando SCM: 3316-221
Processando SCM: 3314-311
Processando SCM: 3326-461
Processando SCM: 3315-132


In [11]:
scaler = StandardScaler()
scaler.fit(np.concatenate([x.reshape(-1) for x in grayscales]).reshape(-1, 1))

StandardScaler()

In [12]:
joblib.dump(scaler, 'pre-process/grayscale.scaler.save') 

['pre-process/grayscale.scaler.save']

In [13]:
for k, s in enumerate(scms):
    grayscale = scaler.transform(grayscales[k].reshape(-1, 1)).reshape(grayscales[k].shape)
    with rasterio.open(
        f'pre-process/grayscales/gray-scale-{s}.jp2', 'w',
        driver='GTiff',
        dtype=rasterio.float64,
        count=1,
        width=shapes[k][1],
        height=shapes[k][0],
        transform=transforms[k],
        crs = rasterio.crs.CRS.from_string('EPSG:31983')) as dst:
        dst.write(grayscale, indexes=1)
    print(k, s)
    # break

0 3325-334
1 3313-412
2 3315-211
3 3325-332
4 3313-414
5 3325-164
6 3313-432
7 3326-424
8 3316-221
9 3314-311
10 3326-461
11 3315-132


In [14]:
for k, s in enumerate(scms):
   
    print(f'Processando SCM: {s}')

    sobel_image = filters.sobel(grayscales[k])
    sobels.append(sobel_image)
    
    lbp = local_binary_pattern(grayscales[k], n_points, radius)
    lbps.append(lbp)
    
    # break

Processando SCM: 3325-334
Processando SCM: 3313-412
Processando SCM: 3315-211
Processando SCM: 3325-332
Processando SCM: 3313-414
Processando SCM: 3325-164
Processando SCM: 3313-432
Processando SCM: 3326-424
Processando SCM: 3316-221
Processando SCM: 3314-311
Processando SCM: 3326-461
Processando SCM: 3315-132


In [15]:
for k, s in enumerate(scms):
    # grayscale = scaler.transform(grayscales[k].reshape(-1, 1)).reshape(grayscales[k].shape)
    with rasterio.open(
        f'pre-process/sobel/sobel-{s}.jp2', 'w',
        driver='GTiff',
        dtype=rasterio.float64,
        count=1,
        width=shapes[k][1],
        height=shapes[k][0],
        transform=transforms[k],
        crs = rasterio.crs.CRS.from_string('EPSG:31983')) as dst:
        dst.write(sobels[k], indexes=1)
    with rasterio.open(
        f'pre-process/lbps/lbp-{s}.jp2', 'w',
        driver='GTiff',
        dtype=rasterio.float64,
        count=1,
        width=shapes[k][1],
        height=shapes[k][0],
        transform=transforms[k],
        crs = rasterio.crs.CRS.from_string('EPSG:31983')) as dst:
        dst.write(lbps[k], indexes=1)
    print(k, s)

    # break

0 3325-334
1 3313-412
2 3315-211
3 3325-332
4 3313-414
5 3325-164
6 3313-432
7 3326-424
8 3316-221
9 3314-311
10 3326-461
11 3315-132


In [16]:
scaler = MinMaxScaler(feature_range=(-1, 1))

In [17]:
scaler.fit(np.concatenate([x.reshape(-1) for x in sobels]).reshape(-1, 1))

MinMaxScaler(feature_range=(-1, 1))

In [18]:
joblib.dump(scaler, 'pre-process/sobel.scaler.save') 

['pre-process/sobel.scaler.save']

In [19]:
for k, s in enumerate(scms):
    np.save(f'pre-process/sobel/sobel-{s}.npy', scaler.transform(sobels[k].reshape(-1, 1)).reshape(sobels[k].shape))
    print(s)
    # print(scaler.transform(hogs[k].reshape(-1, 1)).reshape(hogs[k].shape))
    # break

3325-334
3313-412
3315-211
3325-332
3313-414
3325-164
3313-432
3326-424
3316-221
3314-311
3326-461
3315-132


In [20]:
scaler = MinMaxScaler(feature_range=(-1, 1))

In [21]:
scaler.fit(np.concatenate([x.reshape(-1) for x in lbps]).reshape(-1, 1))

MinMaxScaler(feature_range=(-1, 1))

In [22]:
joblib.dump(scaler, 'pre-process/lbp.scaler.save')

['pre-process/lbp.scaler.save']

In [23]:
for k, s in enumerate(scms):
    np.save(f'pre-process/lbps/lbp-{s}.npy', scaler.transform(lbps[k].reshape(-1, 1)).reshape(lbps[k].shape))
    print(s)

3325-334
3313-412
3315-211
3325-332
3313-414
3325-164
3313-432
3326-424
3316-221
3314-311
3326-461
3315-132
