## Geração dos Filtros de extração de características

## Feature Extraction

In [None]:
import numpy as np
import cv2
import pandas as pd

In [None]:
# Read the satellite image and convert it to a single-channel grayscale array.

img = cv2.imread('/GS_2500x2500.tif')
if img is None:
    # cv2.imread does not raise on failure; it returns None, which would
    # otherwise surface later as an opaque cvtColor assertion error.
    raise FileNotFoundError(
        "cv2.imread could not read '/GS_2500x2500.tif' (missing file or unsupported format)"
    )
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

EXTRAÇÃO DE FEATURES
  - Original image pixels
  - Gabor features           
  - CANNY EDGE
  - ROBERTS EDGE
  - SOBEL
  - SCHARR
  - PREWITT
  - GAUSSIAN with sigma=3
  - GAUSSIAN with sigma=7
  - MEDIAN with size=3

In [None]:
# Store the original pixel intensities as the first feature column.
# The grayscale image is flattened so that every pixel becomes one row.
img2 = img.reshape(-1)
df = pd.DataFrame({'Original Image': img2})

Geração dos Filtros de extração de características

**Créditos do código**: DigitalSreeni (https://www.youtube.com/watch?v=QEz4bG9P3Qs&t=1179s)

In [None]:

# Generate Gabor features.
# A bank of Gabor kernels is built over a grid of (theta, sigma, lamda, gamma);
# the grayscale image is filtered with each kernel and every response is stored
# as a flattened column of df, labelled Gabor1, Gabor2, ...
num = 1  # Running counter used to label the Gabor columns in the data frame
kernels = []
ksize = 9  # Side length of the square Gabor kernel (same for every kernel)
for theta in range(2):   # Number of orientations
    theta = theta / 4. * np.pi   # Orientations 0 and pi/4
    for sigma in (1, 3):  # Sigma with 1 and 3
        # NOTE(review): the wavelength grid starts at lamda=0, which looks like a
        # degenerate Gabor kernel; kept unchanged so the Gabor1..GaborN column
        # layout stays the same -- confirm whether it should start at pi/4.
        for lamda in np.arange(0, np.pi, np.pi / 4):   # Range of wavelengths
            for gamma in (0.05, 0.5):   # Gamma values of 0.05 and 0.5
                gabor_label = 'Gabor' + str(num)  # Label Gabor columns as Gabor1, Gabor2, etc.
                kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
                kernels.append(kernel)
                # Filter the 2-D image. The flattened vector img2 must not be
                # used here: a 1-D input gives the 2-D kernel no spatial
                # neighbourhood to work on, so the orientation would be meaningless.
                fimg = cv2.filter2D(img, cv2.CV_8UC3, kernel)
                # Flatten the response so it lines up with the other per-pixel columns
                filtered_img = fimg.reshape(-1)
                df[gabor_label] = filtered_img  # Labels columns as Gabor1, Gabor2, etc.
                print(gabor_label, ': theta=', theta, ': sigma=', sigma, ': lamda=', lamda, ': gamma=', gamma)
                num += 1  # Increment for gabor column label

Gabor1 : theta= 0.0 : sigma= 1 : lamda= 0.0 : gamma= 0.05
Gabor2 : theta= 0.0 : sigma= 1 : lamda= 0.0 : gamma= 0.5
Gabor3 : theta= 0.0 : sigma= 1 : lamda= 0.7853981633974483 : gamma= 0.05
Gabor4 : theta= 0.0 : sigma= 1 : lamda= 0.7853981633974483 : gamma= 0.5
Gabor5 : theta= 0.0 : sigma= 1 : lamda= 1.5707963267948966 : gamma= 0.05
Gabor6 : theta= 0.0 : sigma= 1 : lamda= 1.5707963267948966 : gamma= 0.5
Gabor7 : theta= 0.0 : sigma= 1 : lamda= 2.356194490192345 : gamma= 0.05
Gabor8 : theta= 0.0 : sigma= 1 : lamda= 2.356194490192345 : gamma= 0.5
Gabor9 : theta= 0.0 : sigma= 3 : lamda= 0.0 : gamma= 0.05
Gabor10 : theta= 0.0 : sigma= 3 : lamda= 0.0 : gamma= 0.5
Gabor11 : theta= 0.0 : sigma= 3 : lamda= 0.7853981633974483 : gamma= 0.05
Gabor12 : theta= 0.0 : sigma= 3 : lamda= 0.7853981633974483 : gamma= 0.5
Gabor13 : theta= 0.0 : sigma= 3 : lamda= 1.5707963267948966 : gamma= 0.05
Gabor14 : theta= 0.0 : sigma= 3 : lamda= 1.5707963267948966 : gamma= 0.5
Gabor15 : theta= 0.0 : sigma= 3 : lamda= 2

In [None]:
# Sanity check: (rows, columns) of the feature table built so far.
df.shape

(25000000, 33)

In [None]:
# Generation of the remaining filters (edge detectors and smoothing filters).
# Each filter is run on the 2-D grayscale image, its response is flattened to
# one value per pixel, and the result is appended to df as a new column.

from skimage.filters import roberts, sobel, scharr, prewitt
from scipy import ndimage as nd

# --- 2-D filter responses -------------------------------------------------
edges = cv2.Canny(img, 100,200)                      # Canny edge map
edge_roberts = roberts(img)                          # Roberts edge filter
edge_sobel = sobel(img)                              # Sobel edge filter
edge_scharr = scharr(img)                            # Scharr edge filter
edge_prewitt = prewitt(img)                          # Prewitt edge filter
gaussian_img = nd.gaussian_filter(img, sigma=3)      # Gaussian blur, sigma=3
gaussian_img2 = nd.gaussian_filter(img, sigma=7)     # Gaussian blur, sigma=7
median_img = nd.median_filter(img, size=3)           # Median filter, size=3

# --- Flatten every response to a per-pixel vector ---------------------------
edges1 = edges.reshape(-1)
edge_roberts1 = edge_roberts.reshape(-1)
edge_sobel1 = edge_sobel.reshape(-1)
edge_scharr1 = edge_scharr.reshape(-1)
edge_prewitt1 = edge_prewitt.reshape(-1)
gaussian_img1 = gaussian_img.reshape(-1)
gaussian_img3 = gaussian_img2.reshape(-1)
median_img1 = median_img.reshape(-1)

# --- Append the columns (order determines the column order in df) ----------
for column_name, flat_values in (
    ('Canny Edge', edges1),
    ('Roberts', edge_roberts1),
    ('Sobel', edge_sobel1),
    ('Scharr', edge_scharr1),
    ('Prewitt', edge_prewitt1),
    ('Gaussian s3', gaussian_img1),
    ('Gaussian s7', gaussian_img3),
    ('Median s3', median_img1),
):
    df[column_name] = flat_values

# Variance filter with size=3 (left disabled by the original author):
# variance_img = nd.generic_filter(img, np.var, size=3)
# variance_img1 = variance_img.reshape(-1)
# df['Variance s3'] = variance_img1  #Add column to original dataframe

### Consolidação da base de treino



In [None]:
# Add a column with the labels to the data frame.
# The labeled mask image is read from disk, converted to a single channel and
# flattened so that each value becomes one row of the 'Labels' column.
# A mask with only partial labels can be loaded as well, but then the rows
# holding unlabeled data have to be dropped afterwards.
labeled_img = cv2.imread('/content/drive/MyDrive/TCC_IMG/mask_polig_2500_2500_OSM_r05.tif')
if labeled_img is None:
    # cv2.imread does not raise on failure; it returns None, which would
    # otherwise surface later as an opaque cvtColor assertion error.
    raise FileNotFoundError(
        "cv2.imread could not read the label mask "
        "'/content/drive/MyDrive/TCC_IMG/mask_polig_2500_2500_OSM_r05.tif' "
        "(missing file or unsupported format)"
    )

labeled_img = cv2.cvtColor(labeled_img, cv2.COLOR_BGR2GRAY)
labeled_img1 = labeled_img.reshape(-1)
df['Labels'] = labeled_img1

# Preview the first rows of the consolidated table
df.head()


Unnamed: 0,Original Image,Gabor1,Gabor2,Gabor3,Gabor4,Gabor5,Gabor6,Gabor7,Gabor8,Gabor9,...,Gabor32,Canny Edge,Roberts,Sobel,Scharr,Prewitt,Gaussian s3,Gaussian s7,Median s3,Labels
0,92,0,0,255,255,160,94,65,38,0,...,0,0,0.062745,0.044367,0.044367,0.044367,101,103,92,0
1,108,0,0,255,255,160,93,65,38,0,...,46,0,0.066667,0.091508,0.091508,0.091508,100,102,108,0
2,125,0,0,255,255,156,88,63,36,0,...,70,0,0.180392,0.080416,0.080416,0.080416,98,101,108,0
3,79,0,0,255,254,151,82,61,33,0,...,35,255,0.015686,0.138648,0.138648,0.138648,96,100,79,0
4,75,0,0,255,238,148,77,60,31,0,...,48,0,0.0,0.011092,0.011092,0.011092,93,99,75,0


## Salvando o dataframe com os filtros, para não precisar rodá-los novamente

In [None]:
# Install fastparquet, the engine requested by the df.to_parquet call that saves the feature table.
!pip install fastparquet

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting fastparquet
  Downloading fastparquet-2023.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.6 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.4/1.6 MB[0m [31m10.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
Collecting cramjam>=2.3
  Downloading cramjam-2.6.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m73.1 MB/s[0m eta [36m0:00:00[0m
Collecting pandas>=1.5.0
  Downloading pandas-1.5.3-cp39-c

In [None]:
# Save the dataset with all features to a Parquet file so the filters do not
# have to be recomputed (original author's note: 42 columns, 25M rows).
parquet_path = "/ga_GS2500_maskOSM_amostra2_1245_2490.parquet"
df.to_parquet(parquet_path, engine='fastparquet')