<a href="https://colab.research.google.com/github/Shaielyfs/Classifica_de_Uso_e_Cobertura_do_Solo_Sentinel-2_GEE_ML/blob/main/clas_uso_solo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Objetivo:Desenvolver um pipeline completo para mapeamento de Uso e Cobertura do Solo (LULC) utilizando imagens Sentinel-2 obtidas via Google Earth Engine, processadas no Google Colab e classificadas com Machine Learning (Random Forest).

In [1]:
# instalar google earth engine
!pip install geemap rasterio scikit-learn matplotlib -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.6/1.6 MB[0m [31m47.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# ativar o GEE
import geemap
import ee
ee.Authenticate()     #autenticar a conta
ee.Initialize(project="classi-uso-e-cobertura-do-solo")

In [3]:
# definir área de estudo
roi = ee.Geometry.Polygon(
    [[-48.58867069419273,-25.700372792794713],
      [-48.338045083841166,-25.700372792794713],
      [-48.338045083841166,-25.497880231775763],
      [-48.58867069419273,-25.497880231775763],
      ])

image = (
    ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
    .filterBounds(roi)
    .filterDate("2023-01-01", "2023-12-31")
    .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 10))
    .median()
    .clip(roi)
)

In [4]:
# selecionar as bandas
bands = ['B2', 'B3', 'B4', 'B8']
# Azul (água, atm), Verde (vegetação), Vermelho (vegetação, solo), NIR (biomassa vegetal)

# índices espectrais que realçam padrões ambientais
ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
 #vegetação ndvi = (NIR - Red)/(NIR + Red)

image = image.select(bands).addBands(ndvi)

In [5]:
#exportar a imagem para o colab
import os

output_dir = "data/raw"
# Criar o diretório se ele não existir
os.makedirs(output_dir, exist_ok=True)

geemap.ee_export_image(
    image,
    filename=os.path.join(output_dir, "sentinel_2.tif"),
    scale=30,        # Aumentar a escala para reduzir o tamanho do arquivo
    region=roi,      #área de estudo
    file_per_band=False    #tudo em único GeoTIFF
)

Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/classi-uso-e-cobertura-do-solo/thumbnails/9abba4b07f5cc6188fbbd84bcc6ea627-9046f496162627ef68d61c7b3b57355a:getPixels
Please wait ...
Data downloaded to /content/data/raw/sentinel_2.tif


In [6]:
#verificando a exportação
import rasterio

filename = "data/raw/sentinel_2.tif" # Definir a variável filename

with rasterio.open(filename) as src:
    print(src.count)       #n de banda
    print(src.shape)       #dimensão
    print(src.crs)         #sist de coorden



5
(753, 931)
EPSG:4326


In [7]:
#importar as amostras geradas no GEE
import pandas as pd

samples = pd.read_csv("/content/drive/MyDrive/training_samples.csv")

X = samples.drop(columns=['class'])
y = samples['class']

In [9]:
#Treinamento e validação do com Machine Learning
#importar as bibliotecas
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [10]:
#Ler as amostras
samples = pd.read_csv("/content/drive/MyDrive/training_samples.csv")

samples.head()


Unnamed: 0,system:index,B2,B3,B4,B8,NDVI,NDWI,class,.geo
0,1_1_1_0_0,132.0,119.0,117.0,63.0,-0.3,0.307692,1,"{""type"":""MultiPoint"",""coordinates"":[]}"
1,1_1_1_0_1,131.0,127.0,135.0,58.0,-0.398964,0.372973,1,"{""type"":""MultiPoint"",""coordinates"":[]}"
2,1_1_1_0_2,207.0,246.0,192.0,108.0,-0.28,0.389831,1,"{""type"":""MultiPoint"",""coordinates"":[]}"
3,1_1_1_0_3,210.0,275.0,190.0,105.0,-0.288136,0.447368,1,"{""type"":""MultiPoint"",""coordinates"":[]}"
4,1_1_1_0_4,122.0,156.0,163.0,274.0,0.254005,-0.274419,1,"{""type"":""MultiPoint"",""coordinates"":[]}"


In [16]:
#separar variáveis e rótulos , eliminar coluna geo
X = samples.drop(columns=['class', '.geo'])
y = samples['class']

In [17]:
#dividir treino e teste
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,      #30% para teste
    random_state=42,    #Controla a aleatoriedade da divisão
    stratify=y          #mantém proporção das classes
)

In [18]:
#treinar o randon forest
rf = RandomForestClassifier(
    n_estimators=300,
    max_depth=None,
    random_state=42,
    n_jobs=-1
)

rf.fit(X_train, y_train)

In [19]:
#avaliar o modelo
y_pred = rf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.9444444444444444
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         5
           2       1.00      1.00      1.00         4
           3       0.83      1.00      0.91         5
           4       1.00      0.75      0.86         4

    accuracy                           0.94        18
   macro avg       0.96      0.94      0.94        18
weighted avg       0.95      0.94      0.94        18

