In [2]:
from itertools import product

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from scipy import stats

from sklearn import metrics
from sklearn import tree
from sklearn.dummy import DummyRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor

In [3]:
# Brazilian Cerrado data, stored as one CSV file per month.
arquivos = [
    'janeiro.csv',
    'fev.csv',
    'marco.csv',
    'abril.csv',
    'maio.csv',
    'junho.csv',
    'julho.csv',
]

In [4]:
def read_csv(list_of_files):
    """Read several comma-separated files and stack them into one DataFrame.

    Parameters
    ----------
    list_of_files : iterable
        Paths (or anything ``pd.read_csv`` accepts) of the files to load,
        in the order their rows should appear in the result.

    Returns
    -------
    pandas.DataFrame
        The rows of every file concatenated in input order, with the
        per-file indexes discarded and replaced by a fresh 0..n-1 index.
    """
    frames = [pd.read_csv(path, sep=',') for path in list_of_files]
    stacked = pd.concat(frames)
    return stacked.reset_index(drop=True)

In [5]:
# Load every file listed in `arquivos` into a single DataFrame.
meses = read_csv(arquivos)

In [6]:
# Infer the best-fitting dtype for every column, then drop any row that has a
# missing value in any column.
# convert_dtypes() returns a NEW DataFrame and does not modify the caller, so
# its result has to be assigned back for the conversion to take effect.
meses = meses.convert_dtypes().dropna(how='any')

In [7]:
# Remove the listed columns from `meses` in place. `axis` is not passed
# because `columns=` already identifies the axis being dropped from.
meses.drop(
    columns=[
        'Unnamed: 0',
        'estado',
        'satelite',
        'municipio',
        'pais',
        'municipio_id',
        'estado_id',
        'pais_id',
        'bioma',
    ],
    inplace=True,
)

In [8]:
# Boolean mask of the rows whose fire risk is strictly positive; only those
# rows are kept in `meses`.
meses_fogo = meses['risco_fogo'].gt(0)
meses = meses.loc[meses_fogo]

In [9]:
# Renumber the rows 0..n-1 after the filtering above. drop=True discards the
# old index directly instead of materialising it as an 'index' column that
# then has to be deleted by hand.
meses = meses.reset_index(drop=True)
display(meses)

Unnamed: 0,lat,lon,data_hora_gmt,numero_dias_sem_chuva,precipitacao,risco_fogo
0,-12.314859,-43.234066,2022-03-02 01:05:48,9.0,2.7,1.0
1,-20.227880,-46.415940,2022-03-02 01:28:00,8.0,0.0,0.3
2,-20.229250,-46.426820,2022-03-02 01:28:00,8.0,0.0,0.3
3,-20.238930,-46.425510,2022-03-02 01:28:00,8.0,0.0,0.4
4,-20.237550,-46.414600,2022-03-02 01:28:00,8.0,0.0,0.4
...,...,...,...,...,...,...
265136,-6.360000,-45.670000,2022-07-29 23:54:47,46.0,0.0,1.0
265137,-6.360000,-45.690000,2022-07-29 23:54:47,46.0,0.0,1.0
265138,-6.400000,-44.140000,2022-07-29 23:54:47,34.0,0.0,1.0
265139,-6.350000,-45.670000,2022-07-29 23:54:47,46.0,0.0,1.0


In [10]:
# Count how many rows fall into each fire-risk band and print one line per
# band ("<count> <label>"). Bands are half-open on the left: (lower, upper].
# The labels match the ones written to the 'Risco Categórico' column, so the
# printed summary and the column values use the same vocabulary.
risco = meses["risco_fogo"]
faixas_de_risco = [
    (risco <= 0.2, 'Risco Baixo'),
    ((risco > 0.2) & (risco <= 0.4), 'Risco Médio-Baixo'),
    ((risco > 0.4) & (risco <= 0.6), 'Risco Médio'),
    ((risco > 0.6) & (risco <= 0.8), 'Risco Médio-Alto'),
    (risco > 0.8, 'Risco Alto'),
]
for logic, rotulo in faixas_de_risco:
    print(len(meses.loc[logic]), rotulo)

13853 Risco Baixo
16018 Risco Baixo-Médio
19324 Risco Médio
23407 Risco Médio-Alto
192539 Risco Alto


In [11]:
# Add a text column naming the fire-risk band of each row. The column starts
# as None and every (mask, label) pair below fills in the rows of one band;
# bands are (lower, upper] intervals over `risco_fogo`.
meses['Risco Categórico'] = None

risco = meses["risco_fogo"]
for logic, rotulo in (
    (risco.le(0.2), 'Risco Baixo'),
    (risco.gt(0.2) & risco.le(0.4), 'Risco Médio-Baixo'),
    (risco.gt(0.4) & risco.le(0.6), 'Risco Médio'),
    (risco.gt(0.6) & risco.le(0.8), 'Risco Médio-Alto'),
    (risco.gt(0.8), 'Risco Alto'),
):
    meses.loc[logic, 'Risco Categórico'] = rotulo

print(meses)


              lat        lon        data_hora_gmt  numero_dias_sem_chuva  \
0      -12.314859 -43.234066  2022-03-02 01:05:48                    9.0   
1      -20.227880 -46.415940  2022-03-02 01:28:00                    8.0   
2      -20.229250 -46.426820  2022-03-02 01:28:00                    8.0   
3      -20.238930 -46.425510  2022-03-02 01:28:00                    8.0   
4      -20.237550 -46.414600  2022-03-02 01:28:00                    8.0   
...           ...        ...                  ...                    ...   
265136  -6.360000 -45.670000  2022-07-29 23:54:47                   46.0   
265137  -6.360000 -45.690000  2022-07-29 23:54:47                   46.0   
265138  -6.400000 -44.140000  2022-07-29 23:54:47                   34.0   
265139  -6.350000 -45.670000  2022-07-29 23:54:47                   46.0   
265140  -6.340000 -45.690000  2022-07-29 23:54:47                   45.0   

        precipitacao  risco_fogo   Risco Categórico  
0                2.7         1.0 

In [12]:
# --- Split configuration -------------------------------------------------
TAMANHO_TESTE = 0.1  # fraction of the rows held out for testing: 10%
SEMENTE_ALEATORIA = 61455  # arbitrarily chosen seed for the random split
DATASET_NAME = "meses"
FEATURES = [
    "numero_dias_sem_chuva",
    "precipitacao",
    "lat",
    "lon",
]
TARGET = ["Risco Categórico"]

# --- Split the row labels, then slice the frame with them ------------------
indices = meses.index
indices_treino, indices_teste = train_test_split(
    indices,
    test_size=TAMANHO_TESTE,
    random_state=SEMENTE_ALEATORIA,
)
meses_treino, meses_teste = meses.loc[indices_treino], meses.loc[indices_teste]

# --- Feature matrices and target arrays as plain NumPy arrays --------------
# TARGET is a one-element list, so the y arrays come out two-dimensional.
X_treino = meses_treino.reindex(columns=FEATURES).values
X_teste = meses_teste.reindex(columns=FEATURES).values
y_treino = meses_treino.reindex(columns=TARGET).values
y_teste = meses_teste.reindex(columns=TARGET).values

In [13]:
# Flatten the target arrays to 1-D, printing the shapes before and after.
print(y_treino.shape, y_teste.shape)
y_treino, y_teste = y_treino.ravel(), y_teste.ravel()
print(y_treino.shape, y_teste.shape)

# Create a k-nearest-neighbours classifier with its default settings and
# train it on the training split (fit returns the fitted estimator itself).
modelo_knn = KNeighborsClassifier().fit(X_treino, y_treino)

# Predict the risk band of every row in the test split.
previsao = modelo_knn.predict(X_teste)
print(previsao)

(238626, 1) (26515, 1)
(238626,) (26515,)
['Risco Médio-Alto' 'Risco Médio-Alto' 'Risco Alto' ... 'Risco Alto'
 'Risco Alto' 'Risco Alto']


In [14]:
# Fraction of test rows whose predicted risk band equals the true band.
# `metrics` is imported with the other dependencies in the first cell.
# NOTE(review): the band counts printed earlier show 192539 of 265141 rows
# (about 73%) in 'Risco Alto', so a model that always predicts that band would
# already score roughly 0.73 here; read this accuracy against that baseline.
print("Accuracy:",metrics.accuracy_score(y_teste, previsao))

Accuracy: 0.9105034885913634
