In [5]:
import numpy as np 
import pandas as pd 
import math, os
from sklearn import preprocessing
import gmplot
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, LSTM, Masking, Input, Dropout
from keras.layers.merge import concatenate
from keras.utils import plot_model
from keras.callbacks import ModelCheckpoint
pd.set_option('display.max_columns', None)

In [6]:
def obtain_data():
    dfs={}
    for i in range(19):
        year=str(2000+i)
        file='datos/DesmatamentoMunicipios' + year + '.csv'
        dfs[i] = pd.read_csv(file, encoding = "ISO-8859-1", index_col=0, sep=",")
        #eliminamos columnas irrelevantes
        dfs[i].drop(columns='Latgms', inplace=True)
        dfs[i].drop(columns='Longms', inplace=True)
        dfs[i].drop(columns='CodIbge', inplace=True)
        #df.rename(columns={0:'Latitud', 1:'Longitud', 2:'Municipio', 3:'Estado', 4: 'AreaKm2', 5:'Deforestacion', 6:'Incremento deforestacion', 7:'Bosque', 8:'Nubes', 9:'No observado', 10:'No bosque', 11:'Hidrografia', 12:'Check'} ,inplace=True)
        dfs[i].columns=['Latitud', 'Longitud', 'Municipio', 'Estado', 'Area total', 'Deforestacion ' + year, 'Incremento deforestacion ' + year, 'Bosque ' + year, 'Nubes ' + year, 'No observado ' + year, 'No bosque', 'Hidrografia', 'Check ' + year]
        dfs[i]=dfs[i][['Latitud', 'Longitud', 'Municipio', 'Estado', 'Area total', 'No bosque', 'Hidrografia', 'Deforestacion ' + year, 'Incremento deforestacion ' + year, 'Bosque ' + year, 'Nubes ' + year, 'No observado ' + year, 'Check ' + year]]
    return dfs

In [7]:
def update_2019(df):
    file='datos/terrabrasilis_amazon.csv'
    df_new = pd.read_csv(file, encoding = "UTF-8", sep=",")
    #limpiamos el dataset
    df_new.drop(['geocode_ibge'], axis=1, inplace=True)
    df_new = df_new.groupby(['municipality', 'state', 'year'])['areakm'].agg('sum').reset_index()
    
    estados = {
    'MATO GROSSO': 'MT', 
    'MARANHÃO': 'MA',
    'AMAPÁ': 'AP',
    'RORAIMA': 'RR',
    'AMAZONAS': 'AM',
    'PARÁ': 'PA',
    'RONDÔNIA': 'RO',
    'TOCANTINS': 'TO',
    "ACRE": "AC"
    }
    
    # Mismo formato de nombre de municipio
    for index, row in df_new.iterrows():
        df_new.iloc[index,1] = estados[row['state']]
        df_new.iloc[index,0] = row['municipality'] + " (" + str(df_new.iloc[index,1]) + ")"
        df_new
    
    # 3 casos particulares en los que el nombre tiene pequeñas diferencias entre ambos datasets
    df_new.loc[df_new['municipality']=='SANTA IZABEL DO PARÁ (PA)', 'municipality'] = 'SANTA ISABEL DO PARÁ (PA)'
    df_new.loc[df_new['municipality']=='ELDORADO DO CARAJÁS (PA)', 'municipality'] = 'ELDORADO DOS CARAJÁS (PA)'
    df_new.loc[df_new['municipality']=='POXORÉU (MT)', 'municipality'] = 'POXORÉO (MT)'
        
    # Upper del municipio para que tengan el mismo formato
    for index, row in df.iterrows():
        df.iloc[index,0] = df.iloc[index,0].upper()
        
    # Actualizamos los datos antiguos con los nuevos
    for index, row in df_new.iterrows():
        inc = 'Incremento deforestacion ' + str(row['year'])
        if row['year']==2019:
            df.loc[df['Municipio']==row['municipality'], 'Deforestacion 2019'] = df.loc[df['Municipio']==row['municipality'], 'Deforestacion 2018'] + row['areakm']
            df.loc[df['Municipio']==row['municipality'], inc] = row['areakm']
            df.loc[df['Municipio']==row['municipality'], 'Bosque 2019'] = df.loc[df['Municipio']==row['municipality'], 'Bosque 2018'] - row['areakm']
            df.loc[df['Municipio']==row['municipality'], 'Nubes 2019'] = 0
            df.loc[df['Municipio']==row['municipality'], 'No observado 2019'] = 0
            df.loc[df['Municipio']==row['municipality'], 'Check 2019'] = 100
        else:
            df.loc[df['Municipio']==row['municipality'], inc] = row['areakm']
            
    total=0
    # Si no hay dato de un municipio en 2019, asumimos que no ha habido deforestacion
    for index, row in df.iterrows():
        if math.isnan(row[-1]):
            #total+=1
            df.loc[df['Municipio']==row['Municipio'], 'Deforestacion 2019'] = row['Deforestacion 2018']
            df.loc[df['Municipio']==row['Municipio'], 'Incremento deforestacion 2019'] = 0
            df.loc[df['Municipio']==row['Municipio'], 'Bosque 2019'] = row['Bosque 2018']
            df.loc[df['Municipio']==row['Municipio'], 'Nubes 2019'] =  row['Nubes 2018']
            df.loc[df['Municipio']==row['Municipio'], 'No observado 2019'] = row['No observado 2018']
            df.loc[df['Municipio']==row['Municipio'], 'Check 2019'] = 100
    
    #print("--------- Municipios sin variaciones en 2019 (sus incrementos del año anterior son <5km^2, así que tiene sentido):", total)
    
    return df

In [8]:
def create_df(dfs):
    df=dfs[0]
    dic={}
    for idx, row in df.iterrows():
            if row['Municipio'] in dic:
                dic[row['Municipio']]+=1
            else:
                dic[row['Municipio']]=1

    repetidos=[]
    for key, value in dic.items():
        if value==2:
            repetidos.append(key)

    rep= df.Municipio.isin(repetidos)
    mismo_municipio =  df[rep]
    #print("Hay algunos municipios con el mismo nombre en 2 estados distintos:")
    #print(mismo_municipio.loc[:,'Latitud':'Area total'])

    df=dfs[0]
    for idx, value in dfs.items():
        value['Municipio']=value['Municipio'] + " (" + value['Estado'] + ")"
        if idx > 0:
            value.drop(columns='Latitud', inplace=True)
            value.drop(columns='Longitud', inplace=True)
            value.drop(columns='Estado', inplace=True)
            value.drop(columns='Area total', inplace=True)
            value.drop(columns='Hidrografia', inplace=True)
            value.drop(columns='No bosque', inplace=True)
            df=pd.merge(df, value, on='Municipio')

    # reorder columns
    cols = df.columns.tolist()
    cols = cols[2:4] + cols[0:2] + cols[4:]
    df = df[cols]
    
    df = update_2019(df)
    return df

In [9]:
dfs = obtain_data()
df = create_df(dfs)
df.head()

Unnamed: 0,Municipio,Estado,Latitud,Longitud,Area total,No bosque,Hidrografia,Deforestacion 2000,Incremento deforestacion 2000,Bosque 2000,Nubes 2000,No observado 2000,Check 2000,Deforestacion 2001,Incremento deforestacion 2001,Bosque 2001,Nubes 2001,No observado 2001,Check 2001,Deforestacion 2002,Incremento deforestacion 2002,Bosque 2002,Nubes 2002,No observado 2002,Check 2002,Deforestacion 2003,Incremento deforestacion 2003,Bosque 2003,Nubes 2003,No observado 2003,Check 2003,Deforestacion 2004,Incremento deforestacion 2004,Bosque 2004,Nubes 2004,No observado 2004,Check 2004,Deforestacion 2005,Incremento deforestacion 2005,Bosque 2005,Nubes 2005,No observado 2005,Check 2005,Deforestacion 2006,Incremento deforestacion 2006,Bosque 2006,Nubes 2006,No observado 2006,Check 2006,Deforestacion 2007,Incremento deforestacion 2007,Bosque 2007,Nubes 2007,No observado 2007,Check 2007,Deforestacion 2008,Incremento deforestacion 2008,Bosque 2008,Nubes 2008,No observado 2008,Check 2008,Deforestacion 2009,Incremento deforestacion 2009,Bosque 2009,Nubes 2009,No observado 2009,Check 2009,Deforestacion 2010,Incremento deforestacion 2010,Bosque 2010,Nubes 2010,No observado 2010,Check 2010,Deforestacion 2011,Incremento deforestacion 2011,Bosque 2011,Nubes 2011,No observado 2011,Check 2011,Deforestacion 2012,Incremento deforestacion 2012,Bosque 2012,Nubes 2012,No observado 2012,Check 2012,Deforestacion 2013,Incremento deforestacion 2013,Bosque 2013,Nubes 2013,No observado 2013,Check 2013,Deforestacion 2014,Incremento deforestacion 2014,Bosque 2014,Nubes 2014,No observado 2014,Check 2014,Deforestacion 2015,Incremento deforestacion 2015,Bosque 2015,Nubes 2015,No observado 2015,Check 2015,Deforestacion 2016,Incremento deforestacion 2016,Bosque 2016,Nubes 2016,No observado 2016,Check 2016,Deforestacion 2017,Incremento deforestacion 2017,Bosque 2017,Nubes 2017,No observado 2017,Check 2017,Deforestacion 2018,Incremento deforestacion 2018,Bosque 2018,Nubes 2018,No observado 2018,Check 2018,Deforestacion 2019,Incremento deforestacion 2019,Bosque 2019,Nubes 2019,No observado 2019,Check 2019
0,PARAGOMINAS (PA),PA,-3.12356,-47.40354,19465,7.1,36.0,7212.3,,12208.2,0.0,1.4,100,7322.3,110.0,12098.2,0.0,1.4,100,7473.1,150.8,11947.0,0.4,1.4,100,7567.3,94.2,11853.2,0.0,1.4,100,7832.6,265.3,11587.9,0.0,1.4,100,8166.1,333.5,11253.9,0.5,1.4,100,8257.6,91.5,8732.4,2430.5,1.4,100,8349.1,91.5,10645.2,422.6,5.0,100,8415.4,63.339999,9094.9,1904.5,7.1,99,8536.7,120.407843,10703.6,121.8,59.8,100,8605.0,67.920908,10036.8,776.0,4.1,100,8641.0,35.098496,10301.9,474.8,4.2,100,8658.7,17.080393,7272.8,3486.4,4.0,99,8689.5,30.552451,7623.6,3105.0,3.8,100,8709.5,19.701243,7531.4,3179.6,1.4,99,8733.7,23.591894,9587.8,1100.4,0.0,100,8753.5,19.856212,9640.6,1027.8,0.0,100,8773.3,19.358134,10630.7,17.9,0.0,99,8791.7,18.048503,5999.4,4630.8,0.0,100,8817.470091,25.770091,5973.629909,0.0,0.0,100.0
1,SÃO FÉLIX DO XINGU (PA),PA,-7.46114,-52.46225,84253,3788.0,939.0,7037.4,,72482.1,0.0,6.5,100,8727.7,1690.4,70791.8,0.0,6.5,100,9993.2,1265.5,69526.3,0.0,6.5,100,11311.3,1318.0,68208.2,0.0,6.5,100,12393.8,1082.5,67125.7,0.0,6.5,100,13801.7,1407.8,65717.8,0.0,6.5,100,14563.6,761.9,64955.7,0.2,6.5,100,15441.1,877.5,63083.3,995.0,6.6,100,16206.2,759.279867,63179.6,133.7,6.5,100,16650.6,439.573858,60981.6,1885.2,8.6,100,17004.3,349.323879,59384.3,3125.9,11.5,99,17144.8,139.289527,62255.5,114.1,11.6,100,17313.9,167.598147,62168.7,31.9,11.5,100,17534.3,220.771803,61444.3,535.3,12.1,100,17686.2,152.48212,61703.7,129.6,6.5,100,17885.4,199.337304,61429.4,211.2,0.0,100,18200.4,314.62515,61325.4,0.2,0.0,100,18440.1,236.685057,61085.9,0.0,0.0,100,18733.9,282.100349,58877.5,1914.6,0.0,99,19278.217548,544.317548,58333.182452,0.0,0.0,100.0
2,MARABÁ (PA),PA,-5.52503,-49.94418,15161,36.2,86.0,5818.9,,9218.2,0.0,1.7,100,6187.5,368.6,8849.6,0.0,1.7,100,6442.8,255.3,8594.3,0.0,1.7,100,6907.8,465.0,8129.3,0.0,1.7,100,7154.5,246.7,7882.6,0.0,1.7,100,7282.7,128.2,7617.7,136.7,1.7,100,7541.5,258.9,7380.2,115.4,1.7,100,7707.6,166.1,6680.3,649.2,1.7,100,8059.1,348.091287,6977.3,0.0,2.4,100,8170.7,110.957477,6720.5,145.9,1.7,99,8250.5,78.81192,6751.7,31.4,5.2,99,8315.4,64.660868,6636.3,81.6,5.5,100,8368.2,52.629468,6643.6,21.8,5.2,100,8448.9,80.785697,6578.9,5.6,5.4,100,8487.4,38.507819,6101.2,448.5,1.7,99,8533.5,45.482091,5913.0,592.3,0.0,100,8633.2,98.670689,6404.9,0.7,0.0,99,8675.4,41.708496,6359.1,4.3,0.0,99,8726.8,50.239026,6195.2,116.8,0.0,99,8794.543027,67.743027,6127.456973,0.0,0.0,100.0
3,JUARA (MT),MT,-11.22873,-57.64737,22666,1600.6,127.0,5520.7,,15388.4,0.0,29.3,100,5752.4,231.7,15156.7,0.0,29.3,100,6179.2,426.9,14729.9,0.0,29.3,100,6534.1,354.9,14375.0,0.0,29.3,100,6896.0,361.9,14013.1,0.0,29.3,99,7314.4,418.3,13594.7,0.0,29.3,100,7525.5,211.1,13378.8,4.8,29.3,100,7642.9,117.4,13295.1,0.0,0.4,100,7767.4,124.323051,13170.6,0.0,0.4,100,7798.4,30.817055,12978.9,160.7,0.4,99,7813.6,15.178736,11212.9,1911.6,0.3,99,7871.3,56.999607,13066.8,0.0,0.3,100,7887.2,15.794633,13050.6,0.3,0.3,100,7902.9,15.867825,13035.2,0.0,0.3,100,7930.6,27.947033,12749.8,257.7,0.3,100,7976.9,46.090771,12961.5,0.0,0.0,99,8015.7,38.537974,12922.7,0.0,0.0,99,8033.8,21.482832,12904.6,0.0,0.0,99,8053.0,25.917882,12885.4,0.0,0.0,100,8138.314375,85.314375,12800.085625,0.0,0.0,100.0
4,SANTA LUZIA (MA),MA,-4.29175,-45.93682,6245,0.0,0.0,5373.3,,871.3,0.0,0.4,100,5411.0,37.7,833.6,0.0,0.4,100,5534.9,123.8,709.7,0.0,0.4,100,5559.7,24.8,683.6,1.3,0.4,100,5587.1,27.4,648.4,9.1,0.4,100,5591.4,4.4,606.0,47.2,0.4,100,5599.9,8.4,521.5,123.2,0.4,100,5627.1,27.2,617.5,0.0,0.4,100,5677.3,37.716142,566.7,0.7,0.3,99,5696.4,14.766779,545.9,2.3,0.4,100,5703.9,4.525176,539.2,1.2,0.7,100,5710.8,3.928206,533.5,0.0,0.7,100,5713.9,1.05445,519.6,10.8,0.7,100,5714.9,0.756857,284.3,245.0,0.8,99,5719.2,1.300141,502.4,23.0,0.4,100,5720.8,1.311436,320.1,204.1,0.0,100,5731.8,5.028846,474.5,38.7,0.0,100,5738.3,3.116973,500.5,6.2,0.0,100,5742.1,1.390725,227.3,275.6,0.0,100,5746.116547,4.016547,223.283453,0.0,0.0,100.0


In [10]:
def preprocess(temporal, fixed_vars, n_vars_temp, scaler, scaler2, scaler3):
    # data structure for LSTM
    # normalize features
    x = temporal.values
    x = scaler.fit_transform(x)
    # structure in arrays to be the input of the LSTM
    vars_lstm=[]
    municipio_len=len(x[0])
    for mun in x:
        municipio=[]
        j=0
        while j < municipio_len:
            m=mun[j:j+n_vars_temp]
            municipio.append(mun[j:j+n_vars_temp])
            j+=n_vars_temp
        vars_lstm.append(municipio)

    vars_lstm=np.array(vars_lstm)
    X1 = vars_lstm
    
    fixed_vars = pd.concat([fixed_vars,pd.get_dummies(fixed_vars['Estado'])],axis=1)
    # Drop column as it is now encoded
    fixed_vars = fixed_vars.drop('Estado',axis = 1)
    scaler3 = MinMaxScaler(feature_range=(0, 1))
    x = scaler3.fit_transform(fixed_vars.iloc[:,1:]) # nombre del municipio en la primera columna
    X2 = np.array(x)
    return X1, X2

In [11]:
def show_results(y_pred):
    y_pred=scaler2.inverse_transform(y_pred).flatten()
    results = {'Municipio': municipios_reales.values, 'Predicted': y_pred}
    results = pd.DataFrame(results)
    print(results)
    return y_pred

In [12]:
# Predecimos para los proximos n años
n=10
model = load_model("my_keras_model_production.h5")

fixed_vars = df.loc[:,['Municipio', 'Estado', 'Latitud', 'Longitud', 'Area total', 'Hidrografia', 'No bosque']]
municipios_reales = fixed_vars.iloc[:,0]

for i in range(n):
    # Cogemos los datos de la ventana correspondiente para predecir la deforestación en el próximo año
    year_ini=2+i
    temporal_vars = df.loc[:,'Deforestacion 2000':]
    temporal = temporal_vars.iloc[:, n_vars_temp*year_ini:]
    print(temporal.head())
    X1, X2 = preprocess(temporal, fixed_vars, n_vars_temp, scaler, scaler2, scaler3)
    y_pred = model.predict([X1, X2])
    y_pred = show_results(y_pred)
    # Incluimos los datos predichos en el dataframe
    df['Deforestacion ' + str(2020+i)]=df['Deforestacion ' + str(2020+i-1)] + y_pred
    df['Incremento deforestacion ' + str(2020+i)]=y_pred
    df['Bosque ' + str(2020+i)]=df['Bosque ' + str(2019+i-1)] - y_pred
    df['Nubes ' + str(2020+i)]=np.zeros(760)
    df['No observado ' + str(2020+i)]=np.zeros(760)
    df['Check ' + str(2020+i)]=np.ones(760)*100

W0301 20:42:01.882991 140662996600640 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:541: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0301 20:42:01.900947 140662996600640 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:66: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0301 20:42:01.908965 140662996600640 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4432: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0301 20:42:01.988314 140662996600640 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:148: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0301 20:42:01.996835 

NameError: name 'n_vars_temp' is not defined

In [None]:
y_pred

In [None]:
df