In [101]:
# Core libs
import pandas as pd
import numpy as np

# Graphic libs
import plotly.express as px
import plotly.io as pio
import matplotlib.pyplot as plt
pio.renderers.default = "browser"

# sklearn 
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Deep learning (TensorFlow / Keras)
import tensorflow as tf
from keras import optimizers
from keras.models import Model
from keras.layers import Input, Lambda, Dropout, BatchNormalization, Activation, Dense, LeakyReLU
from keras.layers.merging import Add, Concatenate
from keras.utils import plot_model, to_categorical

# Training

In [102]:
## Custom metric
def mape_accuracy(y_true, y_pred):
    """Percentage of outputs per sample whose relative error is below 10 %.

    The relative error is measured against the target, ``|y_true - y_pred| /
    |y_true|``, which is the same reference used by the MAPE loss the model is
    compiled with and by ``mape_accuracy_log10``.

    Args:
        y_true: Target tensor; reduced along axis 1, so it must be at least
            rank 2.
        y_pred: Prediction tensor, broadcast-compatible with ``y_true``.

    Returns:
        A float32 tensor holding, for each sample, the share (0-100) of
        outputs that fall within the 10 % tolerance.
    """
    # Zero targets produce inf/nan here; both compare False against the
    # threshold, so those entries count as misses.
    relative_error = tf.abs((y_true - y_pred) / y_true)
    within_tolerance = tf.cast(relative_error < 0.1, tf.float32)

    # Averaging the 0/1 hits along axis 1 equals hits / number_of_outputs and
    # does not need the static shape, which can be None inside a traced graph.
    return tf.reduce_mean(within_tolerance, axis=1) * 100

def mape_accuracy_log10(y_true, y_pred):
    """Percentage of outputs within 10 % relative error, for log10-scaled data.

    Both tensors are raised to the power of ten before the comparison, and
    the error is taken relative to the back-transformed target.

    Args:
        y_true: Target tensor holding log10 values; reduced along axis 1.
        y_pred: Prediction tensor holding log10 values.

    Returns:
        Per-sample share (0-100) of outputs inside the 10 % tolerance.
    """
    ten = tf.constant(10, dtype=tf.float32)
    actual = tf.pow(ten, y_true)
    predicted = tf.pow(ten, y_pred)

    relative_error = tf.abs(tf.divide(tf.subtract(actual, predicted), actual))
    hits = tf.reduce_sum(tf.cast(relative_error < 0.1, tf.int32), axis=1)
    n_outputs = y_true.shape[1]

    return (hits / n_outputs) * 100

## Loading data

In [103]:
# Run label. Not referenced in the visible cells — presumably used to tag
# saved artefacts elsewhere (TODO confirm, or remove if unused).
NOW = 'test_final'

In [163]:
def _timestamp_to_day_number(cell):
    """Turn a ``pd.Timestamp`` into a day count; pass anything else through.

    The count is the number of whole days since 1900-01-01 plus 2, which
    appears to reproduce Excel's 1900-system serial date numbers (TODO confirm).
    """
    if not isinstance(cell, pd.Timestamp):
        return cell
    return (cell - pd.Timestamp("1900-01-01 00:00:00")).days + 2


# Load the full appraisal sheet and make every date cell numeric.
raw_data_master = pd.read_excel('../data/xlsx/ord20p.xlsx', header=[0]).applymap(_timestamp_to_day_number)

In [178]:
# Name of the weights file to load: 'S4D_Her0/' + MODEL + '.h5'.
MODEL = 'LimaPROV'

# Rows to evaluate on. Earlier experiments (removed commented-out code) used
# the union of Chiclayo, Prov. Const. del Callao, Arequipa, Trujillo and Lima,
# or the complement of that union.
# NOTE(review): the weights are named 'LimaPROV' while the data is Arequipa —
# confirm this pairing is intentional.
is_arequipa = raw_data_master['Provincia'] == 'Arequipa'
raw_data = raw_data_master[is_arequipa]

# Lookup tables passed to DataFrame.replace() when building trainer_data.
# Keys are matched as whole cell values, so they must stay exactly as
# written here (presumably mirroring the spreadsheet text, typos included —
# verify against the source file before "correcting" any key).

# Property category label -> numeric code (as a string).
categoria_bien = {"Local Comercial" : "1",
                  "Departamento" : "2",
                  "Vivienda Unifamiliar" : "3",
                  "Industria" : "4",
                  "Estacionamiento/depósito (U.I.)" : "5",
                  "AVALUOS_TIPOS_INMUEBLE_VEHICULO" : "6",
                  "Intitución Educativa" : "7",
                  "Terreno Urbano" : "8",
                  "Almacén /Taller" : "9",
                  "Oficina" : "10",
                  "Hotel" : "11",
                  "Fundo Agrícola" : "12"}

# State-of-conservation label -> numeric code (as a string).
conservacion = {"En proyecto" : "1",
                "En construcción" : "2",
                "Regular" : "3",
                "Bueno" : "4",
                "Muy bueno" : "5"}

# Valuation method label -> numeric code (as a string).
metodo = {"Costos o reposición (directo)" : "1",
          "Comparación de mercado (directo)" : "2",
          "Renta o capitalización (indirecto)" : "3"}

# Column names assigned to trainer_data, in the same order as the 13
# positionally selected source columns. The last one, "Valorcomercial",
# is the prediction target.
colnames = ["FechadelInforme",
            "TipodeVia",
            "Estacionamiento",
            "Categoriadelbien",
            "Posicion",
            "Numerodefrentes",
            "Edad",
            "Elevador",
            "Estadodeconservacion",
            "MetodoRepresentado",
            "Areaterreno",
            "Areaconstruccion",
            "Valorcomercial"]

 

def _text_to_float(cell):
    """Parse a string cell as a float after dropping commas; pass others through."""
    if isinstance(cell, str):
        return float(cell.replace(',', ''))
    return cell


def _shifted_log10(value):
    """log10 with a tiny offset so that zeros (e.g. filled NaNs) stay finite."""
    return np.log10(value + 0.0000001)


# Positions of the 13 source columns, in the order expected by `colnames`.
selected_positions = [0, 1, 6, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

# Select, fill missing cells with 0.0, then encode the categorical labels.
trainer_data = (
    raw_data.iloc[:, selected_positions]
    .fillna(0.0)
    .replace(categoria_bien)
    .replace(conservacion)
    .replace(metodo)
)
trainer_data.columns = colnames

# Labels not covered by the lookup tables, followed by string -> float parsing.
# NOTE(review): 'Centro de Salud' is mapped to 12, the same code as
# "Fundo Agrícola" — confirm this is intended.
trainer_data = (
    trainer_data
    .replace('Centro de Salud', 12)
    .replace('Exterior', '1')
    .replace('Interior', '2')
    .replace('Malo', '6')
    .replace('Regular - Malo', '6')
    .replace('Bueno - Regular', '4')
    .applymap(_text_to_float)
)

# Every column, target included, is moved to log10 scale.
trainer_data = trainer_data.applymap(_shifted_log10)

# Model inputs: all columns except the target.
log_data = trainer_data.drop(columns=['Valorcomercial'])

In [179]:
# Show the filtered source rows (before column selection and encoding).
raw_data

Unnamed: 0,Fecha entrega del Informe,Tipo de vía,Piso,Departamento,Provincia,Distrito,Número de estacionamiento,Depósitos,Latitud (Decimal),Longitud (Decimal),...,Posición,Número de frentes,Edad,Elevador,Estado de conservación,Método Representado,Área Terreno,Área Construcción,Valor comercial (USD),ranva
17,43999,1.0,,Arequipa,Arequipa,Socabaya,,,-16.453741,-71.527797,...,,,5.0,,,Comparación de mercado (directo),23.69,82.41,63038,
117,43985,1.0,,Arequipa,Arequipa,José Luis Bustamante Y Rivero,,,-16.432508,-71.515935,...,,,26.0,,Bueno,Comparación de mercado (directo),180.00,288.85,280184,
169,43514,3.0,,Arequipa,Arequipa,Cerro Colorado,0.0,0.0,-16.344756,-71.582156,...,,,10.0,,Bueno,Costos o reposición (directo),1000.00,1245.00,839162,
216,43066,1.0,,Arequipa,Arequipa,Sachaca,0.0,0.0,-16.425835,-71.564179,...,,,2.0,,Muy bueno,Comparación de mercado (directo),25.42,141.19,154396,
237,43385,,,Arequipa,Arequipa,Yura,3.0,0.0,-16.316906,-71.608684,...,,,4.0,,,Costos o reposición (directo),445.20,292.28,195938,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10545,44013,,,Arequipa,Arequipa,Yanahuara,,,-16.398683,-71.550275,...,,,43.0,,Bueno,Costos o reposición (directo),205.60,301.00,284204,
10548,43394,,,Arequipa,Arequipa,José Luis Bustamante Y Rivero,1.0,0.0,-16.415194,-71.520222,...,,,8.0,,Bueno,Comparación de mercado (directo),39.88,107.29,115599,
10572,43765,1.0,,Arequipa,Arequipa,Cayma,0.0,0.0,-16.428485,-71.574731,...,,,0.0,,,Costos o reposición (directo),263.31,283.66,256958,
10601,43760,1.0,,Arequipa,Arequipa,Cayma,0.0,0.0,,,...,,,25.0,,,Costos o reposición (directo),115.55,188.14,265087,


In [180]:
# Fit a full PCA on the log10-scaled table (12 features + target).
# NOTE(review): the components are refitted on whatever subset is currently in
# trainer_data, while the weights loaded below were presumably trained against
# a specific components matrix — confirm the two are meant to match.
data_pca = PCA()
data_pca.fit(trainer_data)

# Project the data with the raw components matrix.
# NOTE(review): no mean-centering is applied here, unlike data_pca.transform —
# confirm this is intended. `components` is not used in the visible cells, and
# `X` is reused below for the layer tensors.
X = trainer_data.to_numpy().T
components = (data_pca.components_@X).T

# Network: 12 inputs -> four 7-unit softsign layers -> one ReLU unit.
Xin = Input(shape=(12,),name='In')
X = Dense(7,activation=tf.keras.activations.softsign)(Xin)
X = Dense(7,activation=tf.keras.activations.softsign)(X)
X = Dense(7,activation=tf.keras.activations.softsign)(X)
X = Dense(7,activation=tf.keras.activations.softsign)(X)
X = Dense(1, activation='relu')(X)
# Append the predicted value to the 12 inputs, giving a 13-wide vector per row.
full_pca_inference = Concatenate(axis=1, name = 'completion')([Xin,X])
# Multiply each row vector by the PCA components matrix (row-wise components_ @ v).
Yout = Lambda(lambda x : tf.transpose(data_pca.components_@tf.transpose(x)))(full_pca_inference)

# Model whose output stays in log10 scale.
PCA_model = Model(inputs = Xin, outputs = [Yout], name = 'PCA')

# Same graph with a final 10**x step, i.e. output in linear scale.
Yfinal = Lambda(lambda x : tf.pow(tf.constant(10, dtype=tf.float32), x))(Yout)
S4D_Her0 = Model(inputs = Xin, outputs = [Yfinal], name = 'predictor')

# The layers must be built exactly as above for the saved weights to load.
S4D_Her0.load_weights('S4D_Her0/' + MODEL + '.h5')

S4D_Her0.compile(loss      = tf.losses.MeanAbsolutePercentageError(),
                 metrics   = [mape_accuracy])

# NOTE(review): trainer_data['Valorcomercial'] was log10-transformed when
# trainer_data was built, whereas this model emits 10**Yout — confirm that
# target and prediction are on the same scale before trusting these scores.
scores = S4D_Her0.evaluate(log_data, trainer_data['Valorcomercial'].to_numpy(), verbose = 1)



In [175]:
# Sanity-check the input shape, run inference, then check the output shape.
print(log_data.shape)

# The predictor already applies 10**x internally, so no further
# back-transform of `pred` is needed here.
pred = S4D_Her0.predict(log_data)
print(pred.shape)

(6846, 12)
(6846, 1)


In [161]:
# First output column of the predictions, shown as a one-column table.
pd.DataFrame(pred[:, 0], columns=['Valor'])

Unnamed: 0,Valor
0,1.007318e+05
1,2.269902e+05
2,2.236679e+06
3,5.556777e+04
4,9.648724e+04
...,...
6841,1.036976e+05
6842,4.413194e+05
6843,1.440866e+05
6844,9.766821e+04
