# Generación de variables explicativas para parcelas sin precio

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt

# 1. Dataset

In [None]:
# Load the parcel layer from GeoJSON; the relative path is resolved against
# the notebook's working directory.
parcelas = gpd.read_file('../data/parcelas_sur_prices_.geojson')

In [None]:
f, ax = plt.subplots(figsize=(12, 8))

# Base layer: very thin black outlines for every parcel.
parcelas.geometry.boundary.plot(ax=ax, color='black', linewidth=0.01)

# Overlay: colour parcels by 'usdm2', leaving out rows whose value is NaN or
# +/- infinity so they do not distort the colour scale.
invalid_usdm2 = parcelas['usdm2'].isin([np.nan, np.inf, -np.inf])
parcelas.loc[~invalid_usdm2].plot(column='usdm2', ax=ax, legend=True)

# Trailing semicolon suppresses the cell's textual output in the notebook.
ax.set_axis_off();

In [None]:
from REM.utils import neighbor_fields

In [None]:
# Neighbors: attach neighbor-derived fields to the parcels without a price.
# PROJ string handed to neighbor_fields as the working projection (metric units).
gkbs = '+proj=tmerc +lat_0=-34.6297166 +lon_0=-58.4627 +k=0.9999980000000001 +x_0=100000 +y_0=100000 +ellps=intl +units=m +no_defs'

# Work on a copy of the rows whose 'usdm2_f' is missing.
parcels_nanin = parcelas.loc[parcelas['usdm2_f'].isna()].copy()

# NOTE(review): only the rows with a missing 'usdm2_f' are passed in, and the
# same column is used as field_name — confirm neighbor_fields can still find
# priced neighbors from this input.
parcelas_ = neighbor_fields(
    poly_gdf=parcels_nanin,
    proj=gkbs,
    N=1,
    field_name='usdm2_f',
)

In [None]:
# Row count of the neighbor_fields output.
len(parcelas_)

In [None]:
# Full universe: count each combination of missing / non-missing values
# across the three price columns of the complete parcel layer.
parcelas[['usdm2','usdm2_f','price_adj']].isna().value_counts()

In [None]:
# Preview the first three rows of the neighbor_fields output.
parcelas_.head(3)

# 2. Target & Variables explicativas

In [None]:
from REM.datasources import api_parcelas

In [None]:
# Query api_parcelas with the 'smp' values of the unpriced parcels.
# NOTE(review): presumably returns a collection of per-parcel DataFrames
# (the result is later passed to pd.concat) — confirm against REM.datasources.
parcelas_extra_ft = api_parcelas(parcelas_['smp'].values)

In [None]:
# Stack the pieces returned by api_parcelas into a single table.
parcelas_xtft_df = pd.concat(parcelas_extra_ft)

In [None]:
# Columns kept from the api_parcelas response, in the order they are selected.
api_cols = [
    'superficie_total',
    'superficie_cubierta',
    'frente',
    'fondo',
    'propiedad_horizontal',
    'pisos_bajo_rasante',
    'pisos_sobre_rasante',
    'unidades_funcionales',
    'locales',
    'vuc',
    'cantidad_puertas',
    'lon_ctroid',
    'lat_ctroid',
]

In [None]:
# Keep only the API columns listed in api_cols; the copy decouples api_feat
# from parcelas_xtft_df.
api_feat = parcelas_xtft_df[api_cols].copy()

In [None]:
# Preview the first two rows of the selected API features.
api_feat.head(2)

In [None]:
# Re-index the unpriced parcels by their 'smp' identifier (on a copy).
# NOTE(review): presumably done so the index lines up with api_feat for the
# later join — confirm how api_feat is indexed.
parcelas_base = parcelas_.set_index('smp').copy()

In [None]:
# Normalise the identifiers in the index: when the last character is a
# lowercase letter it is upper-cased; every other identifier is kept as is.
# Slicing with [-1:] (instead of indexing with [-1]) means an empty identifier
# no longer raises IndexError: ''.islower() is False, so it passes through.
corrections = [
    idx[:-1] + idx[-1:].upper() if idx[-1:].islower() else idx
    for idx in parcelas_base.index
]

In [None]:
# Replace the index with the case-corrected identifiers (same order, same length).
parcelas_base.index = corrections

In [None]:
# Row count of the re-indexed parcel table.
len(parcelas_base)

In [None]:
# Join the API features onto the parcels by index (DataFrame.join defaults to
# a left join, so every parcel row is kept).
# NOTE(review): assumes api_feat is indexed by the same identifiers as
# parcelas_base — confirm.
xvars_union = parcelas_base.join(api_feat)
# Count rows with / without a centroid latitude, i.e. parcels with no API match.
xvars_union['lat_ctroid'].isna().value_counts() # 5 lost (original author's note)

In [None]:
# Keep only the parcels that received API features (non-missing centroid
# latitude); copy so later column assignments do not touch xvars_union.
parcel_feats = xvars_union[xvars_union['lat_ctroid'].notna()].copy()

In [None]:
# Inspect column dtypes and non-null counts after dropping unmatched parcels.
parcel_feats.info()

In [None]:
# Count missing vs. non-missing 'usdm2_f' values among the retained parcels.
parcel_feats['usdm2_f'].isna().value_counts()

In [None]:
# Dtypes and non-null counts before the numeric casts.
parcel_feats.info()

In [None]:
# API columns that are cast to float.
float_cols = [
    'superficie_total',
    'superficie_cubierta',
    'frente',
    'fondo',
    'pisos_bajo_rasante',
    'pisos_sobre_rasante',
    'unidades_funcionales',
    'locales',
    'vuc',
    'cantidad_puertas',
]
for col in float_cols:
    parcel_feats[col] = parcel_feats[col].astype(float)

# Encode the 'Si' / 'No' flag as 1 / 0; any other value is left unchanged
# by replace.
ph_codes = {'Si': 1, 'No': 0}
parcel_feats['propiedad_horizontal'] = parcel_feats['propiedad_horizontal'].replace(ph_codes)

In [None]:
# Check the dtypes again after the numeric casts.
parcel_feats.info()

In [None]:
# List the available column names before choosing the explanatory variables.
parcel_feats.columns

In [None]:
# Display the feature table.
parcel_feats

In [None]:
# Explanatory variables written to the output file.
# NOTE(review): the nearest_*_2 names suggest a second neighbor, while the
# visible neighbor_fields call uses N=1 — confirm these columns exist in
# parcel_feats, otherwise the column selection will raise KeyError.
expvars = [
    # neighbor-derived columns
    'nearest_price_1',
    'nearest_area_1',
    'nearest_price_2',
    'nearest_area_2',
    # columns taken from the parcel API (same names and order as api_cols)
    'superficie_total',
    'superficie_cubierta',
    'frente',
    'fondo',
    'propiedad_horizontal',
    'pisos_bajo_rasante',
    'pisos_sobre_rasante',
    'unidades_funcionales',
    'locales',
    'vuc',
    'cantidad_puertas',
    'lon_ctroid',
    'lat_ctroid',
]

In [None]:
# Write the explanatory variables for the unpriced parcels to CSV; the index
# (parcel identifier) is written as the first column because to_csv defaults
# to index=True.
parcel_feats[expvars].to_csv('../data/sprecio_expvars.csv')