In [66]:
import pandas as pd
import numpy as np
from statistics import mean

# Show up to 30 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 30)

# dtype for each CSV column, handed to read_csv. The map also lists columns
# that are not part of the subset loaded below.
types = {
    'id': int,
    'descripcion': str,
    'tipodepropiedad': str,
    'habitaciones': np.float32,
    'garages': np.float32,
    'idzona': np.float32,
    'banos': np.float64,
    'metroscubiertos': np.float64,
    'metrostotales': np.float64,
    'gimnasio': int,
    'usosmultiples': int,
    'piscina': int,
    'escuelascercanas': int,
    'centroscomercialescercanos': int,
    'precio': int,
}

# The same column subset is read from the train and the test file.
columns_to_load = ['id', 'ciudad', 'metroscubiertos', 'metrostotales', 'habitaciones', 'garages', 'banos']

training_data = pd.read_csv('../../data/TP2/train.csv', usecols=columns_to_load, dtype=types)
evaluation_data = pd.read_csv('../../data/TP2/test.csv', usecols=columns_to_load, dtype=types)

##### SI LOS METROS CUBIERTOS SON MÁS QUE LOS TOTALES, LOS INVIERTO

In [67]:
def swap_inverted_surfaces(frame):
    """Return a copy of `frame` with covered/total surface swapped where inverted.

    A row is "inverted" when `metroscubiertos` is greater than `metrostotales`.
    On those rows the two values are exchanged; every other row is returned
    unchanged. Rows where either value is NaN are never inverted, because a
    comparison against NaN is False.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns `metroscubiertos` and `metrostotales`.

    Returns
    -------
    pandas.DataFrame
        New frame; the input is not modified.
    """
    covered = frame['metroscubiertos']
    total = frame['metrostotales']

    # The mask is computed exactly once, from the untouched columns. If it is
    # re-evaluated after one column has been overwritten, the two columns are
    # already equal on those rows, the mask becomes all-False and the second
    # half of the swap never happens.
    inverted = covered > total

    # Series.where keeps the caller's value where the condition is True and
    # takes `other` elsewhere.
    return frame.assign(
        metroscubiertos=total.where(inverted, covered),
        metrostotales=covered.where(inverted, total),
    )


training_data = swap_inverted_surfaces(training_data)
evaluation_data = swap_inverted_surfaces(evaluation_data)

In [68]:
# Mean covered surface per city, computed from the training set only and
# reused for the evaluation set.
mtscub_por_ciudad = (
    training_data.groupby('ciudad')
    .agg({'metroscubiertos': 'mean'})
    .reset_index()
    .rename(columns={'metroscubiertos': 'mts_ciudad'})
)

# Fallback value: the mean of the per-city means. It covers cities whose own
# mean is NaN and, after the merges below, rows whose city has no match.
meanmts = mtscub_por_ciudad.mts_ciudad.mean()

# Results of fillna are assigned back instead of calling
# `frame.column.fillna(..., inplace=True)`: that chained in-place form acts on
# a temporary Series and leaves the frame untouched under pandas Copy-on-Write.
mtscub_por_ciudad['mts_ciudad'] = mtscub_por_ciudad.mts_ciudad.fillna(meanmts)

# Edges of the covered-surface buckets; with labels=False pd.cut returns the
# integer position of the bucket, and NaN for values outside the edges.
surface_bin_edges = [14, 30, 50, 80, 110, 150, 200, 250, 350, 440]

# Training set: attach the city mean, fill missing covered surface with it,
# then bucket the surface.
training_data = pd.merge(training_data, mtscub_por_ciudad, on='ciudad', how='left')
training_data['mts_ciudad'] = training_data.mts_ciudad.fillna(meanmts)
training_data['metroscubiertos'] = training_data.metroscubiertos.fillna(training_data.mts_ciudad)

training_data['mtscub'] = pd.cut(training_data.metroscubiertos, surface_bin_edges, labels=False, precision=1, include_lowest=True)

# Evaluation set: same steps, using the statistics learned on the training set.
evaluation_data = pd.merge(evaluation_data, mtscub_por_ciudad, on='ciudad', how='left')
evaluation_data['mts_ciudad'] = evaluation_data.mts_ciudad.fillna(meanmts)
evaluation_data['metroscubiertos'] = evaluation_data.metroscubiertos.fillna(evaluation_data.mts_ciudad)

evaluation_data['mtscub'] = pd.cut(evaluation_data.metroscubiertos, surface_bin_edges, labels=False, precision=1, include_lowest=True)

In [69]:
# Per surface bucket (`mtscub`), the median number of bedrooms, garages and
# bathrooms in the training set. The output columns keep the `mean_*` names
# used by the following cells even though the statistic is a median.
ambientes = (
    training_data
    .groupby('mtscub')[['habitaciones', 'garages', 'banos']]
    .median()
    .rename(columns={'habitaciones': 'mean_hab', 'garages': 'mean_gar', 'banos': 'mean_ban'})
    .reset_index()
)

In [70]:
def fill_rooms_from_bucket(frame, bucket_stats):
    """Attach the per-bucket room statistics and use them to fill missing counts.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain `mtscub`, `habitaciones`, `garages` and `banos`.
    bucket_stats : pandas.DataFrame
        One row per `mtscub` value with the columns `mean_hab`, `mean_gar`
        and `mean_ban`.

    Returns
    -------
    pandas.DataFrame
        `frame` left-merged with `bucket_stats` on `mtscub`, with NaN values
        of each room column replaced by the matching bucket statistic.
    """
    merged = pd.merge(frame, bucket_stats, on='mtscub', how='left')
    for room_col, stat_col in (
        ('habitaciones', 'mean_hab'),
        ('garages', 'mean_gar'),
        ('banos', 'mean_ban'),
    ):
        # Assign the filled Series back: `merged.col.fillna(..., inplace=True)`
        # is a chained in-place call that leaves `merged` unchanged under
        # pandas Copy-on-Write.
        merged[room_col] = merged[room_col].fillna(merged[stat_col])
    return merged


training_data = fill_rooms_from_bucket(training_data, ambientes)
evaluation_data = fill_rooms_from_bucket(evaluation_data, ambientes)

In [73]:
# (new column, observed room count, bucket statistic merged in earlier)
delta_spec = (
    ('delta_hab', 'habitaciones', 'mean_hab'),
    ('delta_gar', 'garages', 'mean_gar'),
    ('delta_ban', 'banos', 'mean_ban'),
)

# For both sets, store how far each row's room count is from the value of its
# surface bucket.
for frame in (training_data, evaluation_data):
    for delta_col, observed_col, bucket_col in delta_spec:
        frame[delta_col] = frame[observed_col] - frame[bucket_col]

In [75]:
# Keep only the three delta features in both sets; every other column is dropped.
delta_columns = ['delta_hab', 'delta_gar', 'delta_ban']
training_data = training_data.loc[:, delta_columns]
evaluation_data = evaluation_data.loc[:, delta_columns]

In [77]:
# Write each feature set to its CSV file. to_csv is called with its defaults,
# so the frame index is written alongside the feature columns.
for frame, destination in (
    (training_data, '../../res/ftr/delta_rooms_by_surface_train.csv'),
    (evaluation_data, '../../res/ftr/delta_rooms_by_surface_evaluation.csv'),
):
    frame.to_csv(destination)