In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Widen the notebook container so wide outputs fit on screen.
# `display` is imported from the public IPython.display module: importing it
# from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:97% !important; }</style>"))

# Register the jupyternotify magics so cells can use %%notify.
import jupyternotify
ip = get_ipython()
ip.register_magics(jupyternotify.JupyterNotifyMagics)

import scoring as score # para hacer los reportes de puntajes
from time import time

from sklearn import preprocessing
from sklearn.linear_model import Perceptron
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Load the listings, using the first CSV column as the index.
# on_bad_lines='warn' skips malformed lines while still warning about them,
# which is what error_bad_lines=False did; that keyword was deprecated in
# pandas 1.3 and removed in pandas 2.0 (on_bad_lines needs pandas >= 1.3).
properati = pd.read_csv('datos/caba_para_mapa.csv', index_col=0, on_bad_lines='warn')

# The data contained a NaN of unknown origin: discard every incomplete row.
properati = properati.dropna()

<IPython.core.display.Javascript object>

## Preparo las columnas a usar

In [2]:
cant_buckets = 15

# Equal-width bucket edges for the price per m2; np.histogram returns the
# per-bucket counts and the cant_buckets + 1 edges.
cantidad, rango = np.histogram(properati['price_usd_per_m2'], bins=cant_buckets)

# pd.cut builds right-closed intervals, so by default the first one is open on
# the left and the row(s) holding the minimum price (== rango[0]) fall outside
# every bucket and become NaN. include_lowest=True closes the first interval so
# those rows are kept instead of being silently removed by the dropna below.
properati['categories_by_price'] = pd.cut(properati['price_usd_per_m2'], rango,
                                          labels=np.arange(cant_buckets),
                                          include_lowest=True)
properati['price_range'] = pd.cut(properati['price_usd_per_m2'], rango,
                                  include_lowest=True)
# Cast the categorical labels to float; per the original author's note, leaving
# them categorical raised an error.
properati['categories_by_price'] = properati['categories_by_price'].astype(np.float64)

# Safety net kept from the original cell; with include_lowest=True the bucketing
# itself should no longer introduce NaN values.
properati.dropna(inplace=True)


In [3]:
# Custom scorer: precision_score with macro averaging, wrapped by make_scorer
# so it can be passed in the `scoring` argument of the search objects.
from sklearn.metrics import make_scorer, precision_score

precision = make_scorer(score_func=precision_score, average="macro")

### Busco una aproximación de hiperparámetros con random search

In [6]:
%%notify

# Build the dataset: one row per listing, two distance features.
# A 2-D array is taken straight from the DataFrame instead of zip(...): on
# Python 3 zip() returns a one-shot iterator, which scikit-learn does not
# accept as X. The resulting rows are the same pairs zip() produced.
X = properati[['dist_a_subte', 'dist_a_univ']].values
y = properati['categories_by_price']

perceptron = Perceptron(n_jobs=-1)

# Dictionary of scorers so several metrics are evaluated in the same search.
scoring = {"accuracy": "accuracy", "precision": precision}

# Hyper-parameter search space sampled by the randomized search.
alpha = np.arange(0.000001, 1, 0.000001)
pen = ['l2', 'l1', 'elasticnet']
param_grid = {"alpha": alpha, "penalty": pen}

# Run the randomized search.
# - refit=False: with several metrics there is no single "best" model to refit.
# - return_train_score=True: the report below prints training scores, which are
#   only stored in cv_results_ when requested (default is False since
#   scikit-learn 0.21).
# - random_state: fixes the sampled candidates so the cell is reproducible.
random_search = RandomizedSearchCV(perceptron, param_distributions=param_grid,
                                   n_iter=20, cv=5, scoring=scoring, refit=False,
                                   return_train_score=True, random_state=0)
start = time()
random_search.fit(X, y)

print("RandomizedSearchCV duro %.2f segundos para %d candidatos a hyper-parametros."
    % (time() - start, len(random_search.cv_results_['params'])))
print("")
# Pass the metric names as a real list (dict.keys() is a view on Python 3).
score.report_multi(random_search.cv_results_, list(scoring))

RandomizedSearchCV duro 14.13 segundos para 20 candidatos a hyper-parametros.

Puntajes usando precision
Puesto: 1
Promedio training score: 0.020 (std: 0.011)
Promedio validation score: 0.027 (std: 0.023)
Hyper-parametros: {'penalty': 'l2', 'alpha': 0.041708000000000002}

Puesto: 2
Promedio training score: 0.031 (std: 0.027)
Promedio validation score: 0.022 (std: 0.014)
Hyper-parametros: {'penalty': 'elasticnet', 'alpha': 0.37555299999999997}

Puesto: 3
Promedio training score: 0.021 (std: 0.010)
Promedio validation score: 0.022 (std: 0.011)
Hyper-parametros: {'penalty': 'elasticnet', 'alpha': 0.128688}

Puesto: 4
Promedio training score: 0.018 (std: 0.008)
Promedio validation score: 0.021 (std: 0.013)
Hyper-parametros: {'penalty': 'l2', 'alpha': 0.38467599999999996}

Puesto: 5
Promedio training score: 0.021 (std: 0.009)
Promedio validation score: 0.020 (std: 0.011)
Hyper-parametros: {'penalty': 'elasticnet', 'alpha': 0.021735000000000001}

Puesto: 6
Promedio training score: 0.014 (std

<IPython.core.display.Javascript object>

### Busco más detalladamente los hiperparámetros en el rango de los mejores resultados con Grid Search

In [4]:
%%notify

# Build the dataset: one row per listing, two distance features.
# A 2-D array is taken straight from the DataFrame instead of zip(...): on
# Python 3 zip() returns a one-shot iterator, which scikit-learn does not
# accept as X. The resulting rows are the same pairs zip() produced.
X = properati[['dist_a_subte', 'dist_a_univ']].values
y = properati['categories_by_price']

perceptron = Perceptron(n_jobs=-1)

# Exhaustive grid over a narrower alpha range and two penalties.
alpha = np.arange(0.2, 0.5, 0.01)
pen = ['l2', 'elasticnet']
param_grid = {"alpha": alpha, "penalty": pen}

# return_train_score=True: the report below prints training scores, which are
# only stored in cv_results_ when requested (default is False since
# scikit-learn 0.21).
grid_search = GridSearchCV(perceptron, param_grid=param_grid, cv=5,
                           return_train_score=True)
start = time()
grid_search.fit(X, y)

print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
    % (time() - start, len(grid_search.cv_results_['params'])))
score.report_single(grid_search.cv_results_)



GridSearchCV took 41.18 seconds for 60 candidate parameter settings.
Model with rank: 1
Mean training score: 0.223 (std: 0.075)
Mean validation score: 0.232 (std: 0.081)
Parameters: {'penalty': 'l2', 'alpha': 0.36000000000000015}

Model with rank: 1
Mean training score: 0.223 (std: 0.075)
Mean validation score: 0.232 (std: 0.081)
Parameters: {'penalty': 'elasticnet', 'alpha': 0.36000000000000015}

Model with rank: 3
Mean training score: 0.227 (std: 0.077)
Mean validation score: 0.232 (std: 0.081)
Parameters: {'penalty': 'l2', 'alpha': 0.37000000000000016}

Model with rank: 3
Mean training score: 0.227 (std: 0.077)
Mean validation score: 0.232 (std: 0.081)
Parameters: {'penalty': 'elasticnet', 'alpha': 0.37000000000000016}

Model with rank: 5
Mean training score: 0.222 (std: 0.075)
Mean validation score: 0.231 (std: 0.081)
Parameters: {'penalty': 'l2', 'alpha': 0.35000000000000014}

Model with rank: 5
Mean training score: 0.222 (std: 0.075)
Mean validation score: 0.231 (std: 0.081)
Para

<IPython.core.display.Javascript object>