In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split as tts
import warnings
warnings.filterwarnings('ignore')

In [2]:
from sklearn.linear_model import LinearRegression as LinReg
from sklearn.linear_model import Lasso        # regularizacion L1
from sklearn.linear_model import Ridge        # regularizacion L2
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor as RFR  
from sklearn.tree import ExtraTreeRegressor as ETR
from sklearn.ensemble import GradientBoostingRegressor as GBR
from xgboost import XGBRegressor as XGBR
from catboost import CatBoostRegressor as CTR
from lightgbm import LGBMRegressor as LGBMR
from sklearn.linear_model import PoissonRegressor as PR

In [3]:
pd.set_option('display.max_columns', None)

In [4]:
pd.set_option('display.max_rows', None)

In [5]:
boe=pd.read_csv('../data/BOE_def.csv')

In [6]:
boe.fecha_de_conclusion.unique()

array(['08-12-2022', '07-12-2022', '06-12-2022', ..., '06-02-2016',
       '27-01-2016', '10-01-2016'], dtype=object)

In [7]:
import regex as re

In [8]:
def limpiar_(fecha_de_inicio):
    fecha_de_inicio=re.findall('\d{4}', fecha_de_inicio)
    fecha_de_inicio=' '.join(fecha_de_inicio)
    return fecha_de_inicio

In [9]:
boe.fecha_de_inicio=boe.fecha_de_inicio.apply(limpiar_)

In [10]:
boe.fecha_de_conclusion=boe.fecha_de_conclusion.apply(limpiar_)

In [11]:
boe.head()

Unnamed: 0,identificador,tipo_de_subasta,cuenta_expediente,fecha_de_inicio,fecha_de_conclusion,cantidad_reclamada,anuncio_BOE,valor_subasta,tasacion,puja_minima,tramos_entre_pujas,importe_del_deposito,direccion,codigo_postal,localidad,provincia,puja,pm2
0,SUB-JA-2022-204013,JUDICIAL EN VÍA DE APREMIO,4743 0000 05 1018 20,2022,2022,7145,BOE-B-2022-35270,108482,0,0,2169,5424,Urbanización Els Racons nº 5. partida Michafiga,46220,Picassent,Valencia/València,58580,1088
1,SUB-JA-2022-196067,JUDICIAL EN VÍA DE APREMIO,2368 0000 06 0191 20,2022,2022,142298,BOE-B-2022-35269,305832,0,0,6116,15291,"CALLE GABRIEL GARCÍA MÁRQUEZ, 31",28980,PARLA,Madrid,177382,1721
2,SUB-JA-2022-204060,JUDICIAL EN VÍA DE APREMIO,2995 0000 06 0456 16,2022,2022,55933,BOE-B-2022-35263,247902,0,0,0,12395,VIVIENDA 1 BLOQUE 19 CONJUNTO RESIDENCIAL HACI...,29600,Marbella,Málaga,252000,1721
3,SUB-JA-2022-198972,JUDICIAL EN VÍA DE APREMIO,3997 0000 06 1661 20,2022,2022,210425,BOE-B-2022-28879,826272,0,0,16525,41313,"AVENIDA DE JACARANDA, 5, URBANIZACION CLUB DE ...",41930,BORMUJOS,Sevilla,512288,1557
4,SUB-JA-2022-203383,JUDICIAL EN VÍA DE APREMIO,5418 0000 06 0169 21,2022,2022,34853,BOE-B-2022-35253,121684,121684,45978,2433,6084,"Calle Palma de Mallorca, nº 13, bloque 19, 3º ...",35016,Las Palmas de Gran Canaria,Las Palmas,45978,1130


In [12]:
boe.drop(columns=['identificador','tipo_de_subasta','cuenta_expediente',
                  'anuncio_BOE','tasacion','puja_minima','tramos_entre_pujas',
                 'importe_del_deposito','direccion','localidad'], inplace=True)

In [13]:
boe.head()

Unnamed: 0,fecha_de_inicio,fecha_de_conclusion,cantidad_reclamada,valor_subasta,codigo_postal,provincia,puja,pm2
0,2022,2022,7145,108482,46220,Valencia/València,58580,1088
1,2022,2022,142298,305832,28980,Madrid,177382,1721
2,2022,2022,55933,247902,29600,Málaga,252000,1721
3,2022,2022,210425,826272,41930,Sevilla,512288,1557
4,2022,2022,34853,121684,35016,Las Palmas,45978,1130


In [14]:
boe.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6154 entries, 0 to 6153
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   fecha_de_inicio      6154 non-null   object
 1   fecha_de_conclusion  6154 non-null   object
 2   cantidad_reclamada   6154 non-null   int64 
 3   valor_subasta        6154 non-null   int64 
 4   codigo_postal        6154 non-null   int64 
 5   provincia            6154 non-null   object
 6   puja                 6154 non-null   int64 
 7   pm2                  6154 non-null   int64 
dtypes: int64(5), object(3)
memory usage: 1.4 MB


In [15]:
boe['fecha_de_inicio'] = boe['fecha_de_inicio'].astype(int)

In [16]:
boe['fecha_de_conclusion'] = boe['fecha_de_conclusion'].astype(int)

In [17]:
#boe22=boe.loc[boe['fecha_de_conclusion'] == 2022]

In [18]:
#eliminamos esta columna porque el precio se cierra en la fecha de conclusion y varia segun el año final.
boe.drop(columns=['fecha_de_inicio'], inplace=True)

In [19]:
#eliminamos codigo postal? con eso hemos sacado el pm2, no nos interesa más. Ademas contamos con la provincia.
boe.drop(columns=['codigo_postal'], inplace=True)

In [20]:
#creamos new colum con la diferencia entre la deuda(cantidad_reclamada) y el valor que le da el juez(valor_subasta)
boe['diference'] = boe['valor_subasta'].sub(boe['cantidad_reclamada'], axis = 0)

In [21]:
boe.head()

Unnamed: 0,fecha_de_conclusion,cantidad_reclamada,valor_subasta,provincia,puja,pm2,diference
0,2022,7145,108482,Valencia/València,58580,1088,101337
1,2022,142298,305832,Madrid,177382,1721,163534
2,2022,55933,247902,Málaga,252000,1721,191969
3,2022,210425,826272,Sevilla,512288,1557,615847
4,2022,34853,121684,Las Palmas,45978,1130,86831


In [22]:
boe.drop(columns=['cantidad_reclamada','valor_subasta'], inplace=True)

In [23]:
boe.head()

Unnamed: 0,fecha_de_conclusion,provincia,puja,pm2,diference
0,2022,Valencia/València,58580,1088,101337
1,2022,Madrid,177382,1721,163534
2,2022,Málaga,252000,1721,191969
3,2022,Sevilla,512288,1557,615847
4,2022,Las Palmas,45978,1130,86831


In [24]:
boe.provincia.value_counts

<bound method IndexOpsMixin.value_counts of 0            Valencia/València
1                       Madrid
2                       Málaga
3                      Sevilla
4                   Las Palmas
5                    Barcelona
6                       Madrid
7                   Las Palmas
8                      Sevilla
9            Valencia/València
10                      Huesca
11                     Almería
12           Valencia/València
13                    Albacete
14                      Madrid
15                      Madrid
16           Valencia/València
17                  Las Palmas
18                      Madrid
19      Santa Cruz de Tenerife
20                 Ciudad Real
21                    Asturias
22                     Sevilla
23                      Huelva
24                      Huesca
25      Santa Cruz de Tenerife
26                   Barcelona
27                     Almería
28                     Sevilla
29               Illes Balears
30                      Ma

In [25]:
def limpia_provincia(x):
    if 'Madrid' in x:
        return 'Madrid'
    elif 'Barcelona' in x:
        return 'Barcelona'
    elif 'Málaga' in x:
        return 'Málaga'
    elif 'Sevilla' in x:
        return 'Sevilla'
    elif 'Alicante' in x:
        return 'Alicante'
    elif 'Toledo' in x:
        return 'Toledo'
    elif 'Valencia/València' in x:
        return 'Valencia/València'
    elif 'Santa Cruz de Tenerife' in x:
        return 'Santa Cruz de Tenerife'
    elif 'Alicante/Alacant' in x:
        return 'Alicante/Alacant'
    elif 'Cádiz' in x:
        return 'Cádiz'
    elif 'Almería' in x:
        return 'Almería'
    elif 'Valencia' in x:
        return 'Valencia'
    elif 'Tarragona' in x:
        return 'Tarragona'
    elif 'Huelva' in x:
        return 'Huelva'
    elif 'Zaragoza' in x:
        return 'Zaragoza'
    elif 'Gerona' in x:
        return 'Gerona'
    elif 'Baleares' in x:
        return 'Baleares'
    elif 'Las Palmas' in x:
        return 'Las Palmas'
    elif 'Albacete' in x:
        return 'Albacete'
    elif 'Pontevedra' in x:
        return 'Pontevedra'
    elif 'Granada' in x:
        return 'Granada'
    elif 'Illes Balears' in x:
        return 'Illes Balears'
    elif 'Cantabria' in x:
        return 'Cantabria'
    elif 'Asturias' in x:
        return 'Asturias'
    elif 'Córdoba' in x:
        return 'Córdoba'
    else:
        return 'other'
    

In [26]:
boe.provincia=boe.provincia.apply(limpia_provincia)

In [27]:
boe.head()

Unnamed: 0,fecha_de_conclusion,provincia,puja,pm2,diference
0,2022,Valencia/València,58580,1088,101337
1,2022,Madrid,177382,1721,163534
2,2022,Málaga,252000,1721,191969
3,2022,Sevilla,512288,1557,615847
4,2022,Las Palmas,45978,1130,86831


In [28]:
boe=pd.get_dummies(boe, columns=['fecha_de_conclusion','provincia'], drop_first=True)

boe.head()

Unnamed: 0,puja,pm2,diference,fecha_de_conclusion_2017,fecha_de_conclusion_2018,fecha_de_conclusion_2019,fecha_de_conclusion_2020,fecha_de_conclusion_2021,fecha_de_conclusion_2022,provincia_Alicante,provincia_Almería,provincia_Asturias,provincia_Baleares,provincia_Barcelona,provincia_Cantabria,provincia_Cádiz,provincia_Córdoba,provincia_Gerona,provincia_Granada,provincia_Huelva,provincia_Illes Balears,provincia_Las Palmas,provincia_Madrid,provincia_Málaga,provincia_Pontevedra,provincia_Santa Cruz de Tenerife,provincia_Sevilla,provincia_Tarragona,provincia_Toledo,provincia_Valencia,provincia_Valencia/València,provincia_Zaragoza,provincia_other
0,58580,1088,101337,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1,177382,1721,163534,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,252000,1721,191969,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
3,512288,1557,615847,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4,45978,1130,86831,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [29]:
boe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6154 entries, 0 to 6153
Data columns (total 33 columns):
 #   Column                            Non-Null Count  Dtype
---  ------                            --------------  -----
 0   puja                              6154 non-null   int64
 1   pm2                               6154 non-null   int64
 2   diference                         6154 non-null   int64
 3   fecha_de_conclusion_2017          6154 non-null   uint8
 4   fecha_de_conclusion_2018          6154 non-null   uint8
 5   fecha_de_conclusion_2019          6154 non-null   uint8
 6   fecha_de_conclusion_2020          6154 non-null   uint8
 7   fecha_de_conclusion_2021          6154 non-null   uint8
 8   fecha_de_conclusion_2022          6154 non-null   uint8
 9   provincia_Alicante                6154 non-null   uint8
 10  provincia_Almería                 6154 non-null   uint8
 11  provincia_Asturias                6154 non-null   uint8
 12  provincia_Baleares                

In [30]:
boe.pm2=LabelEncoder().fit_transform(boe.pm2)

In [31]:
boe.head()

Unnamed: 0,puja,pm2,diference,fecha_de_conclusion_2017,fecha_de_conclusion_2018,fecha_de_conclusion_2019,fecha_de_conclusion_2020,fecha_de_conclusion_2021,fecha_de_conclusion_2022,provincia_Alicante,provincia_Almería,provincia_Asturias,provincia_Baleares,provincia_Barcelona,provincia_Cantabria,provincia_Cádiz,provincia_Córdoba,provincia_Gerona,provincia_Granada,provincia_Huelva,provincia_Illes Balears,provincia_Las Palmas,provincia_Madrid,provincia_Málaga,provincia_Pontevedra,provincia_Santa Cruz de Tenerife,provincia_Sevilla,provincia_Tarragona,provincia_Toledo,provincia_Valencia,provincia_Valencia/València,provincia_Zaragoza,provincia_other
0,58580,436,101337,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1,177382,805,163534,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,252000,805,191969,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
3,512288,715,615847,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4,45978,467,86831,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [32]:
#boe.diference.unique()

In [33]:
#boe=boe.rename(columns = {'diff':'diference'})

In [34]:
boe.diference=LabelEncoder().fit_transform(boe.diference)

In [35]:
boe.head()

Unnamed: 0,puja,pm2,diference,fecha_de_conclusion_2017,fecha_de_conclusion_2018,fecha_de_conclusion_2019,fecha_de_conclusion_2020,fecha_de_conclusion_2021,fecha_de_conclusion_2022,provincia_Alicante,provincia_Almería,provincia_Asturias,provincia_Baleares,provincia_Barcelona,provincia_Cantabria,provincia_Cádiz,provincia_Córdoba,provincia_Gerona,provincia_Granada,provincia_Huelva,provincia_Illes Balears,provincia_Las Palmas,provincia_Madrid,provincia_Málaga,provincia_Pontevedra,provincia_Santa Cruz de Tenerife,provincia_Sevilla,provincia_Tarragona,provincia_Toledo,provincia_Valencia,provincia_Valencia/València,provincia_Zaragoza,provincia_other
0,58580,436,3988,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1,177382,805,4951,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,252000,805,5186,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
3,512288,715,5790,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4,45978,467,3602,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [36]:
boe_x=boe.drop('puja', axis=1)
boe_y=boe.puja

In [37]:
X_train, X_test, y_train, y_test = tts(boe_x, boe_y,train_size=0.9, test_size=0.1,random_state =71)

**Iniciamos los modelos**

In [38]:
linreg=LinReg()
lasso=Lasso()
ridge=Ridge()
elastic=ElasticNet()
svr=SVR()
rfr=RFR()
etr=ETR()
gbr=GBR()
xgbr=XGBR()
ctr=CTR()
lgbmr=LGBMR()
pr=PR()

**Entrenamos modelos**

In [39]:
linreg.fit(X_train, y_train)
lasso.fit(X_train, y_train)
ridge.fit(X_train, y_train)
elastic.fit(X_train, y_train)
svr.fit(X_train, y_train)
rfr.fit(X_train, y_train)
etr.fit(X_train, y_train)
gbr.fit(X_train, y_train)
xgbr.fit(X_train, y_train)
ctr.fit(X_train, y_train)
lgbmr.fit(X_train, y_train)
pr.fit(X_train, y_train)

Learning rate set to 0.053657
0:	learn: 585981.7138637	total: 58.3ms	remaining: 58.2s
1:	learn: 578963.7189311	total: 59.3ms	remaining: 29.6s
2:	learn: 572296.3133130	total: 60.6ms	remaining: 20.1s
3:	learn: 565699.3197923	total: 61.7ms	remaining: 15.4s
4:	learn: 559293.8174444	total: 62.9ms	remaining: 12.5s
5:	learn: 553010.9356954	total: 63.8ms	remaining: 10.6s
6:	learn: 546848.7600110	total: 64.8ms	remaining: 9.19s
7:	learn: 540778.7799282	total: 65.9ms	remaining: 8.17s
8:	learn: 534840.6085775	total: 67.1ms	remaining: 7.38s
9:	learn: 528943.9284238	total: 68.2ms	remaining: 6.76s
10:	learn: 523169.1912604	total: 69.4ms	remaining: 6.24s
11:	learn: 517534.3680926	total: 70.5ms	remaining: 5.8s
12:	learn: 511978.0719193	total: 71.6ms	remaining: 5.43s
13:	learn: 506518.3586932	total: 72.5ms	remaining: 5.11s
14:	learn: 501136.4188823	total: 73.5ms	remaining: 4.83s
15:	learn: 495912.5987216	total: 74.6ms	remaining: 4.58s
16:	learn: 490754.1099121	total: 75.5ms	remaining: 4.37s
17:	learn: 4

312:	learn: 179246.8687535	total: 387ms	remaining: 850ms
313:	learn: 179157.3891456	total: 388ms	remaining: 848ms
314:	learn: 179129.4884768	total: 389ms	remaining: 846ms
315:	learn: 179102.3501812	total: 390ms	remaining: 844ms
316:	learn: 179075.9520652	total: 391ms	remaining: 843ms
317:	learn: 179050.2727066	total: 392ms	remaining: 841ms
318:	learn: 178942.1468242	total: 393ms	remaining: 840ms
319:	learn: 178917.3197109	total: 394ms	remaining: 838ms
320:	learn: 178893.1669999	total: 395ms	remaining: 836ms
321:	learn: 178869.6694259	total: 396ms	remaining: 834ms
322:	learn: 177941.8784943	total: 397ms	remaining: 833ms
323:	learn: 176580.3949936	total: 399ms	remaining: 831ms
324:	learn: 176474.6354211	total: 400ms	remaining: 830ms
325:	learn: 176451.6073834	total: 401ms	remaining: 829ms
326:	learn: 176429.2027098	total: 402ms	remaining: 827ms
327:	learn: 176338.7975395	total: 403ms	remaining: 825ms
328:	learn: 174986.9770098	total: 404ms	remaining: 824ms
329:	learn: 174965.0205370	tota

498:	learn: 137909.8564907	total: 580ms	remaining: 583ms
499:	learn: 137881.8040437	total: 581ms	remaining: 581ms
500:	learn: 137095.2793320	total: 582ms	remaining: 580ms
501:	learn: 136325.6463480	total: 583ms	remaining: 579ms
502:	learn: 136270.2413789	total: 584ms	remaining: 577ms
503:	learn: 136135.7226920	total: 586ms	remaining: 576ms
504:	learn: 136041.9254250	total: 587ms	remaining: 575ms
505:	learn: 136021.1214984	total: 588ms	remaining: 574ms
506:	learn: 135923.7578702	total: 589ms	remaining: 573ms
507:	learn: 135817.4377077	total: 590ms	remaining: 572ms
508:	learn: 135751.3369263	total: 591ms	remaining: 570ms
509:	learn: 135719.1254689	total: 592ms	remaining: 569ms
510:	learn: 135686.3615544	total: 593ms	remaining: 568ms
511:	learn: 135629.0814880	total: 594ms	remaining: 566ms
512:	learn: 135585.2560259	total: 595ms	remaining: 565ms
513:	learn: 135565.3731943	total: 596ms	remaining: 564ms
514:	learn: 135524.4020971	total: 597ms	remaining: 563ms
515:	learn: 135483.3612724	tota

683:	learn: 117293.3344344	total: 773ms	remaining: 357ms
684:	learn: 117210.3520239	total: 774ms	remaining: 356ms
685:	learn: 117138.5058475	total: 775ms	remaining: 355ms
686:	learn: 117121.9227911	total: 776ms	remaining: 354ms
687:	learn: 117056.3663765	total: 777ms	remaining: 353ms
688:	learn: 116528.7412341	total: 778ms	remaining: 351ms
689:	learn: 116499.6890124	total: 779ms	remaining: 350ms
690:	learn: 115984.0014532	total: 780ms	remaining: 349ms
691:	learn: 115946.9981529	total: 782ms	remaining: 348ms
692:	learn: 115446.1928196	total: 783ms	remaining: 347ms
693:	learn: 115411.0561833	total: 784ms	remaining: 346ms
694:	learn: 115374.6484198	total: 785ms	remaining: 344ms
695:	learn: 115369.3292093	total: 786ms	remaining: 343ms
696:	learn: 115263.2740580	total: 787ms	remaining: 342ms
697:	learn: 114769.4065787	total: 788ms	remaining: 341ms
698:	learn: 114756.2114615	total: 789ms	remaining: 340ms
699:	learn: 114277.0270452	total: 790ms	remaining: 339ms
700:	learn: 113805.1649512	tota

868:	learn: 99647.1982099	total: 966ms	remaining: 146ms
869:	learn: 99615.2804999	total: 967ms	remaining: 145ms
870:	learn: 99591.9290164	total: 969ms	remaining: 143ms
871:	learn: 99551.7377981	total: 970ms	remaining: 142ms
872:	learn: 99517.8220957	total: 971ms	remaining: 141ms
873:	learn: 99476.8708893	total: 972ms	remaining: 140ms
874:	learn: 99203.3851531	total: 973ms	remaining: 139ms
875:	learn: 99152.7183796	total: 974ms	remaining: 138ms
876:	learn: 99133.9391527	total: 975ms	remaining: 137ms
877:	learn: 99067.0384517	total: 976ms	remaining: 136ms
878:	learn: 99030.6988632	total: 977ms	remaining: 135ms
879:	learn: 98978.6523151	total: 978ms	remaining: 133ms
880:	learn: 98914.2779161	total: 979ms	remaining: 132ms
881:	learn: 98882.4587726	total: 980ms	remaining: 131ms
882:	learn: 98871.5843499	total: 982ms	remaining: 130ms
883:	learn: 98844.5240094	total: 982ms	remaining: 129ms
884:	learn: 98829.3924672	total: 983ms	remaining: 128ms
885:	learn: 98801.6842310	total: 984ms	remaining

PoissonRegressor()

In [40]:
from sklearn.metrics import mean_squared_error as mse

In [41]:
from sklearn.metrics import r2_score as r2

In [42]:
#Import Lazypredict and all libraries
import lazypredict
from lazypredict.Supervised import LazyRegressor
from sklearn.model_selection import train_test_split
import os
import pandas as pd
import numpy as np

In [43]:
# regresion lineal

y_pred=linreg.predict(X_test)

mse(y_test, y_pred, squared=False), r2(y_test, y_pred)

(482035.3526619082, 0.046990907165564755)

In [44]:
linreg.score(X_train, y_train), linreg.score(X_test, y_test)


(0.02520711096598538, 0.046990907165564755)

In [45]:
# lasso

y_pred=lasso.predict(X_test)

mse(y_test, y_pred, squared=False) , r2(y_test, y_pred)

(482035.48201559565, 0.04699039568749075)

In [46]:
lasso.score(X_train, y_train), lasso.score(X_test, y_test)

(0.025207081734607062, 0.04699039568749075)

In [47]:
# ridge

y_pred=ridge.predict(X_test)

mse(y_test, y_pred, squared=False), r2(y_test, y_pred)

(482035.3220759847, 0.0469910281055006)

In [48]:
# elastic

y_pred=elastic.predict(X_test)

mse(y_test, y_pred, squared=False), r2(y_test, y_pred)

(481283.2180950449, 0.04996260565917199)

In [49]:
## rfr

y_pred=rfr.predict(X_test)

mse(y_test, y_pred, squared=False)  , r2(y_test, y_pred)# RMSE

(435408.6056773582, 0.22244113032449042)

In [50]:
##gbr
y_pred=gbr.predict(X_test)

mse(y_test, y_pred, squared=False) , r2(y_test, y_pred) 

(437549.13510239596, 0.21477716227536803)

In [51]:
##xgbr

y_pred=xgbr.predict(X_test)

mse(y_test, y_pred, squared=False), r2(y_test, y_pred)  # RMSE

(415553.6360342196, 0.2917388239444417)

In [52]:
##ctr
y_pred=ctr.predict(X_test)

mse(y_test, y_pred, squared=False), r2(y_test, y_pred)  # RMSE

(422793.1602615926, 0.2668460670509849)

In [53]:
## lbbmr
y_pred=lgbmr.predict(X_test)

mse(y_test, y_pred, squared=False), r2(y_test, y_pred)  # RMSE

(383249.4637354816, 0.3975758486228004)

In [54]:
#40%r2 aplicando funcion provincias, el resto dummies y labelencoder. 9 1 71

In [55]:
lazy = LazyRegressor(verbose=0,ignore_warnings=True, custom_metric=None)
models, predictions = lazy.fit(X_train, X_test, y_train, y_test)

100%|███████████████████████████████████████████| 42/42 [22:31<00:00, 32.17s/it]


In [56]:
print(models)

                               Adjusted R-Squared  R-Squared        RMSE  \
Model                                                                      
LGBMRegressor                                0.36       0.40   383643.60   
HistGradientBoostingRegressor                0.35       0.38   388844.54   
XGBRegressor                                 0.25       0.29   415558.80   
BaggingRegressor                             0.19       0.23   433229.32   
DecisionTreeRegressor                        0.18       0.23   434165.92   
GradientBoostingRegressor                    0.17       0.21   437537.26   
RandomForestRegressor                        0.16       0.20   440294.22   
AdaBoostRegressor                            0.13       0.17   449495.94   
KNeighborsRegressor                          0.12       0.16   452256.83   
ExtraTreesRegressor                          0.06       0.11   467123.88   
PoissonRegressor                             0.02       0.08   474813.07   
OrthogonalMa