In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split as tts

In [None]:
from sklearn.linear_model import LinearRegression as LinReg
from sklearn.linear_model import Lasso        # regularizacion L1
from sklearn.linear_model import Ridge        # regularizacion L2
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor as RFR  
from sklearn.tree import ExtraTreeRegressor as ETR
from sklearn.ensemble import GradientBoostingRegressor as GBR
from xgboost import XGBRegressor as XGBR
from catboost import CatBoostRegressor as CTR
from lightgbm import LGBMRegressor as LGBMR

In [None]:
pd.set_option('display.max_columns', None)

In [None]:
pd.set_option('display.max_rows', None)

In [None]:
boe=pd.read_csv('../data/BOE_def.csv')

In [None]:
boe.fecha_de_conclusion.unique()

In [None]:
import regex as re

In [None]:
def limpiar_(fecha_de_inicio):
    """Extract every run of four consecutive digits from a date string.

    Parameters
    ----------
    fecha_de_inicio : str
        Raw date text. Despite the parameter name, the notebook applies this
        function to both the start-date and the end-date columns.

    Returns
    -------
    str
        The non-overlapping 4-digit matches, in order of appearance, joined
        by a single space. An empty string when there is no match.

    Notes
    -----
    Matches are not limited to years: a longer digit run such as
    ``'20211231'`` yields ``'2021 1231'``.
    """
    # Raw string literal: in a plain literal '\d' is an invalid escape
    # sequence, which recent Python versions warn about.
    grupos = re.findall(r'\d{4}', fecha_de_inicio)
    return ' '.join(grupos)

In [None]:
boe.fecha_de_inicio=boe.fecha_de_inicio.apply(limpiar_)

In [None]:
boe.fecha_de_conclusion=boe.fecha_de_conclusion.apply(limpiar_)

In [None]:
boe.head()

In [None]:
boe.drop(columns=['identificador','tipo_de_subasta','cuenta_expediente',
                  'anuncio_BOE','tasacion','puja_minima','tramos_entre_pujas',
                 'importe_del_deposito','direccion','localidad'], inplace=True)

In [None]:
boe.head()

In [None]:
boe.info(memory_usage='deep')

In [None]:
boe['fecha_de_inicio'] = boe['fecha_de_inicio'].astype(int)

In [None]:
boe['fecha_de_conclusion'] = boe['fecha_de_conclusion'].astype(int)

In [None]:
#eliminamos esta columna porque el precio se cierra en la fecha de conclusion y varia segun el año final.
boe.drop(columns=['fecha_de_inicio'], inplace=True)

In [None]:
#eliminamos codigo postal? con eso hemos sacado el pm2, no nos interesa más. Ademas contamos con la provincia.
boe.drop(columns=['codigo_postal'], inplace=True)

In [None]:
boe.head()

In [None]:
#creamos new colum con la diferencia entre la deuda(cantidad_reclamada) y el valor que le da el juez(valor_subasta)
boe['diff'] = boe['valor_subasta'].sub(boe['cantidad_reclamada'], axis = 0)

In [None]:
boe.head()

In [None]:
boe.drop(columns=['cantidad_reclamada','valor_subasta'], inplace=True)

In [None]:
boe.head()

In [None]:
boe=pd.get_dummies(boe, columns=['fecha_de_conclusion','provincia'], drop_first=True)

boe.head()

In [None]:
boe.head()

In [None]:
boe.pm2=LabelEncoder().fit_transform(boe.pm2)

In [None]:
boe.head()

In [None]:
# Distinct values of the difference column. At this point the column is still
# named 'diff' (it is renamed to 'diference' in the following cell), and it has
# to be read with brackets because `boe.diff` resolves to DataFrame.diff.
boe['diff'].unique()

In [None]:
boe=boe.rename(columns = {'diff':'diference'})

In [None]:
boe.diference=LabelEncoder().fit_transform(boe.diference)

In [None]:
boe.head()

In [None]:
boe_x=boe.drop('puja', axis=1)
boe_y=boe.puja

In [None]:
X_train, X_test, y_train, y_test = tts(boe_x, boe_y,train_size=0.9, test_size=0.1,random_state =71)

**Iniciamos los modelos**

In [None]:
# Linear models and SVR: left at their defaults.
linreg=LinReg()
lasso=Lasso()
ridge=Ridge()
elastic=ElasticNet()
svr=SVR()

# The scikit-learn tree-based models default to random_state=None, so they
# draw fresh randomness on every run. They get the same seed as the
# train/test split (71) so that a fresh "Run All" reproduces the same fits.
rfr=RFR(random_state=71)
etr=ETR(random_state=71)
gbr=GBR(random_state=71)

# Gradient-boosting libraries: left at their defaults, except that CatBoost's
# per-iteration training log is silenced to keep the cell output readable.
xgbr=XGBR()
ctr=CTR(verbose=0)
lgbmr=LGBMR()

**Entrenamos modelos**

In [None]:
# Fit every candidate on the same training split, in the order they were created.
for modelo in (linreg, lasso, ridge, elastic, svr, rfr, etr, gbr, xgbr, ctr):
    modelo.fit(X_train, y_train)

# Kept as the cell's last expression so the cell still displays the result of
# the final fit call.
lgbmr.fit(X_train, y_train)

In [None]:
from sklearn.metrics import mean_squared_error as mse

In [None]:
from sklearn.metrics import r2_score as r2

In [None]:
#Import Lazypredict and all libraries
import lazypredict
from lazypredict.Supervised import LazyRegressor
from sklearn.model_selection import train_test_split
import os
import pandas as pd
import numpy as np

In [None]:
lazy = LazyRegressor(verbose=0,ignore_warnings=True, custom_metric=None)
models, predictions = lazy.fit(X_train, X_test, y_train, y_test)

In [None]:
print(models)