# Índice

* Modelos
    * [Regresión lineal](#Regresión-lineal)
    * [Regresión polinomial](#Polinomial)
    * [K vecinos](#K-nn-regressor)
    * [Árboles de decisión](#Árbol-de-decisión)
    * [Random forests](#Random-Forest)
    * [Gradient Boosting Trees](#GBT)
    * [Light Gradient Boosting Machine](#LGBM)
    
* [Búsqueda de hiperparámetros](#Búsqueda-de-hiperpárametros)

In [53]:
# Data science
import pandas as pd
import numpy as np

# División de datos
from sklearn.model_selection import train_test_split, KFold

from sklearn.preprocessing import StandardScaler # Preprocesamiento

# Reducción de dimensionalidad
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from sklearn.metrics import r2_score # métrica

# Modelos
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
import lightgbm as lgb

# Hiperparámetros
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

import plotly.graph_objects as go # visualización

In [4]:
def reducir(X_train, y_train, X_test=None, y_test=None, k=2, metodo=""):
    transformadores = {"pca":PCA, "svd":TruncatedSVD, "lda":LinearDiscriminantAnalysis}
    metodo = metodo.lower()
    if metodo not in transformadores:
        raise ValueError("Ponle un valor chido al método")
        
    transformador = transformadores[metodo](n_components=k)
    X_train_r = transformador.fit_transform(X=X_train, y=y_train)
    if X_test is not None:
        X_test_r = transformador.transform(X_test)
        return X_train_r, X_test_r
    
    return X_train_r

In [5]:
data = pd.read_csv("../spotify/data.csv")
data.head()

Unnamed: 0,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo,valence,year
0,0.991,['Mamie Smith'],0.598,168333,0.224,0,0cS0A1fUEUd1EW3FcF8AEI,0.000522,5,0.379,-12.628,0,Keep A Song In Your Soul,12,1920,0.0936,149.976,0.634,1920
1,0.643,"[""Screamin' Jay Hawkins""]",0.852,150200,0.517,0,0hbkKFIJm7Z05H8Zl9w30f,0.0264,5,0.0809,-7.261,0,I Put A Spell On You,7,1920-01-05,0.0534,86.889,0.95,1920
2,0.993,['Mamie Smith'],0.647,163827,0.186,0,11m7laMUgmOKqI3oYzuhne,1.8e-05,0,0.519,-12.098,1,Golfing Papa,4,1920,0.174,97.6,0.689,1920
3,0.000173,['Oscar Velazquez'],0.73,422087,0.798,0,19Lc5SfJJ5O1oaxY0fpwfh,0.801,2,0.128,-7.311,1,True House Music - Xavier Santos & Carlos Gomi...,17,1920-01-01,0.0425,127.997,0.0422,1920
4,0.295,['Mixe'],0.704,165224,0.707,1,2hJjbsLCytGsnAHfdsLejp,0.000246,10,0.402,-6.036,0,Xuniverxe,2,1920-10-01,0.0768,122.076,0.299,1920


In [6]:
quitar = ["popularity", "artists", "id", "release_date", "name"]
X = data.drop(columns=quitar).values
y = data["popularity"].values

In [7]:
X_notest, X_test, y_notest, y_test = train_test_split(X, y, test_size=0.25, random_state=3)

In [8]:
sc = StandardScaler()
X_notest_st = sc.fit_transform(X_notest)
X_test_st = sc.transform(X_test)

In [9]:
X_notest_r, X_test_r = reducir(X_notest,y_notest, X_test, y_test, k = 3, metodo="pca")

#fig = go.Figure(data = go.Scatter3d(x = X_notest_r[:,0], y = X_notest_r[:,1], z = X_notest_r[:,2], 
                                     #mode = "markers", marker = {"color": y_notest, "size": 5}))
#fig.show()

In [None]:
X_notest_r, X_test_r = reducir(X_notest,y_notest, X_test, y_test, k = 3, metodo="svd")

#fig = go.Figure(data = go.Scatter3d(x = X_notest_r[:,0], y = X_notest_r[:,1], z = X_notest_r[:,2], 
                                     #mode = "markers", marker = {"color": y_notest, "size": 5}))
#fig.show()

In [17]:
X_notest_r, X_test_r = reducir(X_notest,y_notest, X_test, y_test, k = 3, metodo="lda")

#fig = go.Figure(data = go.Scatter3d(x = X_notest_r[:,0], y = X_notest_r[:,1], z = X_notest_r[:,2], 
                                     #mode = "markers", marker = {"color": y_notest, "size": 5}))
#fig.show()

In [13]:
def probar_metodo(estimador, X_notest, y_notest, X_test, y_test, 
                  k, metodo, **estimator_params):
    
    X_notest_r, X_test_r = reducir(X_notest, y_notest, 
                                   X_test, y_test, 
                                   k = k, metodo=metodo)
    
    kf = KFold(n_splits=4)

    scores_fold = []
    modelos = []
    for train_index, valid_index in kf.split(X_notest_r, y_notest):
        X_train, X_valid = X_notest_r[train_index,:], X_notest_r[valid_index,:]
        y_train, y_valid = y_notest[train_index], y_notest[valid_index]

        modelo = estimador(**estimator_params)
        modelo.fit(X_train, y_train)
        
        modelos.append(modelo)
        y_pred = modelo.predict(X_valid)
        scores_fold.append(r2_score(y_valid, y_pred))

    nombre_estimador = modelo.__class__.__name__
    print(f"{nombre_estimador} con {metodo}")
    print(f"R^2: {np.mean(scores_fold)} +- {np.std(scores_fold)}")
    
    best_i = np.argmax(scores_fold)
    best_model = modelos[best_i]
    best_score = scores_fold[best_i]
    
    print(f"Test: {best_model.score(X_test_r, y_test)}")
    
    return best_model

# Regresión lineal
## PCA

In [12]:
%%time
probar_metodo(LinearRegression, 
                   X_notest, y_notest, 
                   X_test, y_test, 
                   k = 3, metodo="pca")

LinearRegression con pca
R^2: 0.26400524845011314 +- 0.0022675107294706184
Test: 0.27323753660597505
CPU times: user 2.01 s, sys: 103 ms, total: 2.11 s
Wall time: 419 ms


LinearRegression()

In [14]:
%%time
probar_metodo(LinearRegression, 
                   X_notest_st, y_notest, 
                   X_test_st, y_test, 
                   k = 3, metodo="pca")

LinearRegression con pca
R^2: 0.22313326518586798 +- 0.006186290585090704
Test: 0.23044244206461162
CPU times: user 959 ms, sys: 46.6 ms, total: 1.01 s
Wall time: 193 ms


LinearRegression()

## SVD

In [15]:
%%time
probar_metodo(LinearRegression, 
                   X_notest, y_notest, 
                   X_test, y_test, 
                   k = 3, metodo="svd")

LinearRegression con svd
R^2: 0.26191278096340576 +- 0.0023531695773168606
Test: 0.2710588571406948
CPU times: user 972 ms, sys: 9.38 ms, total: 981 ms
Wall time: 185 ms


LinearRegression()

In [16]:
%%time
probar_metodo(LinearRegression, 
                   X_notest_st, y_notest, 
                   X_test_st, y_test, 
                   k = 3, metodo="svd")

LinearRegression con svd
R^2: 0.22313339184177333 +- 0.0061862478161035335
Test: 0.2304424388557481
CPU times: user 1.12 s, sys: 31.7 ms, total: 1.15 s
Wall time: 245 ms


LinearRegression()

## LDA

In [18]:
%%time
probar_metodo(LinearRegression, 
                   X_notest, y_notest, 
                   X_test, y_test, 
                   k = 3, metodo="lda")

LinearRegression con lda
R^2: 0.36184019708173365 +- 0.004650323828497517
Test: 0.36856598690593967
CPU times: user 497 ms, sys: 30.4 ms, total: 527 ms
Wall time: 262 ms


LinearRegression()

In [17]:
%%time
probar_metodo(LinearRegression, 
                   X_notest_st, y_notest, 
                   X_test_st, y_test, 
                   k = 3, metodo="lda")

LinearRegression con lda
R^2: 0.3618401970817338 +- 0.004650323828497529
Test: 0.3685659869059398
CPU times: user 510 ms, sys: 37.5 ms, total: 548 ms
Wall time: 317 ms


LinearRegression()

En regresión lineal, **estandarizar no ayuda** y el mejor método para reducir es **LDA**

# Polinomial

In [19]:
PolinomialRegression = Pipeline
pasos = [("poly_features", PolynomialFeatures(degree=3)),
         ("model", LinearRegression())]

## PCA

In [20]:
%%time
probar_metodo(PolinomialRegression, 
                   X_notest, y_notest, 
                   X_test, y_test, 
                   k = 3, metodo="pca", steps=pasos)

Pipeline con pca
R^2: 0.30265248923882637 +- 0.005697674070831135
Test: 0.304427521080444
CPU times: user 3.13 s, sys: 157 ms, total: 3.29 s
Wall time: 675 ms


Pipeline(steps=[('poly_features', PolynomialFeatures(degree=3)),
                ('model', LinearRegression())])

In [21]:
%%time
probar_metodo(PolinomialRegression, 
                   X_notest_st, y_notest, 
                   X_test_st, y_test, 
                   k = 3, metodo="pca", steps=pasos)

Pipeline con pca
R^2: 0.3195606322813509 +- 0.005078314090597231
Test: 0.3263750614450244
CPU times: user 2.34 s, sys: 159 ms, total: 2.5 s
Wall time: 452 ms


Pipeline(steps=[('poly_features', PolynomialFeatures(degree=3)),
                ('model', LinearRegression())])

## SVD

In [22]:
%%time
probar_metodo(PolinomialRegression, 
                   X_notest, y_notest, 
                   X_test, y_test, 
                   k = 3, metodo="svd", steps=pasos)

Pipeline con svd
R^2: 0.3884589304034293 +- 0.0027566695874204844
Test: 0.39515569372805714
CPU times: user 2.05 s, sys: 87.3 ms, total: 2.14 s
Wall time: 389 ms


Pipeline(steps=[('poly_features', PolynomialFeatures(degree=3)),
                ('model', LinearRegression())])

In [23]:
%%time
probar_metodo(PolinomialRegression, 
                   X_notest_st, y_notest, 
                   X_test_st, y_test, 
                   k = 3, metodo="svd", steps=pasos)

Pipeline con svd
R^2: 0.31956095443888677 +- 0.005078272803353199
Test: 0.3263751059176979
CPU times: user 2.19 s, sys: 125 ms, total: 2.32 s
Wall time: 419 ms


Pipeline(steps=[('poly_features', PolynomialFeatures(degree=3)),
                ('model', LinearRegression())])

## Con LDA

In [24]:
%%time
probar_metodo(PolinomialRegression, 
                   X_notest, y_notest, 
                   X_test, y_test, 
                   k = 3, metodo="lda", steps=pasos)

Pipeline con lda
R^2: 0.40351233505867296 +- 0.002673981948807353
Test: 0.406888981705244
CPU times: user 1.61 s, sys: 91.9 ms, total: 1.7 s
Wall time: 457 ms


Pipeline(steps=[('poly_features', PolynomialFeatures(degree=3)),
                ('model', LinearRegression())])

In [25]:
%%time
probar_metodo(PolinomialRegression, 
                   X_notest_st, y_notest, 
                   X_test_st, y_test, 
                   k = 3, metodo="lda", steps=pasos)

Pipeline con lda
R^2: 0.40351233505867723 +- 0.00267398194880763
Test: 0.4068889817052481
CPU times: user 1.55 s, sys: 102 ms, total: 1.65 s
Wall time: 455 ms


Pipeline(steps=[('poly_features', PolynomialFeatures(degree=3)),
                ('model', LinearRegression())])

Con polinomios de tercer grado, realmente **estandarizar no ayudó** (solamente un poquito con PCA).

El mejor método para reducir fue con **LDA**

# K-nn regressor

## PCA

In [33]:
%%time
probar_metodo(KNeighborsRegressor, X_notest, y_notest, X_test, y_test, 
                  k=3, metodo="pca", n_neighbors=10)

KNeighborsRegressor con pca
R^2: 0.27302025968648413 +- 0.0006544103886522212
Test: 0.28298048393383923
CPU times: user 2.81 s, sys: 104 ms, total: 2.91 s
Wall time: 1.24 s


KNeighborsRegressor(n_neighbors=10)

In [34]:
%%time
probar_metodo(KNeighborsRegressor, X_notest_st, y_notest, X_test_st, y_test, 
                  k=3, metodo="pca", n_neighbors=10)

KNeighborsRegressor con pca
R^2: 0.2901768945633608 +- 0.005119403839980977
Test: 0.29626137468963076
CPU times: user 2.85 s, sys: 92.9 ms, total: 2.94 s
Wall time: 1.27 s


KNeighborsRegressor(n_neighbors=10)

## SVD

In [35]:
%%time
probar_metodo(KNeighborsRegressor, X_notest, y_notest, X_test, y_test, 
                  k=3, metodo="svd", n_neighbors=10)

KNeighborsRegressor con svd
R^2: 0.27180944722693845 +- 0.0008436563815081078
Test: 0.2819016410375432
CPU times: user 2.9 s, sys: 104 ms, total: 3 s
Wall time: 1.24 s


KNeighborsRegressor(n_neighbors=10)

In [36]:
%%time
probar_metodo(KNeighborsRegressor, X_notest_st, y_notest, X_test_st, y_test, 
                  k=3, metodo="svd", n_neighbors=10)

KNeighborsRegressor con svd
R^2: 0.2901744523498199 +- 0.00512357963396762
Test: 0.2963000748035337
CPU times: user 2.97 s, sys: 79.1 ms, total: 3.05 s
Wall time: 1.27 s


KNeighborsRegressor(n_neighbors=10)

## LDA

In [40]:
%%time
probar_metodo(KNeighborsRegressor, X_notest, y_notest, X_test, y_test, 
                  k=3, metodo="lda", n_neighbors=10)

KNeighborsRegressor con lda
R^2: 0.448627992670915 +- 0.003253402757407348
Test: 0.45474888025937976
CPU times: user 2.43 s, sys: 55.4 ms, total: 2.49 s
Wall time: 1.37 s


KNeighborsRegressor(n_neighbors=10)

In [39]:
%%time
probar_metodo(KNeighborsRegressor, X_notest_st, y_notest, X_test_st, y_test, 
                  k=3, metodo="lda", n_neighbors=10)

KNeighborsRegressor con lda
R^2: 0.44862801122775714 +- 0.003253384235854219
Test: 0.45474903855090953
CPU times: user 2.42 s, sys: 27.6 ms, total: 2.45 s
Wall time: 1.36 s


KNeighborsRegressor(n_neighbors=10)

Con K-vecinos, podemos decir que **estandarizar sí ayudó** ya que tanto en PCA como SVD fue beneficioso (aunque tampoco tanto) y con LDA fue indiferente.

La mejor metodología fue **LDA**

# Árbol de decisión

## PCA

In [54]:
%%time
probar_metodo(DecisionTreeRegressor,X_notest, y_notest, X_test, y_test,
             k=3, metodo= "pca", max_depth=9)

DecisionTreeRegressor con pca
R^2: 0.48854371961611276 +- 0.0056877036938270216
Test: 0.49637536806475324
CPU times: user 2.61 s, sys: 48 ms, total: 2.66 s
Wall time: 1.12 s


DecisionTreeRegressor(max_depth=9)

In [55]:
%%time
probar_metodo(DecisionTreeRegressor, X_notest_st, y_notest, X_test_st, y_test,
             k=3, metodo= "pca", max_depth=9)

DecisionTreeRegressor con pca
R^2: 0.32579747154623145 +- 0.005211544704981865
Test: 0.32957859463819583
CPU times: user 2.52 s, sys: 40.1 ms, total: 2.56 s
Wall time: 1.08 s


DecisionTreeRegressor(max_depth=9)

## SVD

In [56]:
%%time
probar_metodo(DecisionTreeRegressor, X_notest, y_notest, X_test, y_test,
             k=3, metodo= "svd", max_depth=11)

DecisionTreeRegressor con svd
R^2: 0.28791469376081924 +- 0.0058143706664233515
Test: 0.3024552275768799
CPU times: user 3.01 s, sys: 60.3 ms, total: 3.07 s
Wall time: 1.32 s


DecisionTreeRegressor(max_depth=11)

In [57]:
%%time
probar_metodo(DecisionTreeRegressor, X_notest_st, y_notest, X_test_st, y_test,
             k=3, metodo= "svd", max_depth=11)

DecisionTreeRegressor con svd
R^2: 0.30517617003716296 +- 0.005283825185255884
Test: 0.3124643250751904
CPU times: user 2.79 s, sys: 32.1 ms, total: 2.82 s
Wall time: 1.24 s


DecisionTreeRegressor(max_depth=11)

## LDA

In [58]:
%%time
probar_metodo(DecisionTreeRegressor,X_notest, y_notest, X_test, y_test,
             k=3, metodo= "lda", max_depth=9)

DecisionTreeRegressor con lda
R^2: 0.4558690925413009 +- 0.00540611820783944
Test: 0.46422079556713847
CPU times: user 2.17 s, sys: 28.2 ms, total: 2.2 s
Wall time: 1.16 s


DecisionTreeRegressor(max_depth=9)

In [59]:
%%time
probar_metodo(DecisionTreeRegressor, X_notest_st, y_notest, X_test_st, y_test,
             k=3, metodo= "lda", max_depth=9)

DecisionTreeRegressor con lda
R^2: 0.45600066691733376 +- 0.005197461309903817
Test: 0.4637324067629719
CPU times: user 2.16 s, sys: 39.6 ms, total: 2.2 s
Wall time: 1.12 s


DecisionTreeRegressor(max_depth=9)

Para árboles de decisión, en general podemos decir que **estandarizar no ayudó**, ya que en PCA le fue peor, en SVD ayudó tantito y en LDA fue indiferente.

La mejor metodología fue **PCA**

# Random Forest

## PCA

In [41]:
%%time
probar_metodo(RandomForestRegressor, X_notest, y_notest, X_test, y_test,
             k=3, metodo= "pca", n_estimators = 31, random_state = 0)

RandomForestRegressor con pca
R^2: 0.5157523493618581 +- 0.0048419038990016846
Test: 0.5254741565369527
CPU times: user 39 s, sys: 278 ms, total: 39.3 s
Wall time: 37.6 s


RandomForestRegressor(n_estimators=31, random_state=0)

In [42]:
%%time
probar_metodo(RandomForestRegressor, X_notest_st, y_notest, X_test_st, y_test,
             k=3, metodo= "pca", n_estimators = 31, random_state = 0)

RandomForestRegressor con pca
R^2: 0.31153421392488645 +- 0.005354694784241086
Test: 0.3173166969861061
CPU times: user 40.4 s, sys: 191 ms, total: 40.6 s
Wall time: 39.1 s


RandomForestRegressor(n_estimators=31, random_state=0)

## SVD

In [43]:
%%time
probar_metodo(RandomForestRegressor, X_notest, y_notest, X_test, y_test,
             k=3, metodo= "svd", n_estimators = 31, random_state = 0)

RandomForestRegressor con svd
R^2: 0.47305136751529386 +- 0.003636777817901041
Test: 0.48029354812160574
CPU times: user 45 s, sys: 123 ms, total: 45.2 s
Wall time: 43.6 s


RandomForestRegressor(n_estimators=31, random_state=0)

In [44]:
%%time
probar_metodo(RandomForestRegressor, X_notest_st, y_notest, X_test_st, y_test,
             k=3, metodo= "svd", n_estimators = 31, random_state = 0)

RandomForestRegressor con svd
R^2: 0.31200721152011124 +- 0.005800213946496959
Test: 0.31665203037541867
CPU times: user 40.6 s, sys: 648 ms, total: 41.3 s
Wall time: 39.8 s


RandomForestRegressor(n_estimators=31, random_state=0)

## LDA

In [45]:
%%time
probar_metodo(RandomForestRegressor, X_notest, y_notest, X_test, y_test,
             k=3, metodo= "lda", n_estimators = 31, random_state = 0)

RandomForestRegressor con lda
R^2: 0.4640239297740527 +- 0.004150582027362178
Test: 0.4730679403231738
CPU times: user 39.5 s, sys: 172 ms, total: 39.6 s
Wall time: 38.6 s


RandomForestRegressor(n_estimators=31, random_state=0)

In [46]:
%%time
probar_metodo(RandomForestRegressor, X_notest_st, y_notest, X_test_st, y_test,
             k=3, metodo= "lda", n_estimators = 31, random_state = 0)

RandomForestRegressor con lda
R^2: 0.4640239297740527 +- 0.004150582027362178
Test: 0.4730679403231738
CPU times: user 39.4 s, sys: 212 ms, total: 39.6 s
Wall time: 38.5 s


RandomForestRegressor(n_estimators=31, random_state=0)

Para RandomForest, **estandarizar no fue de ayuda**.

El mejor método para reducir fue **PCA**

# GBT

## PCA

In [27]:
%%time
probar_metodo(GradientBoostingRegressor, X_notest, y_notest, X_test, y_test, 
                  k=3, metodo="pca", n_estimators=31, learning_rate=1.0, max_depth=2, random_state=42)

GradientBoostingRegressor con pca
R^2: 0.46574607587115613 +- 0.0050116511685733235
Test: 0.47133351864180784
CPU times: user 9.27 s, sys: 78.4 ms, total: 9.35 s
Wall time: 7.51 s


GradientBoostingRegressor(learning_rate=1.0, max_depth=2, n_estimators=31,
                          random_state=42)

In [26]:
%%time
probar_metodo(GradientBoostingRegressor, X_notest_st, y_notest, X_test_st, y_test, 
                  k=3, metodo="pca", n_estimators=31, learning_rate=1.0, max_depth=2, random_state=42)

GradientBoostingRegressor con pca
R^2: 0.3226376941972746 +- 0.005642548517168225
Test: 0.3302961415674527
CPU times: user 9.75 s, sys: 252 ms, total: 10 s
Wall time: 8.38 s


GradientBoostingRegressor(learning_rate=1.0, max_depth=2, n_estimators=31,
                          random_state=42)

## SVD

In [28]:
%%time
probar_metodo(GradientBoostingRegressor, X_notest, y_notest, X_test, y_test, 
                  k=3, metodo="svd", n_estimators=31, learning_rate=1.0, max_depth=2, random_state=42)

GradientBoostingRegressor con svd
R^2: 0.26720199269917 +- 0.0012467928510788513
Test: 0.269704911394694
CPU times: user 9.57 s, sys: 130 ms, total: 9.7 s
Wall time: 7.77 s


GradientBoostingRegressor(learning_rate=1.0, max_depth=2, n_estimators=31,
                          random_state=42)

In [29]:
%%time
probar_metodo(GradientBoostingRegressor, X_notest_st, y_notest, X_test_st, y_test, 
                  k=3, metodo="svd", n_estimators=31, learning_rate=1.0, max_depth=2, random_state=42)

GradientBoostingRegressor con svd
R^2: 0.32142688330499064 +- 0.005353303308812006
Test: 0.32705014816617517
CPU times: user 9.64 s, sys: 78.2 ms, total: 9.72 s
Wall time: 7.86 s


GradientBoostingRegressor(learning_rate=1.0, max_depth=2, n_estimators=31,
                          random_state=42)

## LDA

In [30]:
%%time
probar_metodo(GradientBoostingRegressor, X_notest, y_notest, X_test, y_test, 
                  k=3, metodo="lda", n_estimators=31, learning_rate=1.0, max_depth=2, random_state=42)

GradientBoostingRegressor con lda
R^2: 0.4437121746800048 +- 0.005654737367492657
Test: 0.4484815864430197
CPU times: user 8.86 s, sys: 18 ms, total: 8.88 s
Wall time: 7.77 s


GradientBoostingRegressor(learning_rate=1.0, max_depth=2, n_estimators=31,
                          random_state=42)

In [31]:
%%time
probar_metodo(GradientBoostingRegressor, X_notest_st, y_notest, X_test_st, y_test, 
                  k=3, metodo="lda", n_estimators=31, learning_rate=1.0, max_depth=2, random_state=42)

GradientBoostingRegressor con lda
R^2: 0.4437121746800048 +- 0.005654737367492657
Test: 0.4484815864430197
CPU times: user 8.84 s, sys: 26.9 ms, total: 8.87 s
Wall time: 7.77 s


GradientBoostingRegressor(learning_rate=1.0, max_depth=2, n_estimators=31,
                          random_state=42)

Con Gradient Boosting Machine vemos que en PCA empeoró, en SVD sí mejoró y en LDA fue indiferente, por lo que podemos decir que, en general, **estandarizar no ayudó**.

Se alcanzó mejores resultados con **PCA**

# LGBM

## PCA

In [60]:
%%time
probar_metodo(lgb.LGBMRegressor, X_notest, y_notest, X_test, y_test, k=3, metodo="pca", 
              boosting_type="dart", n_estimators=31, learning_rate=1.95)

LGBMRegressor con pca
R^2: 0.047352449577081096 +- 0.019373112925562447
Test: 0.050175506844550744
CPU times: user 6.31 s, sys: 130 ms, total: 6.44 s
Wall time: 1.58 s


LGBMRegressor(boosting_type='dart', learning_rate=1.95, n_estimators=31)

In [62]:
%%time
probar_metodo(lgb.LGBMRegressor, X_notest_st, y_notest, X_test_st, y_test, k=3, metodo="pca", 
              boosting_type="dart", n_estimators=31, learning_rate=1.95)

LGBMRegressor con pca
R^2: 0.01619542866839549 +- 0.014879683312437799
Test: 0.023469882321882962
CPU times: user 3.05 s, sys: 16.2 ms, total: 3.07 s
Wall time: 544 ms


LGBMRegressor(boosting_type='dart', learning_rate=1.95, n_estimators=31)

## SVD

In [63]:
%%time
probar_metodo(lgb.LGBMRegressor, X_notest, y_notest, X_test, y_test, k=3, metodo="svd", 
              boosting_type="dart", n_estimators=31, learning_rate=2.15)

LGBMRegressor con svd
R^2: -6.233646443780534 +- 0.09855816068573812
Test: -6.168702294750761
CPU times: user 3.1 s, sys: 44.4 ms, total: 3.15 s
Wall time: 563 ms


LGBMRegressor(boosting_type='dart', learning_rate=2.15, n_estimators=31)

In [64]:
%%time
probar_metodo(lgb.LGBMRegressor, X_notest_st, y_notest, X_test_st, y_test, k=3, metodo="svd", 
              boosting_type="dart", n_estimators=31, learning_rate=2.15)

LGBMRegressor con svd
R^2: -8.628945872587654 +- 0.11617841235135916
Test: -8.53093353354625
CPU times: user 3.11 s, sys: 39.7 ms, total: 3.15 s
Wall time: 564 ms


LGBMRegressor(boosting_type='dart', learning_rate=2.15, n_estimators=31)

## LDA

In [65]:
%%time
probar_metodo(lgb.LGBMRegressor, X_notest, y_notest, X_test, y_test, k=3, metodo="lda", 
              boosting_type="dart", n_estimators=31, learning_rate=1.95)

LGBMRegressor con lda
R^2: 0.02882809643943912 +- 0.0072701717618820485
Test: 0.037745985319599495
CPU times: user 2.79 s, sys: 28 ms, total: 2.81 s
Wall time: 633 ms


LGBMRegressor(boosting_type='dart', learning_rate=1.95, n_estimators=31)

In [66]:
%%time
probar_metodo(lgb.LGBMRegressor, X_notest_st, y_notest, X_test_st, y_test, k=3, metodo="lda", 
              boosting_type="dart", n_estimators=31, learning_rate=1.95)

LGBMRegressor con lda
R^2: 0.02882809643943912 +- 0.0072701717618820485
Test: 0.037745985319599495
CPU times: user 2.6 s, sys: 15.9 ms, total: 2.62 s
Wall time: 582 ms


LGBMRegressor(boosting_type='dart', learning_rate=1.95, n_estimators=31)

En LGBM podemos decir que **estandarizar no ayudó**

la mejor metodología fue **PCA**

Hay algo raro con este modelo, ya que parece ser que si usas su propio score, da otras cosas

-------------------

En general, creo que estandarizar no resultó útil

De 7 modelos, con 4 resultó mejor usar PCA y con los otros 3 usar LDA.


---------------

# Búsqueda de hiperpárametros

## Random Forest

In [45]:
params = {"n_estimators":[10,20,25,30],
          "max_depth":[6,9,12,15],
          "min_samples_leaf":[1,2,4,8], 
          "max_features":["auto", "sqrt", "log2"],
          "bootstrap":[True, False],
          "random_state":[0], "max_samples":[0.75]}

In [46]:
%%time
grid_rf = GridSearchCV(RandomForestRegressor(), params,
                          n_jobs=-1, cv=4, verbose=0)
grid_rf.fit(X_notest_r, y_notest)

print(grid_rf.best_score_)
grid_rf.best_params_

0.532396169615266
CPU times: user 7.53 s, sys: 609 ms, total: 8.14 s
Wall time: 11min 43s


{'bootstrap': False,
 'max_depth': 15,
 'max_features': 'sqrt',
 'max_samples': 0.75,
 'min_samples_leaf': 1,
 'n_estimators': 30,
 'random_state': 0}

## Gradient Boosting Machine

In [56]:
params = {"loss":["ls", "lad", "huber"],
          "learning_rate":[0.1, 0.01],
          "n_estimators":[25,50,100,150],
          "subsample":[1.0, 0.75],
          "min_samples_leaf":[2,4,8]}

In [57]:
%%time
grid_gbm = GridSearchCV(GradientBoostingRegressor(random_state=3,
                                              n_iter_no_change=10), 
                    params, n_jobs=-1, cv=4, verbose=0)
grid_gbm.fit(X_notest_r, y_notest)

print(grid_gbm.best_score_)
grid_gbm.best_params_

0.503651768367076
CPU times: user 13.5 s, sys: 182 ms, total: 13.7 s
Wall time: 12min 1s


{'learning_rate': 0.1,
 'loss': 'ls',
 'min_samples_leaf': 2,
 'n_estimators': 150,
 'subsample': 0.75}