# 4.1 - Más Modelos

In [1]:
import pandas as pd

from sklearn.datasets import load_diabetes, load_wine

from sklearn.model_selection import train_test_split as tts

In [2]:
diabetes=pd.DataFrame(load_diabetes().data, columns=load_diabetes().feature_names)
diabetes['target']=load_diabetes().target

diabetes.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0


In [3]:
wine=pd.DataFrame(load_wine().data, columns=load_wine().feature_names)
wine['target']=load_wine().target

wine.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


In [4]:
X_train_d, X_test_d, y_train_d, y_test_d = tts(diabetes.drop('target', axis=1), 
                                               diabetes.target)

In [5]:
X_train_v, X_test_v, y_train_v, y_test_v = tts(wine.drop('target', axis=1), wine.target) 

In [7]:
def clasi(modelo):
    
    modelo.fit(X_train_v, y_train_v)
    
    train_score=modelo.score(X_train_v, y_train_v)  # accuracy
    test_score=modelo.score(X_test_v, y_test_v)
    
    print(modelo)
    print('Train Acc:', train_score)
    print('Test Acc:', test_score)
    
    return modelo

In [8]:
def regre(modelo):
    
    modelo.fit(X_train_d, y_train_d)
    
    train_score=modelo.score(X_train_d, y_train_d)  # R2
    test_score=modelo.score(X_test_d, y_test_d)
    
    print(modelo)
    print('Train R2:', train_score)
    print('Test R2:', test_score)
    
    return modelo

# Gradiente Descendente Estocástico SGD

![grad1](images/grad1.png)

![grad2](images/grad2.jpg)

Es una derivación numérica. Lo estocástico viene de la elección de la muestra, en vez de coger todos los datos solo coge uno. Reduce el costo computacional. Sirve como regresor o como clasificador.

##### SGDR

In [10]:
from sklearn.linear_model import SGDRegressor as SGDR

In [15]:
sgdr=SGDR(max_iter=50000)

sgdr=regre(sgdr)

SGDRegressor(max_iter=50000)
Train R2: 0.5027193501849029
Test R2: 0.5155941021144415


In [13]:
#help(sgdr)

##### SGDC

In [17]:
from sklearn.linear_model import SGDClassifier as SGDC

In [18]:
sgdc=SGDC()

sgdc=clasi(sgdc)

SGDClassifier()
Train Acc: 0.6992481203007519
Test Acc: 0.6


In [20]:
#help(sgdc)

# Naive Bayes

$$P(clase|data)= \frac{P(data|clase)·P(clase)}{P(data)}$$

+ P(data|clase) es $N(\mu, \sigma)$ (datos normalizados)

+ P(clase) es el a priori 

+ P(data) no se calcula

+ P(clase|data) el a posteriori, lo que se busca

Lo de Naive viene de la suposición de independencia condicional entre predictores, lo cuál no suele ser verdad.


$$\hat{y}=argmax [P(y) \prod_{i=1}^{n} P(x_i|y)]$$

### GNB (Gaussian Naive Bayes)

In [21]:
from sklearn.naive_bayes import GaussianNB as GNB

gnb=GNB()

gnb=clasi(gnb)

GaussianNB()
Train Acc: 0.9774436090225563
Test Acc: 0.9777777777777777


In [23]:
#help(gnb)

### MNB (Multinomial Naive Bayes)

In [24]:
from sklearn.naive_bayes import MultinomialNB as MNB

mnb=MNB()

mnb=clasi(mnb)

MultinomialNB()
Train Acc: 0.8646616541353384
Test Acc: 0.7333333333333333


In [None]:
#help(mnb)

### CNB (Complement Naive Bayes)

In [25]:
from sklearn.naive_bayes import ComplementNB as CNB

cnb=CNB()

cnb=clasi(cnb)

ComplementNB()
Train Acc: 0.6842105263157895
Test Acc: 0.4888888888888889


In [26]:
#help(cnb)

### BNB (Bernoulli Naive Bayes)

In [27]:
from sklearn.naive_bayes import BernoulliNB as BNB

bnb=BNB()

bnb=clasi(bnb)

BernoulliNB()
Train Acc: 0.42105263157894735
Test Acc: 0.3333333333333333


In [28]:
#help(bnb)

# K-Nearest Neighbors

![knn](images/knn.png)

Este modelo se basa en al idea de vecindario, según sea lo que rodea al nuevo dato, así se clasificará.

Básicamente se trata de calcular las distancias (euclídea o L1 normalmente) entre los datos, para asignar el valor (etiqueta) al nuevo dato según sean sus vecinos.

Se suele tomar $k$ como un número impar para poder tomar una decisión en caso de empate.

#### KNNR

In [29]:
from sklearn.neighbors import KNeighborsRegressor as KNNR

In [41]:
knnr=KNNR(n_neighbors=5, weights='distance')

knnr=regre(knnr)

KNeighborsRegressor(weights='distance')
Train R2: 1.0
Test R2: 0.377145612903084


In [40]:
#help(knnr)

#### KNNC

In [43]:
from sklearn.neighbors import KNeighborsClassifier as KNNC

In [58]:
knnc=KNNC(n_neighbors=20)

knnc=clasi(knnc)

KNeighborsClassifier(n_neighbors=20)
Train Acc: 0.7669172932330827
Test Acc: 0.6


In [47]:
#help(knnc)

# Ensemble Models


### Bagging  -  Boosting


![boost](images/boost.png)

#### Gradient Boost Regressor

#### Gradient Boost Classifier

## Extreme Gradient Boosting

#### XGBR

#### XGBC

### Catboost

##### CTR

##### CTC

## LightGBM

#### LGBR

#### LGBC

# PyCaret

https://pycaret.org/tutorial/

https://colab.research.google.com/drive/1GqQ3XAIzg4krBbnOpKyeRqT0qBQhdwYL#scrollTo=lUvE187JEQm3

# H2o


https://www.h2o.ai/

# Stacking


![stacking](images/stacking.png)

#### mlxtend