#### **PREPARACIÓN DEL ENTORNO**


In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from random import randint
import pandas as pd
import pickle
from tqdm import tqdm
tqdm.pandas()

from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [2]:
tqdm.pandas()

#### **PREPROCESADO DE IMÁGENES**


In [3]:
data = '../input/cvsfeatures/df_img_processed2.csv'

df = pd.read_csv(data)

In [4]:
df

Unnamed: 0,winery,product,region,country_code,rating,variety,price_usd,image,continente,country_code_clean,...,4086,4087,4088,4089,4090,4091,4092,4093,4094,4095
0,cathedral ridge,cathedral ridge bangsund vineyard pinot noir 2006,columbia valley oregon,us,3.5,pinot noir,163.000000,vintage-images/MBo5T9ZQQQGbzsRmVORp-w_pl_480x6...,north america,united states,...,0.000000,2.723852,0.112117,1.256278,0.000000,0.0,0.688721,0.0,0.000000,0.641749
1,penfolds,penfolds rwt shiraz 2005,barossa valley,au,4.5,shiraz syrah,169.950000,vintage-images/dDyTwzIfQC68MCUgjmYWpw_pl_480x6...,oceania,australia,...,1.020779,0.000000,0.000000,2.622155,0.000000,0.0,0.290430,0.0,0.000000,0.000000
2,marques de riscal,marques de riscal frank gehry selection 2001,rioja,es,5.0,tempranillo,357.196682,vintage-images/t44NPpbUT6uy0vRt4VD2RQ_pl_480x6...,europe,spain,...,0.000000,0.555558,0.000000,0.656157,0.000000,0.0,0.000000,0.0,0.000000,1.079562
3,wyndham,wyndham pinot noir bin 333 2007,south eastern australia,au,4.0,pinot noir,10.990000,vintage-images/v1RahJe2QQyp4oAFOv90bA_pl_480x6...,oceania,australia,...,0.000000,1.728447,0.707242,0.000000,0.000000,0.0,0.000000,0.0,0.000000,1.881952
4,louis jadot,louis jadot chevalier montrachet grand cru 2006,chevalier montrachet grand cru,fr,5.0,chardonnay,309.990000,vintage-images/VlMihlZUQE6US7fr09w5ZA_pl_480x6...,europe,france,...,0.000000,2.167833,0.000000,0.000000,0.000000,0.0,0.400719,0.0,0.000000,2.144384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9873,jermann,jermann blau blau rosso,delle venezie,it,4.0,blaufrankisch,35.138185,vintage-images/mpvstrRZSD-yVA3__GEWng_pl_480x6...,europe,italy,...,0.000000,0.210194,0.000000,1.297643,0.000000,0.0,0.000000,0.0,0.162085,2.841119
9874,caposaldo,caposaldo sweet pink merlot moscato,veneto,it,2.0,merlot,10.990000,vintage-images/jYq_vdJgRCGsVqvs5vX3fg_pl_480x6...,europe,italy,...,1.358612,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.788493,2.711399
9875,chalone vineyard,chalone vineyard monterey pinot noir,monterey county,us,3.5,pinot noir,15.990000,vintage-images/4PY0biY6St2tFoytqwopJg_pl_480x6...,north america,united states,...,0.000000,1.227074,0.727661,0.000000,0.000000,0.0,0.000000,0.0,2.678588,1.821980
9876,san antonio winery,san antonio winery imperial red semi sweet,california,us,4.0,merlot,8.990000,vintage-images/9YJV02cwSriLjK1al5G6DA_pl_480x6...,north america,united states,...,3.614763,0.000000,0.000000,1.649501,0.000000,0.0,0.000000,0.0,0.000000,0.000000


In [5]:
print(f"Existen {len(df['variety'].value_counts())} variedades de vinos")

Existen 257 variedades de vinos


In [6]:
df_variety = df['variety'].value_counts().rename_axis('variety').to_frame('counts')
df_variety

Unnamed: 0_level_0,counts
variety,Unnamed: 1_level_1
cabernet sauvignon,2215
chardonnay,1030
shiraz syrah,802
pinot noir,714
merlot,519
...,...
symphony,1
moscatel,1
cencibel,1
treixadura,1


Vemos que existen 257 variedades de vinos, pero vemos que los 5 principales variedades representan más del 53% de los datos.

In [7]:
df_variety_selected = df_variety[:15]
df_variety_selected

Unnamed: 0_level_0,counts
variety,Unnamed: 1_level_1
cabernet sauvignon,2215
chardonnay,1030
shiraz syrah,802
pinot noir,714
merlot,519
sangiovese,431
nebbiolo,339
sauvignon blanc,300
tempranillo,281
touriga nacional,258


In [8]:
df_variety_selected.sum()

counts    7822
dtype: int64

Si consideramos las 15 principales categorias, obtenemos una representación cercana al 80% de los datos.

En este punto, vamos a proceder a utilizar 16 categorias de esta columna, incluyendo como Otras las variety que no estan representadas en el 80% seleccionado.

In [9]:
list_variety = list(df_variety_selected.index)

def transform_variety(variety, list_variety):
  if variety in list_variety:
    return variety
  else:
    return "other"

df['variety'] = df.progress_apply(lambda x:transform_variety(x.variety,list_variety), axis=1)

100%|██████████| 9878/9878 [00:02<00:00, 3545.71it/s]


In [10]:
df['variety'].value_counts()

cabernet sauvignon    2215
other                 2056
chardonnay            1030
shiraz syrah           802
pinot noir             714
merlot                 519
sangiovese             431
nebbiolo               339
sauvignon blanc        300
tempranillo            281
touriga nacional       258
malbec                 222
cabernet franc         219
grenache               178
riesling               157
zinfandel              157
Name: variety, dtype: int64

In [11]:
le = LabelEncoder()
variedad = le.fit_transform(df['variety'])

In [12]:
from sklearn.neighbors import KNeighborsClassifier

In [13]:
X = df.iloc[:,15:].values
y = variedad

In [14]:
X.shape, y.shape

((9878, 4096), (9878,))

In [15]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=42)

**KNN**

In [16]:
classifier_knn = KNeighborsClassifier()

In [17]:
classifier_knn.fit(X_train, y_train)


KNeighborsClassifier()

In [18]:
y_pred_knn = classifier_knn.predict(X_test)

In [19]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

result = confusion_matrix(y_test, y_pred_knn)

print('Confusion Matrix KNN')

print(result)


print('Accuracy:', accuracy_score(y_test, y_pred_knn))

Confusion Matrix KNN
[[ 37   5   5   1   1   3   2   7   5   0   1   0   2   0   0   0]
 [ 14 508   4   2  15  31  17  54   6   0  24   0  18  16   9   2]
 [  0  10 184   7   2   1   9  49  15   3   1   5   6   4   2   0]
 [  0   6   1  13   0   3   0   7   7   0   0   0  10   0   3   0]
 [  3  33   0   1   8   5   0   6   0   0   1   0   1   0   1   0]
 [  2  77   0   1   4  52   5  16   1   0   7   1   4   0   3   1]
 [  3  18   0   1   2   6  44  10   6   0   1   1   6   0   3   0]
 [  9 116  93   5  10  10  21 192  28   5  11  17  30   1  10   4]
 [  1  12  34  13   2   3  13  26  94   0   2   1  19   1   3   0]
 [  0   2  14   1   0   0   0  18   4   3   0   0   1   1   0   0]
 [  3  46   4   1   2  12   2  21   1   0  27   0   4   1   3   3]
 [  0  17  16   0   1   0   1  31   6   1   1  17   2   0   0   1]
 [  4  44   6  11   6   8  12  36  23   0   4   2  74   3   2   2]
 [  1  33   1   0   2   7   1  10   4   0   1   0   4  15   4   0]
 [  3  21   1   0   4   0   2  10   3   0

In [20]:
print(classification_report(y_test,y_pred_knn,target_names=le.classes_))

                    precision    recall  f1-score   support

    cabernet franc       0.46      0.54      0.49        69
cabernet sauvignon       0.52      0.71      0.60       720
        chardonnay       0.51      0.62      0.56       298
          grenache       0.23      0.26      0.24        50
            malbec       0.14      0.14      0.14        59
            merlot       0.37      0.30      0.33       174
          nebbiolo       0.34      0.44      0.38       101
             other       0.38      0.34      0.36       562
        pinot noir       0.46      0.42      0.44       224
          riesling       0.25      0.07      0.11        44
        sangiovese       0.32      0.21      0.25       130
   sauvignon blanc       0.39      0.18      0.25        94
      shiraz syrah       0.39      0.31      0.35       237
       tempranillo       0.35      0.18      0.24        83
  touriga nacional       0.36      0.33      0.34        76
         zinfandel       0.28      0.12

In [21]:
print("Test accuracy: %.3f" % classifier_knn.score(X_test, y_test))

Test accuracy: 0.438


**SVC**

In [22]:
from sklearn.svm import SVC
classifier_svc = SVC(kernel='rbf')

In [23]:
classifier_svc.fit(X_train, y_train)

SVC()

In [24]:
y_pred_svc = classifier_svc.predict(X_test)

In [25]:
result = confusion_matrix(y_test, y_pred_svc)

print('Confusion Matrix SVC')

print(result)


print('Accuracy:', accuracy_score(y_test, y_pred_svc))

Confusion Matrix SVC
[[ 13  36   4   0   0   0   0   7   9   0   0   0   0   0   0   0]
 [  0 629   3   0   0   1   0  80   4   0   1   0   2   0   0   0]
 [  0   9 173   0   0   0   0 104   9   0   0   0   3   0   0   0]
 [  0   9   1   6   0   0   0  14  10   0   0   0  10   0   0   0]
 [  0  47   0   0   4   0   0   8   0   0   0   0   0   0   0   0]
 [  0 120   0   0   0  27   0  25   1   0   1   0   0   0   0   0]
 [  0  44   1   0   0   0  16  31   7   0   0   0   2   0   0   0]
 [  0 127  65   0   0   0   0 328  33   0   0   0   7   0   2   0]
 [  0   9  18   0   0   0   0  68 118   0   0   0  11   0   0   0]
 [  0   1  11   0   0   0   0  30   1   0   0   0   1   0   0   0]
 [  0  87   0   0   0   0   0  28   0   0  15   0   0   0   0   0]
 [  0   8  17   0   0   0   0  55   1   0   0  12   1   0   0   0]
 [  0  76   4   0   0   0   0  68  53   0   1   0  35   0   0   0]
 [  0  46   0   0   0   0   0  28   6   0   0   0   1   2   0   0]
 [  0  33   0   0   0   0   0  14   3   0

In [26]:
print(classification_report(y_test,y_pred_svc,target_names=le.classes_))

                    precision    recall  f1-score   support

    cabernet franc       1.00      0.19      0.32        69
cabernet sauvignon       0.48      0.87      0.62       720
        chardonnay       0.58      0.58      0.58       298
          grenache       1.00      0.12      0.21        50
            malbec       1.00      0.07      0.13        59
            merlot       0.96      0.16      0.27       174
          nebbiolo       1.00      0.16      0.27       101
             other       0.36      0.58      0.45       562
        pinot noir       0.46      0.53      0.49       224
          riesling       0.00      0.00      0.00        44
        sangiovese       0.83      0.12      0.20       130
   sauvignon blanc       1.00      0.13      0.23        94
      shiraz syrah       0.47      0.15      0.23       237
       tempranillo       1.00      0.02      0.05        83
  touriga nacional       0.93      0.33      0.49        76
         zinfandel       0.00      0.00

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
print("Test accuracy: %.3f" % classifier_svc.score(X_test, y_test))

Test accuracy: 0.473


**Ajuste de Hiperparametros:**

Vamos a proceder a optimizar nuestro modelo aplicando escalado a los datos y reducciendo dimensionalidad mediante PCA, además de utilizar GridSearchCV para buscar los mejores parametros asociados al Clasificador KNN.

In [28]:
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

# Normalizacion

scaler=StandardScaler()

scaler.fit(X) # calculo la media para poder hacer la transformacion
X_scaled=scaler.transform(X)# Ahora si, escalo los datos y los normalizo

#Instanciamos objeto PCA y aplicamos
pca=PCA(n_components=1000) 
pca.fit(X_scaled) # obtener los componentes principales
X_pca=pca.transform(X_scaled) # convertimos nuestros datos con las nuevas dimensiones de PCA

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size = 0.3, random_state=42)

In [30]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((6914, 1000), (2964, 1000), (6914,), (2964,))

**KNN normalizado**

In [31]:
classifier_knn_pca = KNeighborsClassifier(n_neighbors=5, p=3, weights='distance')

In [32]:
classifier_knn_pca.fit(X_train,y_train)

KNeighborsClassifier(p=3, weights='distance')

In [33]:
y_pred_knn_pca = classifier_knn_pca.predict(X_test)

In [34]:
result = confusion_matrix(y_test, y_pred_knn_pca)

print('Confusion Matrix KNN - Normalizado y PCA')

print(result)


print('Accuracy:', accuracy_score(y_test, y_pred_knn_pca))

Confusion Matrix KNN - Normalizado y PCA
[[ 40   4   3   1   0   3   4   5   3   0   2   0   1   1   2   0]
 [  9 510   6   4   7  33   8  57   9   0  11   1  31  15  12   7]
 [  0   6 174   5   1   0   6  62  17   1   2  11   9   3   0   1]
 [  0   7   1  21   0   2   1   5   5   0   0   2   5   0   1   0]
 [  1  23   0   1  10   4   1  13   0   0   2   0   1   0   2   1]
 [  0  46   0   3   0  75   0  24   3   0   5   1  10   3   2   2]
 [  0   9   1   0   0   3  60   9   8   0   1   0   8   1   1   0]
 [  2  87  72   5   4  13  11 251  27   4  16  23  31   5   7   4]
 [  0  12  31   6   2   3  12  28  99   1   0   0  25   1   3   1]
 [  0   1  10   1   0   0   0  17   3  10   0   0   2   0   0   0]
 [  0  30   1   1   1   4   5  21   0   0  50   1   3   1   8   4]
 [  0   8  17   0   2   2   2  26   3   2   2  27   1   0   0   2]
 [  2  29  10   7   3   9   7  27  22   1   6   3 102   5   2   2]
 [  0  19   1   0   4   2   3   9   7   0   5   0   4  28   0   1]
 [  3  10   1   1   1

In [35]:
print(classification_report(y_test,y_pred_knn_pca,target_names=le.classes_))

                    precision    recall  f1-score   support

    cabernet franc       0.68      0.58      0.63        69
cabernet sauvignon       0.63      0.71      0.67       720
        chardonnay       0.53      0.58      0.55       298
          grenache       0.38      0.42      0.40        50
            malbec       0.29      0.17      0.21        59
            merlot       0.48      0.43      0.45       174
          nebbiolo       0.48      0.59      0.53       101
             other       0.44      0.45      0.44       562
        pinot noir       0.47      0.44      0.46       224
          riesling       0.53      0.23      0.32        44
        sangiovese       0.47      0.38      0.42       130
   sauvignon blanc       0.39      0.29      0.33        94
      shiraz syrah       0.41      0.43      0.42       237
       tempranillo       0.44      0.34      0.38        83
  touriga nacional       0.45      0.43      0.44        76
         zinfandel       0.19      0.14

In [36]:
print("Test accuracy: %.3f" % classifier_knn_pca.score(X_test, y_test))

Test accuracy: 0.505


**SVC Normalizado**

In [37]:
classifier_svc_pca = SVC(C=10,gamma=0.01,kernel='rbf')

In [38]:
classifier_svc_pca.fit(X_train, y_train)

SVC(C=10, gamma=0.01)

In [39]:
y_pred_svc_pca = classifier_svc_pca.predict(X_test)

In [40]:
result = confusion_matrix(y_test, y_pred_svc_pca)

print('Confusion Matrix SVC - Normalizado y PCA')

print(result)


print('Accuracy:', accuracy_score(y_test, y_pred_svc_pca))

Confusion Matrix SVC - Normalizado y PCA
[[ 38   0   0   0   0   0   0  31   0   0   0   0   0   0   0   0]
 [  0 368   0   0   0   0   0 352   0   0   0   0   0   0   0   0]
 [  0   0  30   0   0   0   0 268   0   0   0   0   0   0   0   0]
 [  0   0   0  17   0   0   0  33   0   0   0   0   0   0   0   0]
 [  0   0   0   0   8   0   0  51   0   0   0   0   0   0   0   0]
 [  0   1   0   0   0  71   0 102   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0  50  51   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0 562   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0 187  37   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0  37   0   7   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0  91   0   0  39   0   0   0   0   0]
 [  0   0   0   0   0   0   0  77   0   0   0  17   0   0   0   0]
 [  0   0   0   0   0   0   0 162   0   0   0   0  75   0   0   0]
 [  0   0   0   0   0   0   0  59   0   0   0   0   0  24   0   0]
 [  0   0   0   0   0

In [41]:
print(classification_report(y_test,y_pred_svc_pca,target_names=le.classes_))

                    precision    recall  f1-score   support

    cabernet franc       1.00      0.55      0.71        69
cabernet sauvignon       1.00      0.51      0.68       720
        chardonnay       1.00      0.10      0.18       298
          grenache       1.00      0.34      0.51        50
            malbec       1.00      0.14      0.24        59
            merlot       1.00      0.41      0.58       174
          nebbiolo       1.00      0.50      0.66       101
             other       0.26      1.00      0.41       562
        pinot noir       1.00      0.17      0.28       224
          riesling       1.00      0.16      0.27        44
        sangiovese       1.00      0.30      0.46       130
   sauvignon blanc       1.00      0.18      0.31        94
      shiraz syrah       1.00      0.32      0.48       237
       tempranillo       1.00      0.29      0.45        83
  touriga nacional       1.00      0.41      0.58        76
         zinfandel       1.00      0.05

In [42]:
print("Test accuracy: %.3f" % classifier_svc_pca.score(X_test, y_test))

Test accuracy: 0.464


**BALANCEADO DE DATOS**

In [43]:
from imblearn.over_sampling import RandomOverSampler

ros = RandomOverSampler(sampling_strategy="not majority")
X_res, y_res = ros.fit_resample(X_pca,y)

X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size = 0.3, random_state=42)

**KNN BALANCEADO**

In [44]:
classifier_knn_balanced = KNeighborsClassifier(n_neighbors=5, p=3, weights='distance')
classifier_knn_balanced.fit(X_train, y_train)

KNeighborsClassifier(p=3, weights='distance')

In [45]:
y_pred_balanced = classifier_knn_balanced.predict(X_test)

In [46]:
result_balanced = confusion_matrix(y_test, y_pred_balanced)

print('Confusion Matrix - Clasificador KNN optimizado y Balanceado')

print(result_balanced)
print('Accuracy:', accuracy_score(y_test, y_pred_balanced))

Confusion Matrix - Clasificador KNN optimizado y Balanceado
[[628   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 20 367   6   7  27  35  21  17   8   1  46   5  26  24  21  21]
 [  3   1 594   2   0   2   2   8  14   6   2  25   8   1   0   1]
 [  0   0   0 685   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0 663   0   0   0   0   0   0   0   0   0   0   0]
 [  1   0   0   1   0 639   0   0   5   3   1   2   0   2   0   0]
 [  0   0   0   0   0   0 669   0   0   0   0   0   0   4   0   0]
 [  6  32  73  22  31  37  28 200  43  18  41  61  39  26  22  20]
 [  1   0   9   1   1   5   7   1 637   0   3   0   9   0   0   5]
 [  0   0   0   0   0   0   0   0   0 637   0   0   0   0   0   0]
 [  0   3   0   1   0   3   0   0   0   0 609   0   0   0   0   1]
 [  0   0   0   0   0   0   0   2   0   0   0 678   0   0   0   0]
 [  1   6   0   2   2   5   5   5   6   2   3   1 617   6   1   2]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0 676   0   0]
 [

In [47]:
print(classification_report(y_test,y_pred_balanced,target_names=le.classes_))

                    precision    recall  f1-score   support

    cabernet franc       0.95      1.00      0.98       628
cabernet sauvignon       0.90      0.56      0.69       652
        chardonnay       0.87      0.89      0.88       669
          grenache       0.95      1.00      0.97       685
            malbec       0.92      1.00      0.96       663
            merlot       0.88      0.98      0.93       654
          nebbiolo       0.91      0.99      0.95       673
             other       0.86      0.29      0.43       699
        pinot noir       0.89      0.94      0.92       679
          riesling       0.96      1.00      0.98       637
        sangiovese       0.86      0.99      0.92       617
   sauvignon blanc       0.88      1.00      0.93       680
      shiraz syrah       0.88      0.93      0.91       664
       tempranillo       0.91      1.00      0.96       676
  touriga nacional       0.94      1.00      0.97       694
         zinfandel       0.93      1.00

In [48]:
print("Test accuracy: %.3f" % classifier_knn_balanced.score(X_test, y_test))

Test accuracy: 0.908


In [49]:
# Validacion anidada
scores = cross_val_score(classifier_knn_balanced,X_train,y_train, scoring='accuracy',cv=5)

In [50]:
print('CV accuracy: %.3f +/- %.3f' %(np.mean(scores), np.std(scores)))

CV accuracy: 0.891 +/- 0.002


**SVC BALANCEADO**

In [51]:
classifier_svc_balanced = SVC(C=10,gamma=0.01,kernel='rbf')

In [52]:
classifier_svc_balanced.fit(X_train, y_train)

SVC(C=10, gamma=0.01)

In [53]:
y_pred_balanced= classifier_svc_balanced.predict(X_test)

In [54]:
result = confusion_matrix(y_test, y_pred_balanced)

print('Confusion Matrix - SVC Balanceado')

print(result)


print('Accuracy:', accuracy_score(y_test, y_pred_balanced))

Confusion Matrix - SVC Balanceado
[[628   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0 314   0   0   1   0   0 337   0   0   0   0   0   0   0   0]
 [  0   0 529   0   0   0   0 140   0   0   0   0   0   0   0   0]
 [  0   0   0 685   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0 663   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0 638   0  16   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0 669   4   0   0   0   0   0   0   0   0]
 [  0   1   0   0   0   0   0 697   0   0   0   0   1   0   0   0]
 [  0   0   0   0   0   0   0  65 614   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0 637   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   8   0   0 609   0   0   0   0   0]
 [  0   0   0   0   0   0   0   2   0   0   0 678   0   0   0   0]
 [  0   0   0   0   0   0   0  67   0   0   0   0 597   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0 676   0   0]
 [  0   0   0   0   0   0   

In [55]:
print(classification_report(y_test,y_pred_balanced,target_names=le.classes_))

                    precision    recall  f1-score   support

    cabernet franc       1.00      1.00      1.00       628
cabernet sauvignon       1.00      0.48      0.65       652
        chardonnay       1.00      0.79      0.88       669
          grenache       1.00      1.00      1.00       685
            malbec       1.00      1.00      1.00       663
            merlot       1.00      0.98      0.99       654
          nebbiolo       1.00      0.99      1.00       673
             other       0.52      1.00      0.69       699
        pinot noir       1.00      0.90      0.95       679
          riesling       1.00      1.00      1.00       637
        sangiovese       1.00      0.99      0.99       617
   sauvignon blanc       1.00      1.00      1.00       680
      shiraz syrah       1.00      0.90      0.95       664
       tempranillo       1.00      1.00      1.00       676
  touriga nacional       1.00      1.00      1.00       694
         zinfandel       1.00      1.00

In [56]:
print("Test accuracy: %.3f" % classifier_svc_balanced.score(X_test, y_test))

Test accuracy: 0.940


In [57]:
# Validacion anidada
scores = cross_val_score(classifier_svc_balanced,X_train,y_train, scoring='accuracy',cv=5)

In [58]:
print('CV accuracy: %.3f +/- %.3f' %(np.mean(scores), np.std(scores)))

CV accuracy: 0.917 +/- 0.003
