In [1]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras

In [3]:
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, TransformerMixin

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [4]:
# Load the wine dataset a single time and reuse the returned Bunch for the
# feature matrix, the labels and the column names (previously the dataset was
# reloaded once per DataFrame just to read `feature_names`).
wine = load_wine()
X, y = wine.data, wine.target

# Hold out 25% of the samples for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=True, random_state=0)

# Named-column views of the split arrays, used by the column-selecting
# transformers further down.
train_df = pd.DataFrame(X_train, columns=wine.feature_names)
test_df = pd.DataFrame(X_test, columns=wine.feature_names)

In [5]:
# Preview the first rows of the training features.
train_df.head(15)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,13.64,3.1,2.56,15.2,116.0,2.7,3.03,0.17,1.66,5.1,0.96,3.36,845.0
1,12.6,2.46,2.2,18.5,94.0,1.62,0.66,0.63,0.94,7.1,0.73,1.58,695.0
2,11.96,1.09,2.3,21.0,101.0,3.38,2.14,0.13,1.65,3.21,0.99,3.13,886.0
3,12.25,3.88,2.2,18.5,112.0,1.38,0.78,0.29,1.14,8.21,0.65,2.0,855.0
4,14.3,1.92,2.72,20.0,120.0,2.8,3.14,0.33,1.97,6.2,1.07,2.65,1280.0
5,12.88,2.99,2.4,20.0,104.0,1.3,1.22,0.24,0.83,5.4,0.74,1.42,530.0
6,13.49,3.59,2.19,19.5,88.0,1.62,0.48,0.58,0.88,5.7,0.81,1.82,580.0
7,13.56,1.71,2.31,16.2,117.0,3.15,3.29,0.34,2.34,6.13,0.95,3.38,795.0
8,14.34,1.68,2.7,25.0,98.0,2.8,1.31,0.53,2.7,13.0,0.57,1.96,660.0
9,13.71,1.86,2.36,16.6,101.0,2.61,2.88,0.27,1.69,3.8,1.11,4.0,1035.0


In [6]:
# Column dtypes and non-null counts for the training frame.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 133 entries, 0 to 132
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   alcohol                       133 non-null    float64
 1   malic_acid                    133 non-null    float64
 2   ash                           133 non-null    float64
 3   alcalinity_of_ash             133 non-null    float64
 4   magnesium                     133 non-null    float64
 5   total_phenols                 133 non-null    float64
 6   flavanoids                    133 non-null    float64
 7   nonflavanoid_phenols          133 non-null    float64
 8   proanthocyanins               133 non-null    float64
 9   color_intensity               133 non-null    float64
 10  hue                           133 non-null    float64
 11  od280/od315_of_diluted_wines  133 non-null    float64
 12  proline                       133 non-null    float64
dtypes: fl

In [7]:
# Summary statistics per feature, computed on the training frame.
train_df.describe()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
count,133.0,133.0,133.0,133.0,133.0,133.0,133.0,133.0,133.0,133.0,133.0,133.0,133.0
mean,12.999398,2.390977,2.365489,19.51203,100.458647,2.261579,1.956917,0.363985,1.609474,5.113083,0.947338,2.586015,749.81203
std,0.803811,1.122554,0.271995,3.52532,14.484846,0.613286,0.984769,0.127873,0.601056,2.405212,0.233392,0.725185,303.322165
min,11.03,0.89,1.36,10.6,70.0,1.1,0.47,0.13,0.42,1.28,0.54,1.27,312.0
25%,12.37,1.61,2.21,17.1,89.0,1.7,1.02,0.27,1.15,3.21,0.77,1.86,515.0
50%,13.05,1.9,2.36,19.5,98.0,2.2,2.04,0.34,1.56,4.6,0.95,2.77,675.0
75%,13.64,3.24,2.56,21.5,107.0,2.74,2.79,0.45,1.96,6.6,1.11,3.17,985.0
max,14.75,5.65,3.22,30.0,162.0,3.88,3.74,0.66,3.58,13.0,1.71,4.0,1547.0


In [8]:
class NumberSelector(BaseEstimator, TransformerMixin):
    """Transformer that picks one column out of its input.

    ``key`` is the label used to index ``X``. Indexing is done with a
    one-element list, so when ``X`` is a pandas DataFrame the result is a
    single-column DataFrame rather than a Series.
    """

    def __init__(self, key):
        self.key = key

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def transform(self, X):
        """Return ``X`` indexed by the one-element list ``[self.key]``."""
        selected = [self.key]
        return X[selected]

In [9]:
# Names of the feature columns that get standardised.
cont_columns = [
    'alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium',
    'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins',
    'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline',
]

# One (name, pipeline) pair per column: each pipeline selects its column and
# then standardises it.
cont_transformers = [
    (
        column,
        Pipeline([
            ('selector', NumberSelector(key=column)),
            ('standard', StandardScaler()),
        ]),
    )
    for column in cont_columns
]

# Concatenate the per-column pipeline outputs side by side.
feats = FeatureUnion(cont_transformers)

In [10]:
# Fit the per-column pipelines on the training frame only, then apply the
# already-fitted pipelines to the test frame.
# NOTE(review): this rebinds X_train / X_test from the raw split arrays to the
# transformed feature matrices; the later cells use these rebound values.
X_train = feats.fit_transform(train_df)
X_test = feats.transform(test_df)

In [11]:
# Seed TensorFlow's global random generator so that weight initialisation does
# not change from one "Restart & Run All" to the next.
tf.random.set_seed(0)

# Size the network from the data instead of hard-coding it: the output layer
# previously had 10 units although the labels only contain 3 classes, which
# left 7 logits for classes that do not exist.
n_features = X_train.shape[1]
n_classes = np.unique(y).size

model = keras.Sequential([
    keras.layers.Dense(128, activation='relu', input_shape=(n_features, )),
    keras.layers.Dense(64, activation='relu'),
    # No activation here: the layer emits raw logits, one per class.
    keras.layers.Dense(n_classes)
])

# Labels are integer class ids, hence the sparse loss; from_logits=True
# matches the activation-free output layer above.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])


In [12]:
# Show the first five training labels (integer class ids).
print(y_train[:5])

[0 2 1 2 0]


In [13]:
# Train on the transformed training features for 20 epochs; the value returned
# by fit() is kept in `h`.
h = model.fit(X_train, y_train, epochs=20,verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [14]:
# Evaluate on the held-out test split; the two returned values are unpacked as
# the loss and the accuracy metric.
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)

2/2 - 0s - loss: 0.0304 - accuracy: 1.0000 - 62ms/epoch - 31ms/step


In [15]:
# Report the accuracy measured on the test split.
print('\nTest accuracy:', test_acc)


Test accuracy: 1.0


In [16]:
# The trained model outputs raw logits (it was compiled with from_logits=True),
# so append a Softmax layer to turn them into class probabilities.
probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])

In [17]:
# Class probabilities for the first five test samples.
predictions = probability_model.predict(X_test[:5])



In [18]:
# Predicted class (index of the highest probability per row), followed by the
# true labels of the same five test samples for comparison.
print(np.argmax(predictions, axis=1))
print(y_test[:5])

[0 2 1 0 1]
[0 2 1 0 1]
