In [6]:
import numpy as np
import pandas as pd

# Configuración warnings
# ------------------------------------------------------------------------------
import warnings
#warnings.filterwarnings('once')
warnings.filterwarnings('ignore')

In [7]:
# Data Loading

df = pd.read_csv('winequality-red.csv')
 
df.head()



Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [8]:
# Splitting training and test datasets
import tensorflow as tf

# 75% of the data is selected
train_df = df.sample(frac=0.75, random_state=4) 

# it drops the training data
# from the original dataframe
val_df = df.drop(train_df.index)


In [9]:
# Normalization

# calling to (0,1) range
max_val = train_df.max(axis= 0)
min_val = train_df.min(axis= 0)

range = max_val - min_val
train_df = (train_df - min_val)/(range)

val_df = (val_df- min_val)/range


In [10]:
# now let's separate the targets and labels
X_train = train_df.drop('quality',axis=1)
X_val = val_df.drop('quality',axis=1)
y_train = train_df['quality']
y_val = val_df['quality']

# We'll need to pass the shape
# of features/inputs as an argument
# in our model, so let's define a variable 
# to save it.
input_shape = [X_train.shape[1]]

input_shape  # number of features



[11]

In [11]:
y_train = tf.keras.utils.to_categorical(y_train-3, num_classes=6)

In [12]:
y_train

array([[0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0.],
       ...,
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0.]], dtype=float32)

In [20]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense,Dropout, BatchNormalization
from keras import regularizers


In [22]:
model = Sequential()

# Add layers to the model
model.add(Dense(4096, input_shape= (X_train.shape[1],), activation='relu')) #input layer with 64 neurons
model.add(Dropout(0.5))
model.add(Dense(1024,activation= 'relu'))
model.add(Dense(512,activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(256,activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(128,activation = 'relu'))
model.add(Dense(6, activation='softmax')) #output layer with 1 neuron 
model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Nadam(4e-4),metrics ='Precision')
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 4096)              49152     
_________________________________________________________________
dropout_6 (Dropout)          (None, 4096)              0         
_________________________________________________________________
dense_13 (Dense)             (None, 1024)              4195328   
_________________________________________________________________
dense_14 (Dense)             (None, 512)               524800    
_________________________________________________________________
dropout_7 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 256)               131328    
_________________________________________________________________
dropout_8 (Dropout)          (None, 256)              

In [23]:
model.fit(X_train,y_train,validation_split = 0.26,batch_size =128,epochs = 60)#,callbacks=[callbacks])

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60


Epoch 59/60
Epoch 60/60


<keras.callbacks.History at 0x14ae1ce2a60>

In [24]:
# Get the weights of the first layer
weights = model.layers[0].get_weights()[0]

# Get the absolute values of the weights
importances = np.abs(weights)

# Normalize the importances
importances = importances / importances.sum(axis=0)

# Print the importances of each feature
for i, importance in enumerate(importances):
    print("Feature", i, "Importance", np.median(importance))

Feature 0 Importance 0.087237686
Feature 1 Importance 0.08464585
Feature 2 Importance 0.09019297
Feature 3 Importance 0.08756355
Feature 4 Importance 0.08763155
Feature 5 Importance 0.08847035
Feature 6 Importance 0.096224725
Feature 7 Importance 0.082624905
Feature 8 Importance 0.08724287
Feature 9 Importance 0.08879922
Feature 10 Importance 0.09277625


In [26]:
prediction_train = tf.argmax( model.predict(X_train), axis=1) 
print(y_pred_train)

tf.Tensor([4 4 4 ... 4 4 4], shape=(1199,), dtype=int64)


In [29]:
prediction_test = tf.argmax( model.predict(X_val), axis=1) 
print(prediction_test)

tf.Tensor(
[4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4], shape=(400,), dtype=int64)


In [32]:
from sklearn.metrics import plot_confusion_matrix

fig = plot_confusion_matrix(model, X_train, y_train, display_labels=model.classes_)
fig.figure_.suptitle("Confusion Matrix for Winequality Dataset - TRAIN")
plt.show()

AttributeError: 'Sequential' object has no attribute 'classes_'

In [None]:
# https://www.kaggle.com/code/ashishkumarak/wine-quality-prediction-tensorflow

In [25]:
from kerastuner import RandomSearch
import kerastuner


def build_model(hp):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(units=hp.Int('units', min_value=2048, max_value=8116, step=512), activation='relu', input_shape=(X_train.shape[1],)))
    model.add(Dense(units=hp.Int('units', min_value=2048, max_value=4096, step=512), activation='relu')) 
    model.add(Dropout(0.5))
    model.add(Dense(units=hp.Int('units', min_value=512, max_value=1024, step=256),activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(units=hp.Int('units', min_value=256, max_value=512, step=64),activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256,activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128,activation = 'relu'))
    model.add(Dense(6, activation='softmax')) 
    model.compile(optimizer=tf.keras.optimizers.Nadam(hp.Choice('learning_rate', values=[1e-3, 4e-4, 7e-4,3e-5])),
                  loss=tfa.losses.SigmoidFocalCrossEntropy(), metrics=[tfa.metrics.CohenKappa(num_classes = 6,weightage = 'quadratic')])
    return model

tuner = RandomSearch(build_model,    objective=kerastuner.Objective("val_loss", direction="min"),
     max_trials=10, project_name='intro_to_kt')
tuner.search(x=X_train, y=y_train, epochs=25,batch_size = 128, validation_split = 0.26)

best_model = tuner.get_best_models(num_models=1)[0]

ModuleNotFoundError: No module named 'kerastuner'

In [6]:
#Model building

model = tf.keras.Sequential([

    tf.keras.layers.Dense(units=64, activation='relu',input_shape=input_shape),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1)
])


# after you create your model it's
# always a good habit to print out it's summary
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                768       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
Total params: 4,993
Trainable params: 4,993
Non-trainable params: 0
_________________________________________________________________


In [7]:
# adam optimizer works pretty well for
# all kinds of problems and is a good starting point
model.compile(optimizer='adam', 
            # MAE error is good for numerical predictions
            loss='mae') 


In [8]:
losses = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                # it will use 'batch_size' number
                # of examples per example
                batch_size=256, 
                epochs=15, # total epoch
                )


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [9]:
# this will pass the first 3 rows of features
# of our data as input to make predictions
model.predict(X_val.iloc[0:3, :])



array([[0.42174911],
       [0.519015  ],
       [0.4368481 ]], dtype=float32)

In [10]:
y_val.iloc[0:3]



0     0.4
9     0.4
12    0.4
Name: quality, dtype: float64

In [12]:
loss_df

Unnamed: 0,loss,val_loss
0,0.723186,0.581182
1,0.511549,0.396933
2,0.335691,0.237998
3,0.189284,0.139953
4,0.140986,0.146401
5,0.15392,0.147429
6,0.143588,0.130583
7,0.127368,0.121019
8,0.120343,0.116173
9,0.116616,0.11202
