<a href="https://colab.research.google.com/github/Paresh1879/Wine-Review---Tensorflow/blob/main/Wine_Review.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf

from tensorflow import keras

import tensorflow_hub as hub

In [None]:
df = pd.read_csv('wine-reviews.csv',usecols = ['country','description','points','price','variety','winery'])

In [None]:
df.head()


Unnamed: 0,country,description,points,price,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",87,,White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",87,15.0,Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",87,14.0,Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",87,13.0,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",87,65.0,Pinot Noir,Sweet Cheeks


In [None]:
df = df.dropna(subset = ["description","points"])

In [None]:
df['label'] = (df.points >=90).astype(int)
df = df[['description','points','label']]

In [None]:
df.tail()

Unnamed: 0,description,points,label
129966,Notes of honeysuckle and cantaloupe sweeten th...,90,1
129967,Citation is given as much as a decade of bottl...,90,1
129968,Well-drained gravel soil gives this wine its c...,90,1
129969,"A dry style of Pinot Gris, this is crisp with ...",90,1
129970,"Big, rich and off-dry, this is powered by inte...",90,1


In [None]:
train,val,test = np.split(df.sample(frac=1),[int(0.8*len(df)),int(0.9*len(df))])

In [None]:
len (train),len(val),len(test)

(103976, 12997, 12998)

## create a train, test validate in thr below using TF

In [None]:
def df_to_dataset(dataframe, shuffle=True, batch_size=1024):
  df = dataframe.copy()
  labels = df.pop('label')
  df = df['description']
  ds = tf.data.Dataset.from_tensor_slices((df, labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  ds = ds.prefetch(tf.data.AUTOTUNE)
  return ds

In [None]:
train_data = df_to_dataset(train)
val_data = df_to_dataset(val)
test_data = df_to_dataset(test)



## Embedding and Layering

In [None]:
embedding = "https://tfhub.dev/google/nnlm-en-dim128/2"

In [None]:
hub_layer = hub.KerasLayer(embedding, dtype=tf.string, trainable=True)

In [None]:
hub_layer(list(train_data)[0][0])

<tf.Tensor: shape=(1024, 128), dtype=float32, numpy=
array([[ 0.28313455, -0.24009794,  0.12538715, ..., -0.06565645,
         0.07031234,  0.0514762 ],
       [ 0.54244107, -0.15548842,  0.20785172, ..., -0.21874702,
         0.00644438,  0.23122936],
       [ 0.5866352 , -0.21869862,  0.02680771, ..., -0.11263682,
         0.05477997,  0.06899633],
       ...,
       [ 0.26568112, -0.2699216 ,  0.09703694, ..., -0.12457646,
         0.15429808,  0.18167762],
       [ 0.7124612 , -0.19492243,  0.10763787, ..., -0.43549612,
        -0.08675535, -0.12815225],
       [ 0.38030997, -0.13644871,  0.07034801, ..., -0.20280549,
         0.07169035,  0.18319322]], dtype=float32)>

In [None]:
model = tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(1,activation='sigmoid'))

In [None]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss = tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

In [None]:
model.evaluate(train_data)



[0.6782740354537964, 0.6094964146614075]

In [None]:
model.evaluate(val_data)



[0.679583728313446, 0.6069862246513367]

In [62]:
history = model.fit(train_data,validation_data=val_data,epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [64]:
model.evaluate(test_data)



[0.4576888680458069, 0.8247422575950623]

## LSTM

In [65]:
encoder = tf.keras.layers.TextVectorization(max_tokens=2000)
encoder.adapt(train_data.map(lambda text, label: text))

In [69]:
vocab = np.array(encoder.get_vocabulary())
vocab[:20]

array(['', '[UNK]', 'and', 'the', 'a', 'of', 'with', 'this', 'is', 'wine',
       'flavors', 'in', 'it', 'to', 'its', 'on', 'fruit', 'aromas',
       'palate', 'that'], dtype='<U17')

In [71]:
model = tf.keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=32,
        mask_zero=True
    ),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [72]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss = tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

In [74]:
model.evaluate(train_data)
model.evaluate(val_data)




[0.6924291849136353, 0.5965992212295532]

In [75]:
history = model.fit(train_data, epochs=1, validation_data=val_data)



In [76]:
model.evaluate(test_data)



[0.3804786801338196, 0.825434684753418]