In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split

In [None]:
# Read the diabetes dataset from a CSV file in the current working directory.
df = pd.read_csv('diabetes.csv')

In [None]:
# Preview the first rows to check the available columns and their values.
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [None]:
# Separate the feature columns from the binary 'Outcome' target.
y = df['Outcome']
X = df.drop(columns='Outcome')

# Standardise every feature column with a StandardScaler.
# NOTE(review): the scaler is fitted on all rows, before the train/validation/
# test split further down, so held-out rows contribute to the scaling statistics.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Re-attach the target as the last column. Reusing df.columns as labels relies
# on 'Outcome' being the final column of df.
data = np.hstack((X, y.to_numpy().reshape(-1, 1)))
transformed_df = pd.DataFrame(data, columns=df.columns)

In [None]:
# Balance the two classes by random oversampling. The fixed random_state makes
# the resampled data (and therefore the seeded split that follows) reproducible.
# NOTE(review): resampling happens before the train/validation/test split, so
# copies of the same row can land on both sides of the split; consider splitting
# first and oversampling only the training portion.
over = RandomOverSampler(random_state=0)
X, y = over.fit_resample(X, y)

# Rebuild the combined frame. np.asarray keeps this working whether y comes back
# as a pandas Series or as a NumPy array.
data = np.hstack((X, np.asarray(y).reshape(-1, 1)))
transformed_df = pd.DataFrame(data, columns=df.columns)

In [None]:
# Row count per class, shown as a (positives, negatives) tuple.
tuple(
    int((transformed_df['Outcome'] == outcome_value).sum())
    for outcome_value in (1, 0)
)

(500, 500)

In [None]:
# 60 / 20 / 20 split: hold out 40% of the rows first, then cut that held-out
# part in half to get the validation and test sets.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=0
)
X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

In [None]:
# Inspect the (already standardised) training feature matrix.
X_train

array([[-0.84488505, -0.68523633, -0.57412775, ..., -0.83672504,
         0.23896296, -1.04154944],
       [-1.14185152, -0.81042491,  0.8217115 , ...,  0.6863059 ,
        -0.67915312, -0.78628618],
       [-0.84488505,  2.44447821,  0.35643175, ...,  1.38436175,
         2.784923  , -0.95646168],
       ...,
       [ 0.04601433, -0.84172205, -0.2122435 , ..., -0.92556851,
        -0.97814487, -1.04154944],
       [ 2.12477957, -1.12339636,  0.25303625, ..., -0.24020459,
        -0.51908683,  0.14967911],
       [ 0.3429808 ,  0.47275805,  0.66661825, ..., -4.06047387,
         0.50775352,  3.04266271]])

In [None]:
# Small fully connected binary classifier: two 16-unit ReLU hidden layers
# followed by a single sigmoid output unit.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [None]:
# Binary cross-entropy loss, Adam with its default settings, accuracy reported.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

In [None]:
# Baseline: loss and accuracy on the training set before any call to fit.
model.evaluate(X_train, y_train)

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4801 - loss: 0.7788  


[0.7753003835678101, 0.4749999940395355]

In [None]:
# Baseline: loss and accuracy on the validation set before any call to fit.
model.evaluate(X_valid, y_valid)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4746 - loss: 0.7871 


[0.7713990211486816, 0.4950000047683716]

In [None]:
# Train for 20 epochs with mini-batches of 16, checking the validation set
# after every epoch. The returned History is kept (as the later training cells
# in this notebook do) so the per-epoch metrics stay available, and so its repr
# is no longer printed as the cell result.
history = model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=20,
    validation_data=(X_valid, y_valid),
)

Epoch 1/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.4995 - loss: 0.7328 - val_accuracy: 0.5600 - val_loss: 0.6869
Epoch 2/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6007 - loss: 0.6599 - val_accuracy: 0.6250 - val_loss: 0.6310
Epoch 3/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6880 - loss: 0.6206 - val_accuracy: 0.6750 - val_loss: 0.5951
Epoch 4/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.6588 - loss: 0.6035 - val_accuracy: 0.6950 - val_loss: 0.5711
Epoch 5/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.7140 - loss: 0.5624 - val_accuracy: 0.7300 - val_loss: 0.5472
Epoch 6/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.7111 - loss: 0.5648 - val_accuracy: 0.7200 - val_loss: 0.5305
Epoch 7/20
[1m38/38[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x78c34fb835f0>

In [None]:
# Final check of the trained model on the held-out test split.
model.evaluate(X_test, y_test)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7431 - loss: 0.5085 


[0.5130392909049988, 0.7400000095367432]

In [None]:
import tensorflow as tf
import tensorflow_hub as hub

In [None]:
# Load the wine reviews, reading only the listed columns from the CSV.
# Note that this rebinds `df`, which previously held the diabetes data.
df = pd.read_csv(
    'wine-reviews.csv',
    usecols=['country', 'description', 'points', 'variety', 'winery'],
)

In [None]:
# Preview the first wine reviews.
df.head()

Unnamed: 0,country,description,points,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",87,White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",87,Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",87,Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",87,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",87,Pinot Noir,Sweet Cheeks


In [None]:
# Binary target: 1 when a review scores 90 points or more, otherwise 0.
# Only the review text and that label are kept for modelling.
df = df.assign(label=(df['points'] >= 90).astype(int))[['description', 'label']]

In [None]:
# Shuffle once with a fixed seed so the split is reproducible, then cut the
# shuffled frame into 80% train / 10% validation / 10% test by position.
# Positional slicing replaces np.split on a DataFrame, which emitted a warning
# when this cell was run.
shuffled = df.sample(frac=1, random_state=42)
train_end = int(0.8 * len(shuffled))
val_end = int(0.9 * len(shuffled))

train = shuffled.iloc[:train_end]
val = shuffled.iloc[train_end:val_end]
test = shuffled.iloc[val_end:]

  return bound(*args, **kwds)


In [None]:
def df_to_dataset(dataframe, shuffle=True, batch_size=1024,
                  text_col='description', label_col='label'):
  """Turn a DataFrame of texts and labels into a batched tf.data.Dataset.

  Args:
    dataframe: DataFrame containing at least `text_col` and `label_col`.
      It is only read, never modified.
    shuffle: when True, shuffle the examples using a buffer as large as the
      DataFrame.
    batch_size: number of examples per batch.
    text_col: name of the column holding the input text.
    label_col: name of the column holding the target labels.

  Returns:
    A tf.data.Dataset of (text, label) batches with prefetching enabled.

  Raises:
    KeyError: if `text_col` or `label_col` is not a column of `dataframe`.
  """
  # Plain column selection does not mutate the caller's frame, so no defensive
  # copy of the whole DataFrame is needed.
  texts = dataframe[text_col]
  labels = dataframe[label_col]
  ds = tf.data.Dataset.from_tensor_slices((texts, labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds.prefetch(tf.data.AUTOTUNE)

In [None]:
# Only the training set benefits from shuffling. Validation and test batches
# keep the row order of their DataFrames, which avoids a needless shuffle buffer
# and lets predictions be matched back to rows.
train_data = df_to_dataset(train)
val_data = df_to_dataset(val, shuffle=False)
test_data = df_to_dataset(test, shuffle=False)

In [None]:
# TF Hub handle of the text-embedding module to load (nnlm-en-dim128, version 2,
# per the URL).
embedding = "https://tfhub.dev/google/nnlm-en-dim128/2"
# Wrap the module as a Keras layer that takes string inputs; trainable=True
# requests that the module's weights be updated during training.
hub_layer = hub.KerasLayer(embedding, dtype=tf.string, trainable=True)

In [None]:
import tensorflow as tf
import tensorflow_hub as hub

# Text classifier on top of the hub embedding: three 16-unit ReLU layers, each
# followed by 40% dropout, then a single sigmoid output unit.
# NOTE(review): hub_layer is only called from inside a Lambda, so its weights
# may not be tracked by this model and trainable=True might have no effect --
# verify against model.trainable_variables.
model = tf.keras.Sequential(
    [tf.keras.layers.Lambda(lambda x: hub_layer(x))]
    + [
        layer
        for _block in range(3)
        for layer in (
            tf.keras.layers.Dense(16, activation='relu'),
            tf.keras.layers.Dropout(0.4),
        )
    ]
    + [tf.keras.layers.Dense(1, activation='sigmoid')]
)

In [None]:
# Binary cross-entropy loss, Adam at a learning rate of 0.001, accuracy reported.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [None]:
# Baseline: loss and accuracy on the training batches before any call to fit.
model.evaluate(train_data)

[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.6263 - loss: 0.6771


[0.6771231293678284, 0.6257501840591431]

In [None]:
# Baseline: loss and accuracy on the validation batches before any call to fit.
model.evaluate(val_data)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.6310 - loss: 0.6768


[0.676330029964447, 0.6335307955741882]

In [None]:
# Train the hub-embedding classifier, evaluating on the validation set after
# every epoch; the History object is kept in `history`.
# NOTE(review): the saved output below reads "Epoch 1/40", so it was produced
# with a different epochs value than the 20 set here -- re-run to refresh it.
history = model.fit(train_data, validation_data=val_data, epochs=20)

Epoch 1/40
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - accuracy: 0.6079 - loss: 0.6665 - val_accuracy: 0.6298 - val_loss: 0.6134
Epoch 2/40
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.6327 - loss: 0.6233 - val_accuracy: 0.7242 - val_loss: 0.5633
Epoch 3/40
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.6814 - loss: 0.5888 - val_accuracy: 0.7315 - val_loss: 0.5381
Epoch 4/40
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - accuracy: 0.7098 - loss: 0.5705 - val_accuracy: 0.7402 - val_loss: 0.5255
Epoch 5/40
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 21ms/step - accuracy: 0.7192 - loss: 0.5589 - val_accuracy: 0.7434 - val_loss: 0.5192
Epoch 6/40
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - accuracy: 0.7227 - loss: 0.5496 - val_accuracy: 0.7445 - val_loss: 0.5132
Epoch 7/40
[1m102/102

In [None]:
# Final check of the trained hub-embedding classifier on the test batches.
model.evaluate(test_data)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.7516 - loss: 0.5018


[0.4983934164047241, 0.7549623250961304]

# LSTM

In [None]:
# Text-to-token-id layer with the vocabulary capped at 2000 entries. The
# vocabulary is learned from the training texts only (labels are dropped).
encoder = tf.keras.layers.TextVectorization(max_tokens=2000)
encoder.adapt(
    train_data.map(lambda description, _label: description)
)

In [None]:
# Pull the learned vocabulary out of the encoder as a NumPy array and show its
# first 20 entries.
vocab = np.array(encoder.get_vocabulary())
vocab[:20]

array(['', '[UNK]', 'and', 'the', 'a', 'of', 'with', 'this', 'is', 'wine',
       'flavors', 'in', 'it', 'to', 'its', 'on', 'fruit', 'aromas',
       'palate', 'that'], dtype='<U17')

In [None]:
# LSTM text classifier: raw strings -> token ids -> 32-dim embeddings -> LSTM
# -> 32-unit ReLU layer with 40% dropout -> single sigmoid output unit.
model = tf.keras.Sequential()
model.add(encoder)
model.add(
    tf.keras.layers.Embedding(
        # One embedding row per vocabulary entry known to the encoder.
        input_dim=len(encoder.get_vocabulary()),
        output_dim=32,
        mask_zero=True,
    )
)
model.add(tf.keras.layers.LSTM(32))
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [None]:
# Binary cross-entropy loss, Adam at a learning rate of 0.001, accuracy reported.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [None]:
# Baseline metrics before any call to fit. Both calls print a progress line,
# but only the second call's return value (validation loss and accuracy) is
# displayed as the cell result.
model.evaluate(train_data)
model.evaluate(val_data)

[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 190ms/step - accuracy: 0.3981 - loss: 0.6940
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 139ms/step - accuracy: 0.3992 - loss: 0.6939


[0.6939622163772583, 0.3954758644104004]

In [None]:
# Train the LSTM classifier for 5 epochs, evaluating on the validation set after
# every epoch; this rebinds `history` to the new History object.
history = model.fit(train_data, validation_data=val_data, epochs=5)

Epoch 1/5
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 510ms/step - accuracy: 0.8533 - loss: 0.3294 - val_accuracy: 0.8390 - val_loss: 0.3516
Epoch 2/5
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 448ms/step - accuracy: 0.8528 - loss: 0.3302 - val_accuracy: 0.8407 - val_loss: 0.3460
Epoch 3/5
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 424ms/step - accuracy: 0.8551 - loss: 0.3243 - val_accuracy: 0.8413 - val_loss: 0.3480
Epoch 4/5
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 407ms/step - accuracy: 0.8537 - loss: 0.3218 - val_accuracy: 0.8403 - val_loss: 0.3471
Epoch 5/5
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 448ms/step - accuracy: 0.8569 - loss: 0.3181 - val_accuracy: 0.8399 - val_loss: 0.3514


In [None]:
# Final check of the trained LSTM classifier on the test batches.
model.evaluate(test_data)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 198ms/step - accuracy: 0.8397 - loss: 0.3532


[0.35408613085746765, 0.8402062058448792]