# TensorFlow / Keras

# Load Dataset

In [1]:
import shutil

import pandas as pd

In [2]:
# Read the tab-separated corpus; the preview below shows its sentence/target/source columns.
df = pd.read_csv(
    "data.csv",
    delimiter="\t",
    encoding="utf-8",
)

In [3]:
# Preview the loaded frame (pandas elides the middle rows of a long frame).
df

Unnamed: 0,sentence,target,source
0,So there is no way for me to plug it in here i...,0,amazon
1,"Good case, Excellent value.",1,amazon
2,Great for the jawbone.,1,amazon
3,Tied to charger for conversations lasting more...,0,amazon
4,The mic is great.,1,amazon
...,...,...,...
2743,I think food should have flavor and texture an...,0,yelp
2744,Appetite instantly gone.,0,yelp
2745,Overall I was not impressed and would not go b...,0,yelp
2746,"The whole experience was underwhelming, and I ...",0,yelp


## Split dataset

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df.loc[:, "sentence"],
    df.loc[:, "target"],
    test_size=0.2,
    random_state=123,
)

In [6]:
# Sanity check: training sentences and labels must have the same length.
X_train.shape, y_train.shape

((2198,), (2198,))

In [7]:
# Sanity check: held-out sentences and labels must have the same length.
X_test.shape, y_test.shape

((550,), (550,))

# Preprocessing

In [8]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences




In [9]:
NUM_WORDS = 5000  # vocabulary cap handed to the Keras Tokenizer
MAX_LEN = 100  # fixed sequence length used later when padding the encoded sentences

# Fit the vocabulary on the training sentences ONLY: fitting on the test split
# as well would leak information about the held-out data into preprocessing.
tokenizer = Tokenizer(num_words=NUM_WORDS)
tokenizer.fit_on_texts(texts=X_train)

In [11]:
import pickle

# Serialize the fitted tokenizer to disk in binary mode.
with open("tokenizer.pkl", "wb") as tokenizer_file:
    tokenizer_file.write(pickle.dumps(tokenizer))

In [14]:
# Turn every sentence of both splits into a list of integer word indices.
X_train_encoded, X_test_encoded = map(
    tokenizer.texts_to_sequences, (X_train, X_test)
)

In [15]:
# Encode one sample sentence to inspect the word indices the tokenizer assigns.
tokenizer.texts_to_sequences(["The mic is great."])

[[1, 1213, 5, 20]]

In [16]:
# Bring every encoded sentence to exactly MAX_LEN entries, padding at the end.
X_train_encoded, X_test_encoded = (
    pad_sequences(encoded, maxlen=MAX_LEN, padding="post")
    for encoded in (X_train_encoded, X_test_encoded)
)

In [17]:
# After padding, both matrices should be (n_samples, MAX_LEN).
X_train_encoded.shape, X_test_encoded.shape

((2198, 100), (550, 100))

# MLflow

In [18]:
import mlflow
from mlflow.data.pandas_dataset import PandasDataset

# Send all runs to the tracking server on localhost port 8080
# (a server is expected to be listening there).
mlflow.set_tracking_uri("http://127.0.0.1:8080")

In [19]:
# Wrap the full frame as an MLflow dataset so runs can record which data they used.
dataset: PandasDataset = mlflow.data.from_pandas(
    df=df,
    source="data.csv",
)

  return _dataset_source_registry.resolve(


# Experiment 3 (Keras)

In [20]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Embedding, Flatten
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [21]:
experiment_name = "sentiment_analysis_tf_dense"

# Free-form tags attached to the experiment when it is created.
experiment_tags = {
    "nlp.framework": "TensorFlow",
    "nlp.encoding": "Tokenizer",
    "nlp.model": "Dense Network",
    "nlp.task": "Sentiment Analysis"
}

# create_experiment raises when an experiment with this name already exists,
# which would break re-running the notebook. Look it up first and only
# create it when it is missing.
existing_experiment = mlflow.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    experiment_id = mlflow.create_experiment(name=experiment_name,
                                             artifact_location="mlartifacts",
                                             tags=experiment_tags)
else:
    experiment_id = existing_experiment.experiment_id

# Display the experiment ID as the cell output (same as before).
experiment_id

'795147623486966613'

In [22]:
mlflow.set_experiment(experiment_name=experiment_name)  # The experiment ID could be used instead

SEED = 123  # same value as the train/test split seed

# Hyperparameter sets to compare; every entry produces one MLflow run.
params_list = [
    {
        "units": 1,
        "activation": "tanh",
        "kernel_regularizer": None,
        "epochs": 10
    },
    {
        "units": 10,
        "activation": "relu",
        "kernel_regularizer": "l2",
        "epochs": 40
    }
]

# The tokenizer never emits an index >= NUM_WORDS, so the embedding table does
# not need more rows than that even if the fitted vocabulary is larger.
# The +1 accounts for index 0, which is used for padding.
embedding_rows = min(NUM_WORDS, len(tokenizer.index_word) + 1)

# Keras history key -> name under which the per-epoch curve is logged.
# The "test_*" curves come from validation_data, i.e. the held-out test split.
curve_names = {
    "loss": "train_loss",
    "accuracy": "train_acc",
    "val_loss": "test_loss",
    "val_accuracy": "test_acc",
}

for i, params in enumerate(params_list):
    print(i)

    # Re-seed before building each model so runs are repeatable
    # (as far as the backend allows) and configurations start from the same RNG state.
    tf.keras.utils.set_random_seed(SEED)

    model = Sequential(
        [
            Embedding(input_dim=embedding_rows,
                      output_dim=50,
                      input_length=MAX_LEN),
            Flatten(),
            Dense(units=params["units"],
                  activation=params["activation"],
                  kernel_regularizer=params["kernel_regularizer"]),
            Dense(units=1, activation="sigmoid")
        ]
    )

    model.summary()

    model.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=["accuracy"])

    history = model.fit(x=X_train_encoded,
                        y=y_train,
                        batch_size=512,
                        epochs=params["epochs"],
                        verbose=2,
                        validation_data=(X_test_encoded, y_test)
    )

    # Threshold the sigmoid outputs into hard 0/1 labels and flatten to 1-D so
    # sklearn receives a plain NumPy label vector instead of a (n, 1) float tensor.
    y_pred = (model.predict(X_test_encoded) > 0.5).astype("int32").ravel()

    # zero_division=0 keeps the score at 0 (without a warning) when a model
    # predicts a single class only.
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    precision = precision_score(y_true=y_test, y_pred=y_pred, zero_division=0)
    recall = recall_score(y_true=y_test, y_pred=y_pred, zero_division=0)
    f1 = f1_score(y_true=y_test, y_pred=y_pred, zero_division=0)

    metrics = {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

    run_name = "_".join([f"{k}_{v}" for k, v in params.items()])

    with mlflow.start_run(run_name=run_name):
        # Log the hyperparameters
        mlflow.log_params(params)

        # Log the final metrics
        mlflow.log_metrics(metrics)

        # Log the per-epoch accuracy and loss curves (one step per epoch)
        for history_key, metric_name in curve_names.items():
            for epoch, value in enumerate(history.history[history_key]):
                mlflow.log_metric(key=metric_name, value=value, step=epoch)

        # Log the dataset
        mlflow.log_input(dataset, context="training")

        # Save the model locally, then attach the saved directory to the run.
        # NOTE(review): the original author notes that newer MLflow versions
        # expose this under mlflow.keras - confirm against the installed version.
        model_dir = f"models/yelp_model_tf_{i}"
        # save_model refuses to write into an existing non-empty directory, so
        # clear the output of a previous execution of this cell first.
        shutil.rmtree(model_dir, ignore_errors=True)
        # A handful of rows is enough as an input example; passing the full
        # training matrix would store all of it alongside the model.
        mlflow.tensorflow.save_model(model=model, path=model_dir, input_example=X_train_encoded[:5])

        mlflow.log_artifact(model_dir, artifact_path="mlartifacts")

0

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 50)           231600    
                                                                 
 flatten (Flatten)           (None, 5000)              0         
                                                                 
 dense (Dense)               (None, 1)                 5001      
                                                                 
 dense_1 (Dense)             (None, 1)                 2         
                                                                 
Total params: 236603 (924.23 KB)
Trainable params: 236603 (924.23 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________

Epoch 1/10


5/5 - 1s - loss: 0.6931 - accuracy: 0.5073 - val_loss: 0.6932 - val_accuracy: 0.4945 - 1s/epoch - 259ms/step
Epoch 2/10
5/5 - 0s - 

  return _infer_schema(self._df)


INFO:tensorflow:Assets written to: C:\Users\BVIEIRA1\tmp\hello-mlflow\models\yelp_model_tf_0\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\BVIEIRA1\tmp\hello-mlflow\models\yelp_model_tf_0\data\model\assets


1
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 100, 50)           231600    
                                                                 
 flatten_1 (Flatten)         (None, 5000)              0         
                                                                 
 dense_2 (Dense)             (None, 10)                50010     
                                                                 
 dense_3 (Dense)             (None, 1)                 11        
                                                                 
Total params: 281621 (1.07 MB)
Trainable params: 281621 (1.07 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________




Epoch 1/40
5/5 - 1s - loss: 0.8672 - accuracy: 0.4986 - val_loss: 0.8206 - val_accuracy: 0.4927 - 718ms/epoch - 144ms/step
Epoch 2/40
5/5 - 0s - loss: 0.8005 - accuracy: 0.5109 - val_loss: 0.7700 - val_accuracy: 0.4927 - 81ms/epoch - 16ms/step
Epoch 3/40
5/5 - 0s - loss: 0.7550 - accuracy: 0.5091 - val_loss: 0.7351 - val_accuracy: 0.5964 - 84ms/epoch - 17ms/step
Epoch 4/40
5/5 - 0s - loss: 0.7266 - accuracy: 0.5560 - val_loss: 0.7168 - val_accuracy: 0.4927 - 72ms/epoch - 14ms/step
Epoch 5/40
5/5 - 0s - loss: 0.7050 - accuracy: 0.5883 - val_loss: 0.7035 - val_accuracy: 0.6636 - 76ms/epoch - 15ms/step
Epoch 6/40
5/5 - 0s - loss: 0.6897 - accuracy: 0.7106 - val_loss: 0.6955 - val_accuracy: 0.6182 - 69ms/epoch - 14ms/step
Epoch 7/40
5/5 - 0s - loss: 0.6737 - accuracy: 0.8312 - val_loss: 0.6889 - val_accuracy: 0.6891 - 81ms/epoch - 16ms/step
Epoch 8/40
5/5 - 0s - loss: 0.6554 - accuracy: 0.8731 - val_loss: 0.6824 - val_accuracy: 0.6891 - 79ms/epoch - 16ms/step
Epoch 9/40
5/5 - 0s - loss: 0.

INFO:tensorflow:Assets written to: C:\Users\BVIEIRA1\tmp\hello-mlflow\models\yelp_model_tf_1\data\model\assets
