In [6]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Column roles: `about_product` holds the free-text input and `category`
# is the target label the models are trained to predict.
text_attribute = 'about_product'
categorical_attribute = 'category'

# Preprocessing settings.
# NOTE: the Embedding layers built later in this notebook use an input
# dimension of 10000, so VOCAB_SIZE must be kept in sync with them.
VOCAB_SIZE = 10000
TEST_SIZE = 0.2
SEED = 42  # fixed seed so the train/test split is identical on every run

# Load data (path is relative to the notebook's working directory)
data = pd.read_csv('content/amazon.csv')

# Select specific columns. Rows without a label cannot be encoded or used
# for supervised training, so they are dropped here.
selected_data = data[['product_name', 'about_product', 'actual_price', 'category']].dropna(
    subset=[categorical_attribute]
)

# The tokenizer calls string methods on every entry, so a missing
# description (NaN, i.e. a float) would raise; treat it as empty text instead.
texts = selected_data[text_attribute].fillna('').astype(str)

# Tokenization and encoding: words -> integer ids, keeping the VOCAB_SIZE
# most frequent ones; anything else maps to the "<OOV>" token.
tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
# Pad at the end so every sequence has the length of the longest one.
padded_sequences = pad_sequences(sequences, padding='post')
labels = selected_data[categorical_attribute]

# Encoding categorical labels as integers 0..n_classes-1
encoder = LabelEncoder()
labels = encoder.fit_transform(labels)

# Split the data into training and testing sets (seeded for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences, labels, test_size=TEST_SIZE, random_state=SEED
)


In [8]:
import mlflow
import mlflow.tensorflow
import time

# Enable auto logging: MLflow records parameters and the per-epoch Keras
# metrics of every `fit` call made inside an active run.
mlflow.tensorflow.autolog()

# Start a new run
with mlflow.start_run():

    # One softmax output unit per distinct encoded category.
    num_classes = len(set(labels))

    # Define the model: averaged word embeddings followed by a small MLP.
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(10000, 16, input_length=padded_sequences.shape[1]),
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])

    # Compile the model. Labels are integer ids, hence the sparse loss.
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train the model. The test split is passed as validation data purely
    # for monitoring; no callback in this cell acts on it.
    history = model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test), verbose=2)

    # Evaluate the model on the held-out test split
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f'Loss: {loss}')
    print(f'Accuracy: {accuracy}')

    # Log the final test metrics under their own keys. Autolog already
    # writes the per-epoch training curves as "loss" and "accuracy"; reusing
    # those keys would mix the test result into the training history.
    mlflow.log_metric("test_loss", loss)
    mlflow.log_metric("test_accuracy", accuracy)

    # Save the model to the MLflow Model format in a timestamped directory
    # so repeated runs do not collide on the same path.
    timestamp = int(time.time())
    mlflow.tensorflow.save_model(model, f"model_{timestamp}")

Epoch 1/20




37/37 - 4s - loss: 5.3251 - accuracy: 0.0461 - val_loss: 5.2767 - val_accuracy: 0.0478 - 4s/epoch - 116ms/step
Epoch 2/20


  saving_api.save_model(


37/37 - 1s - loss: 5.0648 - accuracy: 0.1297 - val_loss: 4.7413 - val_accuracy: 0.1604 - 650ms/epoch - 18ms/step
Epoch 3/20
37/37 - 1s - loss: 4.3922 - accuracy: 0.1587 - val_loss: 4.5001 - val_accuracy: 0.1604 - 514ms/epoch - 14ms/step
Epoch 4/20
37/37 - 1s - loss: 4.2634 - accuracy: 0.1587 - val_loss: 4.4816 - val_accuracy: 0.1604 - 523ms/epoch - 14ms/step
Epoch 5/20
37/37 - 1s - loss: 4.2470 - accuracy: 0.1587 - val_loss: 4.4835 - val_accuracy: 0.1604 - 513ms/epoch - 14ms/step
Epoch 6/20
37/37 - 0s - loss: 4.2142 - accuracy: 0.1587 - val_loss: 4.4722 - val_accuracy: 0.1604 - 419ms/epoch - 11ms/step
Epoch 7/20
37/37 - 0s - loss: 4.1980 - accuracy: 0.1587 - val_loss: 4.4584 - val_accuracy: 0.1604 - 425ms/epoch - 11ms/step
Epoch 8/20
37/37 - 0s - loss: 4.1506 - accuracy: 0.1587 - val_loss: 4.4225 - val_accuracy: 0.1604 - 416ms/epoch - 11ms/step
Epoch 9/20
37/37 - 0s - loss: 4.1064 - accuracy: 0.1587 - val_loss: 4.3858 - val_accuracy: 0.1604 - 433ms/epoch - 12ms/step
Epoch 10/20
37/37 -

INFO:tensorflow:Assets written to: C:\Users\Public\Documents\Wondershare\CreatorTemp\tmpgn2mdoyc\model\data\model\assets






Loss: 4.173336505889893
Accuracy: 0.16040955483913422
INFO:tensorflow:Assets written to: c:\Users\Abel\Documents\Projects\Payever\notebooks\model_1713567392\data\model\assets


INFO:tensorflow:Assets written to: c:\Users\Abel\Documents\Projects\Payever\notebooks\model_1713567392\data\model\assets


In [None]:
import mlflow
import mlflow.tensorflow
import time
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense, Dropout, LSTM
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2

# Enable auto logging: MLflow records parameters and the per-epoch Keras
# metrics of every `fit` call made inside an active run.
mlflow.tensorflow.autolog()

# Start a new run
with mlflow.start_run():

    # One softmax output unit per distinct encoded category.
    num_classes = len(set(labels))

    # Define the model: embeddings -> LSTM over the sequence -> average of
    # the LSTM outputs -> L2-regularised MLP classifier.
    model = tf.keras.Sequential([
        Embedding(10000, 32, input_length=padded_sequences.shape[1]),
        LSTM(64, return_sequences=True),
        GlobalAveragePooling1D(),
        Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.5),
        Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
        Dense(num_classes, activation='softmax')
    ])

    # Compile the model. Labels are integer ids, hence the sparse loss.
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Define callbacks. Both watch val_loss; restore_best_weights makes the
    # evaluated/saved model the best epoch rather than the one `patience`
    # epochs after it.
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    lr_reduction = ReduceLROnPlateau(monitor='val_loss', patience=2, verbose=1, factor=0.5, min_lr=0.00001)

    # Train the model. The callbacks steer training from val_loss, so the
    # validation data is carved out of the training split; using the test
    # split here would tune the stopping point and learning rate on the same
    # data that is reported as the final test result.
    history = model.fit(
        X_train,
        y_train,
        epochs=20,
        validation_split=0.1,
        verbose=2,
        callbacks=[early_stopping, lr_reduction],
    )

    # Evaluate the model on the untouched test split
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f'Loss: {loss}')
    print(f'Accuracy: {accuracy}')

    # Log the final test metrics under their own keys. Autolog already
    # writes the per-epoch training curves as "loss" and "accuracy"; reusing
    # those keys would mix the test result into the training history.
    mlflow.log_metric("test_loss", loss)
    mlflow.log_metric("test_accuracy", accuracy)

    # Save the model to the MLflow Model format in a timestamped directory
    # so repeated runs do not collide on the same path.
    timestamp = int(time.time())
    mlflow.tensorflow.save_model(model, f"model_{timestamp}")