In [12]:
import os
import pandas as pd
import numpy as np

In [13]:
# Read the IMDB reviews CSV into a DataFrame and preview its first rows.
imdb_csv_path = "/Users/sreeharshaankem/MachineLearning/pytorch_nlp_book/data/IMDB Dataset.csv"
df = pd.read_csv(imdb_csv_path)
df.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [14]:
# Convert the frame into a list of per-row dicts (one record per review).
# Every column of `df` ends up as a key, including the "Unnamed: 0" column
# visible in the preview above.
reviews_json = df.to_dict(orient="records")

# Model Training

In [16]:
# Switch into the package directory; the local `config` and `prepare_data`
# imports in the following cell presumably rely on it being the working
# directory (TODO confirm).
package_dir = "/Users/sreeharshaankem/MachineLearning/mlops/sentiment_prediction/predict_sentiment/"
os.chdir(package_dir)

In [17]:
import tensorflow as tf
from tensorflow.keras import layers as L
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, EarlyStopping
import config
from prepare_data import PrepareData


In [18]:
# Sanity check: print the current working directory after the chdir above.
!pwd

/Users/sreeharshaankem/MachineLearning/mlops/sentiment_prediction/predict_sentiment


In [20]:
class TrainModel(PrepareData):
    """Train an averaged-embedding binary sentiment classifier.

    Extends ``PrepareData`` (configured entirely from the ``config`` module)
    with a compiled Keras model and a training loop that logs per-epoch
    metrics to CSV, checkpoints weights, and stops early on ``val_loss``.

    Parameters
    ----------
    epochs : int
        Number of epochs passed to ``Model.fit``.
    batch_size : int
        Mini-batch size passed to ``Model.fit``.
    learning_rate : float, optional
        Learning rate for the Adam optimizer. Defaults to ``1e-4``, the value
        that was previously hard-coded.
    """

    # Directory `run_training` switches into when no `training_dir` is given.
    # Kept as the historical default so existing callers behave the same.
    DEFAULT_TRAINING_DIR = "/Users/sreeharshaankem/MachineLearning/mlops/sentiment_prediction/training"

    def __init__(self, epochs, batch_size, learning_rate=1e-4):
        super().__init__(input_fields=config.INPUT_FIELDS,
                         text_field=config.TEXT_FIELD,
                         target_field=config.TARGET_FIELD,
                         maxlen=config.MAX_LEN,
                         padding="pre",
                         vocab_size=config.VOCAB_SIZE)
        self.epochs = epochs
        self.batch_size = batch_size
        # Must be set before model_architecture(), which reads it at compile time.
        self.learning_rate = learning_rate
        self.model = self.model_architecture()

    def model_architecture(self):
        """Build and compile the classifier.

        Architecture: token ids -> embedding -> mean over the sequence axis
        -> Dense(16, relu) -> Dense(1, sigmoid).

        Returns
        -------
        tensorflow.keras.Model
            Model compiled with binary cross-entropy loss, an accuracy metric
            and an Adam optimizer using ``self.learning_rate``.
        """
        inp = L.Input(shape=(config.MAX_LEN,), name="text_input")
        # input_dim is VOCAB_SIZE + 1; presumably one extra row for the
        # padding index -- TODO confirm against PrepareData's tokenizer.
        embedding = L.Embedding(input_dim=config.VOCAB_SIZE + 1,
                                output_dim=config.EMBEDDING_DIM)(inp)
        # Built-in pooling layer averages over the sequence axis (axis 1),
        # equivalent to the former Lambda(tf.reduce_mean(x, axis=1)) but
        # serializable without custom objects.
        mean_embedding = L.GlobalAveragePooling1D()(embedding)
        fc1 = L.Dense(units=16, activation="relu")(mean_embedding)
        out = L.Dense(units=1, activation="sigmoid", name="sentiment_score")(fc1)
        model = Model(inputs=[inp], outputs=[out])

        # `learning_rate` replaces the deprecated `lr` alias, which newer
        # Keras releases no longer accept.
        model.compile(loss="binary_crossentropy", metrics=["accuracy"],
                      optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def run_training(self, input_json, training_dir=None):
        """Prepare the data and fit the model.

        Parameters
        ----------
        input_json : object
            Raw records forwarded to ``self.run_prep(input_json, split=True)``.
        training_dir : str, optional
            Directory to switch into before training; ``logs/training.csv``
            and ``models/weights.*.hdf5`` are written relative to it.
            Defaults to ``DEFAULT_TRAINING_DIR``.

        Returns
        -------
        History
            The object returned by ``Model.fit``.

        Notes
        -----
        This changes the process working directory (``os.chdir``) and does
        not restore it afterwards; that side effect is preserved from the
        original implementation.
        """
        if training_dir is None:
            training_dir = self.DEFAULT_TRAINING_DIR

        train_x, test_x, train_y, test_y = self.run_prep(input_json, split=True)
        # Presumably each element is a padded token-id sequence; round-tripping
        # through a list stacks them into one dense array -- TODO confirm.
        train_x = np.array(train_x.tolist())
        test_x = np.array(test_x.tolist())

        os.chdir(training_dir)
        # CSVLogger opens its file directly, so the output folders must exist
        # before fit() starts.
        for subdir in ("logs", "models"):
            os.makedirs(subdir, exist_ok=True)

        logger = CSVLogger(filename="logs/training.csv")
        es = EarlyStopping(monitor="val_loss", patience=3)
        checkpoint = ModelCheckpoint(filepath="models/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
                                     save_best_only=True, save_weights_only=True)

        return self.model.fit(train_x, train_y, epochs=self.epochs,
                              validation_data=(test_x, test_y),
                              batch_size=self.batch_size,
                              callbacks=[es, logger, checkpoint])
        

In [24]:
# Instantiate the trainer with the epoch count and batch size from config.
trainer = TrainModel(epochs=config.EPOCHS, batch_size=config.BATCH_SIZE)

In [25]:
# Run data preparation and model fitting on the review records; Keras prints
# per-epoch progress as the cell output.
trainer.run_training(input_json=reviews_json)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
