In [15]:
from functions import *
import wordninja
import requests
import random
import pandas as pd
import os

# Torch cannot work properly in jupyter notebook
# import os
# count = 0 
# if count == 0:
#     os.chdir("test_dir")
#     count += 1


In [16]:
os.getcwd()

'c:\\Users\\Zoe Lua\\DSA4266_Grp2'

In [17]:
# pd.read_pickle(DF_PATH)

In [18]:
## CONFIG

DF_PATH = "Data/full_df_2.pkl"
X_NAME = 'clean_msg'
Y_NAME = 'class'
EMBEDDINGS_FOLDER = 'embeddings_2'

#### For preprocessing
# ALL_MAXLEN_PER_SENT = [150]
# ALL_TOKEN_MAX_WORDS = [5000]


In [19]:
## Semantic Dictionaries

def get_synonyms_conceptnet(word):
    synonyms = []
    url = f'http://api.conceptnet.io/c/en/{word}?filter=/c/en'
    response = requests.get(url)
    data = response.json()
    for edge in data['edges']:
        if edge['rel']['label'] == 'Synonym' and edge['start']['language'] == 'en' and edge['end']['language'] == 'en':
            start = edge['start']['label']
            end = edge['end']['label']
            synonyms.append(end if start == word else start)

    if synonyms != []:
        synonym = random.choice(synonyms)
    else:
        synonym = synonyms
    return synonym

def get_synonyms_wordnet(word):
    synonyms = []
    synsets = wordnet.synsets(word)
    for synset in synsets:
        synonyms.extend([lemma.name() for lemma in synset.lemmas() if lemma.name() != word])

    if synonyms != []:
        synonym = random.choice(synonyms)
    else:
        synonym = synonyms
    return synonym

In [20]:
class DataPrep():
    def __init__(self, subset = None, text_prep = 'lem', token_max_words = 5000, maxlen_per_sent = 512, undersample = True):
        """
        subset: X[:subset]
        """
        self.df = pd.read_pickle(DF_PATH)
        self.subset = subset
        self.maxlen_per_sent = maxlen_per_sent

        self.remove_duplicates()
        print('Dupes removed')
        self.X = self.df[X_NAME]
        self.y = self.df[Y_NAME].apply(lambda x: 1 if x == 'spam' else 0)
        self.token_max_words = token_max_words

        if self.subset:
            self.X = self.X[:self.subset]
            self.y = self.y[:self.subset]
        
        print('Tokenizing..')
        self.tokenize()
        print('Finished Tokenizing')

        print('Initialising word2vec')
        self.word_to_vec_map = self.word2vec()

        print('lemm/stemm')
        if text_prep == 'lem':
            self.X = self.lemming()
        if text_prep == 'stem':
            self.X = self.stemming()

        print('Embedding...')
        self.pre_embed()
        path = f'{EMBEDDINGS_FOLDER}/emb_matrix_x{self.subset}_tok_{self.maxlen_per_sent}_len{self.token_max_words}.pkl'
        if os.path.exists(path):
            self.emb_matrix = pd.read_pickle(path)
        else:
            self.emb_matrix = self.tok_embedding_mat(alternative = [get_synonyms_conceptnet, get_synonyms_wordnet])
            print('Finished embedding')

        print('Padding')
        X_pad = self.pad()
        print('Finished padding')

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X_pad, self.y, test_size=0.33, random_state=42)

        if undersample:
            print('Undersampling..')
            print(Counter(self.y_train))
            self.X_train, self.y_train = self.undersample()
            print(Counter(self.y_train))




    def remove_duplicates(self):
    
        ## First remove all those X values with differing binary y values
        occurrences = self.df.groupby([X_NAME, Y_NAME]).size().reset_index(name='count')
        duplicates = occurrences[occurrences.duplicated(subset=X_NAME, keep=False)]
        for index, row in duplicates.iterrows():
            x_value = row[X_NAME]
            max_count = occurrences[(occurrences[X_NAME] == x_value)].max()['count']
            occurrences.drop(occurrences[(occurrences[X_NAME] == x_value) & (occurrences['count'] != max_count)].index, inplace=True)

        ## Remove duplicates
        self.df = occurrences.drop_duplicates(subset = X_NAME).reset_index(drop = True)
    
    def tokenize(self, join = False):
        def tokenize_helper(text, join = False):
            stop_words = set(stopwords.words('english'))
            tokens = word_tokenize(text)
            tokens = [word.lower() for word in tokens if word.lower() not in stop_words]

            if join:
                tokens = ' '.join([''.join(c for c in word if c not in string.punctuation) for word in tokens if word])
        
            return tokens
        
        self.X = self.X.apply(lambda x: tokenize_helper(x, join))

    ## Embedders
        
    def word2vec(self):
        from gensim.models.word2vec import Word2Vec
        import gensim.downloader as api

        word_to_vec_map = api.load("word2vec-google-news-300")

        return word_to_vec_map
    
    
    ## Stemming/ Lemmetization

    def stemming(self):
        ps = PorterStemmer()

        def stem(row):
            print(row)
            stemmed = []
            for word in row:
                stemmed += [ps.stem(word)]
            print('STEMMED:', stemmed)

            return stemmed

        return self.X.apply(stem)
    

    def lemming(self):

        def lem(row):
            lemmatizer = WordNetLemmatizer()
            lemmed = [lemmatizer.lemmatize(word) for word in row]
            # print(row)
            # print(lemmed,"\n")
            return lemmed

        return self.X.apply(lem)
    
    def pre_embed(self):
        self.tokenizer = text.Tokenizer(num_words=self.token_max_words)
        self.tokenizer.fit_on_texts(self.X)

        self.sequences = self.tokenizer.texts_to_sequences(self.X)

        self.word_index = self.tokenizer.word_index
        self.vocab_len = len(self.word_index) + 1
        self.embed_vector_len = self.word_to_vec_map['moon'].shape[0]
    
    def tok_embedding_mat(self, alternative):
        """
        embedder: word2vec
        alternative: list of callable to find synonyms from, inorder of precedence
        """
        synonyms = {} #Dict to store synonyms

        emb_matrix = np.zeros((self.vocab_len, self.embed_vector_len))


        for word, index in tqdm.tqdm(self.word_index.items(), total = len(self.word_index)):
            try: # Try to find in word2vec
                embedding_vector = self.word_to_vec_map[word]
                emb_matrix[index-1, :] = embedding_vector
            except: # Word2vec dont have, find in own synonym dict
                synonym = synonyms.get(word, None) 
                if (synonym) and (synonym in self.word_to_vec_map.index_to_key):
                    emb_matrix[index-1,:] = self.word_to_vec_map[synonym]
                else: # If word2vec, own synonym dict dont have, find from dictionaries
                    for dictionary in alternative:
                        try: 
                            synonym = dictionary(word)
                            if synonym:
                                # print(f'Found synonym: {synonym} for word: {word}')
                                embedding_vector = self.word_to_vec_map[synonym] 
                                emb_matrix[index-1, :] = embedding_vector
                                synonyms[word] = synonym
                        except:
                            continue
        self.syn = synonyms
        
        try:
            pd.to_pickle(emb_matrix, f"{EMBEDDINGS_FOLDER}/emb_matrix_x{self.subset}_tok_{self.maxlen_per_sent}_len{self.token_max_words}.pkl")
        except:
            print('Saved unsuccessfully')
            return emb_matrix

        return emb_matrix


    def pad(self):
        X_pad = pad_sequences(self.sequences, maxlen = self.maxlen_per_sent)
        return X_pad

    def undersample(self):
        undersampler = RandomUnderSampler(random_state=42)
        X_resampled, y_resampled = undersampler.fit_resample(self.X_train, self.y_train)

        return X_resampled, y_resampled


class Train(DataPrep):
    def __init__(self, subset = None, text_prep = 'lem', token_max_words = 5000, maxlen_per_sent = 150, undersample = True):
        super().__init__(subset, text_prep, token_max_words, maxlen_per_sent, undersample)

    def lstm(self, nodes):
        """
        Single layer LSTM
        """
        self.model = Sequential()
        self.model.add(Embedding(input_dim= self.vocab_len, output_dim= self.embed_vector_len, input_shape = (self.maxlen_per_sent,), trainable=False, embeddings_initializer = initializers.Constant(self.emb_matrix)))
        self.model.add(LSTM(512))
        self.model.add(Dense(1, activation = 'sigmoid'))

        self.model.compile(optimizer='adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

        # Train model
        self.model.fit(self.X_train, self.y_train, epochs=10, batch_size=1, verbose=1)  
    
    def lstm_op(self):
        import math

        def objective(trial):
            units = trial.suggest_categorical("units", [32, 64, 128, 256])
            units2 = units//2
            epochs = trial.suggest_categorical("epochs", [10, 20, 30])
            batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
            dropout = trial.suggest_float("dropout", low = 0.1, high = 0.5)
            
            self.model = Sequential()
            self.model.add(Embedding(input_dim= self.vocab_len, output_dim= self.embed_vector_len, input_shape = (self.maxlen_per_sent,), trainable=False, embeddings_initializer = initializers.Constant(self.emb_matrix)))
            self.model.add(LSTM(units))
            self.model.add(Dropout(dropout))
            self.model.add(Dense(units2))
            self.model.add(Dense(1, activation = 'sigmoid'))

            self.model.compile(optimizer='adam',
                            loss='binary_crossentropy',
                            metrics=['accuracy'])

            self.model.fit(self.X_train, self.y_train, epochs= epochs, batch_size= batch_size, verbose=1)  
            _, accuracy = self.model.evaluate(self.X_test, self.y_test, verbose=0)

            return accuracy

        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=10)

        self.best_trial = study.best_trial
        self.best_params = self.best_trial.params
        self.best_accuracy = self.best_trial.value

        print("Best hyperparameters:", self.best_params)
        print("Best accuracy:", self.best_accuracy)


    def predict(self, verbose = False):

        loss, accuracy = self.model.evaluate(self.X_test, self.y_test)
        print("Test Accuracy:", accuracy)

        # Make predictions
        predictions = self.model.predict(self.X_test)

        y_hat = [1 if i> 0.5 else 0 for i in predictions]

        if verbose:
            print("Classification Report:")
            print(classification_report(self.y_test, y_hat))

            print("Confusion Matrix:")
            print(confusion_matrix(self.y_test, y_hat))

    


In [21]:
test = Train()



Dupes removed
Tokenizing..
Finished Tokenizing
Initialising word2vec
lemm/stemm
Embedding...


100%|██████████| 34255/34255 [40:05<00:00, 14.24it/s]  


Finished embedding
Padding
Finished padding
Undersampling..
Counter({0: 20477, 1: 5855})
Counter({0: 5855, 1: 5855})


In [None]:
test.lstm(nodes = 256)

In [27]:
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)


cpu


In [54]:
test.lstm_op()

[I 2024-04-01 12:44:45,351] A new study created in memory with name: no-name-e0932ef5-cc00-491c-901f-bcbe8274a0ea


  super().__init__(**kwargs)


Epoch 1/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 533ms/step - accuracy: 0.5694 - loss: 0.6752
Epoch 2/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 490ms/step - accuracy: 0.7480 - loss: 0.5578
Epoch 3/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 436ms/step - accuracy: 0.8278 - loss: 0.4109
Epoch 4/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 440ms/step - accuracy: 0.9262 - loss: 0.2141
Epoch 5/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 444ms/step - accuracy: 0.9585 - loss: 0.1441
Epoch 6/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 499ms/step - accuracy: 0.9852 - loss: 0.0671
Epoch 7/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 457ms/step - accuracy: 0.9918 - loss: 0.0580
Epoch 8/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 505ms/step - accuracy: 0.9933 - loss: 0.0390
Epoch 9/30
[1m17/17[0m [32m━━━━━━━

[I 2024-04-01 12:49:28,276] Trial 0 finished with value: 0.8999999761581421 and parameters: {'units': 32, 'epochs': 30, 'batch_size': 32, 'dropout': 0.45710706363227016}. Best is trial 0 with value: 0.8999999761581421.


Epoch 1/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 4s/step - accuracy: 0.6011 - loss: 0.6433
Epoch 2/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 4s/step - accuracy: 0.7657 - loss: 0.4722
Epoch 3/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 5s/step - accuracy: 0.8732 - loss: 0.3590
Epoch 4/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 5s/step - accuracy: 0.9533 - loss: 0.1599
Epoch 5/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 4s/step - accuracy: 0.9821 - loss: 0.0657
Epoch 6/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 5s/step - accuracy: 0.9825 - loss: 0.0541
Epoch 7/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 4s/step - accuracy: 0.9913 - loss: 0.0324
Epoch 8/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 4s/step - accuracy: 0.9882 - loss: 0.0286
Epoch 9/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0

[I 2024-04-01 13:04:10,054] Trial 1 finished with value: 0.8969696760177612 and parameters: {'units': 256, 'epochs': 20, 'batch_size': 64, 'dropout': 0.302037602800825}. Best is trial 0 with value: 0.8999999761581421.


Epoch 1/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 771ms/step - accuracy: 0.5310 - loss: 0.6819
Epoch 2/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 783ms/step - accuracy: 0.7539 - loss: 0.5161
Epoch 3/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 744ms/step - accuracy: 0.8852 - loss: 0.3163
Epoch 4/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 763ms/step - accuracy: 0.9786 - loss: 0.0869
Epoch 5/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 765ms/step - accuracy: 0.9950 - loss: 0.0457
Epoch 6/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 752ms/step - accuracy: 0.9931 - loss: 0.0283
Epoch 7/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 774ms/step - accuracy: 0.9901 - loss: 0.0453
Epoch 8/30
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 783ms/step - accuracy: 0.9914 - loss: 0.0245
Epoch 9/30
[1m17/17[0m [32m━━

[I 2024-04-01 13:11:23,671] Trial 2 finished with value: 0.9121212363243103 and parameters: {'units': 32, 'epochs': 30, 'batch_size': 32, 'dropout': 0.23063450739749547}. Best is trial 2 with value: 0.9121212363243103.


Epoch 1/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 1s/step - accuracy: 0.5998 - loss: 0.6735
Epoch 2/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1s/step - accuracy: 0.7410 - loss: 0.5918
Epoch 3/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - accuracy: 0.8106 - loss: 0.4824
Epoch 4/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - accuracy: 0.8284 - loss: 0.3896
Epoch 5/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - accuracy: 0.9488 - loss: 0.2191
Epoch 6/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 914ms/step - accuracy: 0.9778 - loss: 0.0892
Epoch 7/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 664ms/step - accuracy: 0.9499 - loss: 0.1461
Epoch 8/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 593ms/step - accuracy: 0.9694 - loss: 0.1061
Epoch 9/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

[I 2024-04-01 13:14:45,026] Trial 3 finished with value: 0.9212121367454529 and parameters: {'units': 32, 'epochs': 30, 'batch_size': 64, 'dropout': 0.37470173407962626}. Best is trial 3 with value: 0.9212121367454529.


Epoch 1/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 514ms/step - accuracy: 0.5210 - loss: 0.6535
Epoch 2/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 523ms/step - accuracy: 0.7907 - loss: 0.4474
Epoch 3/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 513ms/step - accuracy: 0.9241 - loss: 0.2331
Epoch 4/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 513ms/step - accuracy: 0.9538 - loss: 0.1442
Epoch 5/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 507ms/step - accuracy: 0.9762 - loss: 0.0730
Epoch 6/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 523ms/step - accuracy: 0.9898 - loss: 0.0410
Epoch 7/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 526ms/step - accuracy: 0.9912 - loss: 0.0412
Epoch 8/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 523ms/step - accuracy: 0.9967 - loss: 0.0105
Epoch 9/20
[1m17/17[0m [32m━━━━━━━━━

[I 2024-04-01 13:17:50,325] Trial 4 finished with value: 0.918181836605072 and parameters: {'units': 128, 'epochs': 20, 'batch_size': 32, 'dropout': 0.4636636128186612}. Best is trial 3 with value: 0.9212121367454529.


Epoch 1/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 2s/step - accuracy: 0.6705 - loss: 0.6103
Epoch 2/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 2s/step - accuracy: 0.8987 - loss: 0.2539
Epoch 3/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 2s/step - accuracy: 0.9415 - loss: 0.1886
Epoch 4/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 2s/step - accuracy: 0.9773 - loss: 0.0803
Epoch 5/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 2s/step - accuracy: 0.9462 - loss: 0.1073
Epoch 6/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 2s/step - accuracy: 0.9909 - loss: 0.0360
Epoch 7/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 2s/step - accuracy: 0.9937 - loss: 0.0333
Epoch 8/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 2s/step - accuracy: 0.9894 - loss: 0.0240
Epoch 9/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

[I 2024-04-01 13:24:03,194] Trial 5 finished with value: 0.9090909361839294 and parameters: {'units': 256, 'epochs': 10, 'batch_size': 32, 'dropout': 0.2626514870278893}. Best is trial 3 with value: 0.9212121367454529.


Epoch 1/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 347ms/step - accuracy: 0.6471 - loss: 0.6277
Epoch 2/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 346ms/step - accuracy: 0.8003 - loss: 0.4225
Epoch 3/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 349ms/step - accuracy: 0.9415 - loss: 0.1761
Epoch 4/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 345ms/step - accuracy: 0.9595 - loss: 0.1213
Epoch 5/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 354ms/step - accuracy: 0.9958 - loss: 0.0412
Epoch 6/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 348ms/step - accuracy: 0.9936 - loss: 0.0351
Epoch 7/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 346ms/step - accuracy: 0.9957 - loss: 0.0250
Epoch 8/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 350ms/step - accuracy: 0.9725 - loss: 0.0812
Epoch 9/20
[1m17/17[0m [32m━━━━━━━━━━

[I 2024-04-01 13:26:07,411] Trial 6 finished with value: 0.9212121367454529 and parameters: {'units': 32, 'epochs': 20, 'batch_size': 32, 'dropout': 0.22297123095543092}. Best is trial 3 with value: 0.9212121367454529.


Epoch 1/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 1s/step - accuracy: 0.6338 - loss: 0.6378
Epoch 2/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1s/step - accuracy: 0.7650 - loss: 0.4482
Epoch 3/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.9092 - loss: 0.2435
Epoch 4/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step - accuracy: 0.9603 - loss: 0.1436
Epoch 5/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.9887 - loss: 0.0701
Epoch 6/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2s/step - accuracy: 0.9926 - loss: 0.0376
Epoch 7/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.9897 - loss: 0.0402
Epoch 8/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.9899 - loss: 0.0391
Epoch 9/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0

[I 2024-04-01 13:33:02,756] Trial 7 finished with value: 0.9060605764389038 and parameters: {'units': 128, 'epochs': 30, 'batch_size': 64, 'dropout': 0.43324029512447715}. Best is trial 3 with value: 0.9212121367454529.


Epoch 1/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 1s/step - accuracy: 0.6538 - loss: 0.6424
Epoch 2/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.7758 - loss: 0.4610
Epoch 3/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1s/step - accuracy: 0.8597 - loss: 0.3473
Epoch 4/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1s/step - accuracy: 0.9218 - loss: 0.1865
Epoch 5/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1s/step - accuracy: 0.9835 - loss: 0.0712
Epoch 6/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1s/step - accuracy: 0.9876 - loss: 0.0516
Epoch 7/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.9925 - loss: 0.0265
Epoch 8/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1s/step - accuracy: 0.9897 - loss: 0.0438
Epoch 9/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0

[I 2024-04-01 13:37:38,891] Trial 8 finished with value: 0.9060605764389038 and parameters: {'units': 128, 'epochs': 20, 'batch_size': 64, 'dropout': 0.21696542515471423}. Best is trial 3 with value: 0.9212121367454529.


Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - accuracy: 0.5689 - loss: 0.6744
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1s/step - accuracy: 0.7092 - loss: 0.5973
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1s/step - accuracy: 0.7410 - loss: 0.5295
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 0.8207 - loss: 0.4035
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 0.8543 - loss: 0.3219
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step - accuracy: 0.9454 - loss: 0.1774
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1s/step - accuracy: 0.9650 - loss: 0.1165
Epoch 8/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1s/step - accuracy: 0.9609 - loss: 0.0987
Epoch 9/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/st

[I 2024-04-01 13:38:55,218] Trial 9 finished with value: 0.9151515364646912 and parameters: {'units': 64, 'epochs': 10, 'batch_size': 128, 'dropout': 0.31431720536965646}. Best is trial 3 with value: 0.9212121367454529.


Best hyperparameters: {'units': 32, 'epochs': 30, 'batch_size': 64, 'dropout': 0.37470173407962626}
Best accuracy: 0.9212121367454529
