In [3]:
from functions import *
import wordninja
import requests
import random
import pandas as pd
import os

# Torch cannot work properly in jupyter notebook
# import os
# count = 0 
# if count == 0:
#     os.chdir("test_dir")
#     count += 1


  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package stopwords to C:\Users\Zoe
[nltk_data]     Lua\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to C:\Users\Zoe
[nltk_data]     Lua\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Zoe
[nltk_data]     Lua\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [4]:
os.getcwd()

'c:\\Users\\Zoe Lua\\DSA4266_Grp2'

In [5]:
## CONFIG

df_path = "Data/full_df.pkl"
X_name = 'processed'
y_name = 'class'

#### For preprocessing
all_maxlen_per_sent = [150]
all_token_max_words = [5000]


In [6]:
## Semantic Dictionaries

def get_synonyms_conceptnet(word):
    synonyms = []
    url = f'http://api.conceptnet.io/c/en/{word}?filter=/c/en'
    response = requests.get(url)
    data = response.json()
    for edge in data['edges']:
        if edge['rel']['label'] == 'Synonym' and edge['start']['language'] == 'en' and edge['end']['language'] == 'en':
            start = edge['start']['label']
            end = edge['end']['label']
            synonyms.append(end if start == word else start)

    if synonyms != []:
        synonym = random.choice(synonyms)
    else:
        synonym = synonyms
    return synonym

def get_synonyms_wordnet(word):
    synonyms = []
    synsets = wordnet.synsets(word)
    for synset in synsets:
        synonyms.extend([lemma.name() for lemma in synset.lemmas() if lemma.name() != word])

    if synonyms != []:
        synonym = random.choice(synonyms)
    else:
        synonym = synonyms
    return synonym

In [44]:
class DataPrep():
    def __init__(self, subset = None, text_prep = 'lem', token_max_words = 5000, maxlen_per_sent = 150, undersample = True):
        """
        subset: X[:subset]
        """
        self.df = pd.read_pickle(df_path)
        self.subset = subset
        self.maxlen_per_sent = maxlen_per_sent

        self.remove_duplicates()
        print('Dupes removed')
        self.X = self.df[X_name]
        self.y = self.df[y_name].apply(lambda x: 1 if x == 'spam' else 0)
        self.token_max_words = token_max_words

        if self.subset:
            self.X = self.X[:self.subset]
            self.y = self.y[:self.subset]
        
        print('Tokenizing..')
        self.tokenize()
        print('Finished Tokenizing')

        print('Initialising word2vec')
        self.word_to_vec_map = self.word2vec()

        print('lemm/stemm')
        if text_prep == 'lem':
            self.X = self.lemming()
        if text_prep == 'stem':
            self.X = self.stemming()

        print('Embedding...')
        self.pre_embed()
        path = f'embeddings\emb_matrix_x{self.subset}_tok_{self.maxlen_per_sent}_len{self.token_max_words}.pkl'
        if os.path.exists(path):
            self.emb_matrix = pd.read_pickle(path)
        else:
            self.emb_matrix = self.tok_embedding_mat(alternative = [get_synonyms_conceptnet, get_synonyms_wordnet])
            print('Finished embedding')

        print('Padding')
        X_pad = self.pad()
        print('Finished padding')

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X_pad, self.y, test_size=0.33, random_state=42)

        if undersample:
            print('Undersampling..')
            print(Counter(self.y_train))
            self.X_train, self.y_train = self.undersample()
            print(Counter(self.y_train))




    def remove_duplicates(self):
    
        ## First remove all those X values with differing binary y values
        occurrences = self.df.groupby([X_name, y_name]).size().reset_index(name='count')
        duplicates = occurrences[occurrences.duplicated(subset=X_name, keep=False)]
        for index, row in duplicates.iterrows():
            x_value = row[X_name]
            max_count = occurrences[(occurrences[X_name] == x_value)].max()['count']
            occurrences.drop(occurrences[(occurrences[X_name] == x_value) & (occurrences['count'] != max_count)].index, inplace=True)

        ## Remove duplicates
        self.df = occurrences.drop_duplicates(subset = X_name).reset_index(drop = True)
    
    def tokenize(self, join = False):
        def tokenize_helper(text, join = False):
            stop_words = set(stopwords.words('english'))
            tokens = word_tokenize(text)
            tokens = [word.lower() for word in tokens if word.lower() not in stop_words]

            if join:
                tokens = ' '.join([''.join(c for c in word if c not in string.punctuation) for word in tokens if word])
        
            return tokens
        
        self.X = self.X.apply(lambda x: tokenize_helper(x, join))

    ## Embedders
        
    def word2vec(self):
        from gensim.models.word2vec import Word2Vec
        import gensim.downloader as api

        word_to_vec_map = api.load("word2vec-google-news-300")

        return word_to_vec_map
    
    
    ## Stemming/ Lemmetization

    def stemming(self):
        ps = PorterStemmer()

        def stem(row):
            print(row)
            stemmed = []
            for word in row:
                stemmed += [ps.stem(word)]
            print('STEMMED:', stemmed)

            return stemmed

        return self.X.apply(stem)
    

    def lemming(self):

        def lem(row):
            lemmatizer = WordNetLemmatizer()
            lemmed = [lemmatizer.lemmatize(word) for word in row]
            # print(row)
            # print(lemmed,"\n")
            return lemmed

        return self.X.apply(lem)
    
    def pre_embed(self):
        self.tokenizer = text.Tokenizer(num_words=self.token_max_words)
        self.tokenizer.fit_on_texts(self.X)

        self.sequences = self.tokenizer.texts_to_sequences(self.X)

        self.word_index = self.tokenizer.word_index
        self.vocab_len = len(self.word_index) + 1
        self.embed_vector_len = self.word_to_vec_map['moon'].shape[0]
    
    def tok_embedding_mat(self, alternative):
        """
        embedder: word2vec
        alternative: list of callable to find synonyms from, inorder of precedence
        """

        emb_matrix = np.zeros((self.vocab_len, self.embed_vector_len))


        for word, index in tqdm.tqdm(self.word_index.items(), total = len(self.word_index)):
            try:
                embedding_vector = self.word_to_vec_map[word]
                emb_matrix[index-1, :] = embedding_vector
            except:
                for dictionary in alternative:
                    try: 
                        synonym = dictionary(word)
                        if synonym:
                            # print(f'Found synonym: {synonym} for word: {word}')
                            embedding_vector = self.word_to_vec_map[synonym] 
                            emb_matrix[index-1, :] = embedding_vector
                            break
                    except:
                        continue
        pd.to_pickle(emb_matrix, f"../embeddings/emb_matrix_x{self.subset}_tok_{self.maxlen_per_sent}_len{self.token_max_words}.pkl")

        return emb_matrix


    def pad(self):
        X_pad = pad_sequences(self.sequences, maxlen = self.maxlen_per_sent)
        return X_pad

    def undersample(self):
        undersampler = RandomUnderSampler(random_state=42)
        X_resampled, y_resampled = undersampler.fit_resample(self.X_train, self.y_train)

        return X_resampled, y_resampled


class Train(DataPrep):
    def __init__(self, subset = None, text_prep = 'lem', token_max_words = 5000, maxlen_per_sent = 150, undersample = True):
        super().__init__(subset, text_prep, token_max_words, maxlen_per_sent, undersample)

    def lstm(self, nodes):
        """
        Single layer LSTM
        """
        self.model = Sequential()
        self.model.add(Embedding(input_dim= self.vocab_len, output_dim= self.embed_vector_len, input_shape = (self.maxlen_per_sent,), trainable=False, embeddings_initializer = initializers.Constant(self.emb_matrix)))
        self.model.add(LSTM(512))
        self.model.add(Dense(1, activation = 'sigmoid'))

        self.model.compile(optimizer='adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

        # Train model
        self.model.fit(self.X_train, self.y_train, epochs=10, batch_size=1, verbose=1)  
    
    def lstm_op(self):
        import math

        def objective(trial):
            units = trial.suggest_categorical("units", [32, 64, 128, 256])
            units2 = round(math.sqrt(units))
            epochs = trial.suggest_categorical("epochs", [10, 20, 30])
            batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
            dropout = trial.suggest_float("dropout", low = 0.1, high = 0.5)
            
            self.model = Sequential()
            self.model.add(Embedding(input_dim= self.vocab_len, output_dim= self.embed_vector_len, input_shape = (self.maxlen_per_sent,), trainable=False, embeddings_initializer = initializers.Constant(self.emb_matrix)))
            self.model.add(LSTM(units))
            self.model.add(Dropout(dropout))

            self.model.add(Dense(units2))
            self.model.add(Dense(1, activation = 'sigmoid'))

            self.model.compile(optimizer='adam',
                            loss='binary_crossentropy',
                            metrics=['accuracy'])

            self.model.fit(self.X_train, self.y_train, epochs= epochs, batch_size= batch_size, verbose=1)  
            _, accuracy = self.model.evaluate(self.X_test, self.y_test, verbose=0)

            return accuracy

        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=10)

        self.best_trial = study.best_trial
        self.best_params = self.best_trial.params
        self.best_accuracy = self.best_trial.value

        print("Best hyperparameters:", self.best_params)
        print("Best accuracy:", self.best_accuracy)


    def predict(self, verbose = False):

        loss, accuracy = self.model.evaluate(self.X_test, self.y_test)
        print("Test Accuracy:", accuracy)

        # Make predictions
        predictions = self.model.predict(self.X_test)

        y_hat = [1 if i> 0.5 else 0 for i in predictions]

        if verbose:
            print("Classification Report:")
            print(classification_report(self.y_test, y_hat))

            print("Confusion Matrix:")
            print(confusion_matrix(self.y_test, y_hat))

    


  path = f'embeddings\emb_matrix_x{self.subset}_tok_{self.maxlen_per_sent}_len{self.token_max_words}.pkl'


In [45]:
test = Train(subset = 500)

Dupes removed
Tokenizing..
Finished Tokenizing
Initialising word2vec
lemm/stemm
Embedding...
Padding
Finished padding
Undersampling..
Counter({0: 196, 1: 139})
Counter({0: 139, 1: 139})


In [46]:
test.lstm_op()

[I 2024-03-31 22:28:49,582] A new study created in memory with name: no-name-7f6696dc-9a06-4f88-89bb-fa311308bd27


  super().__init__(**kwargs)


Epoch 1/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 118ms/step - accuracy: 0.5802 - loss: 0.6578
Epoch 2/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 118ms/step - accuracy: 0.8185 - loss: 0.5009
Epoch 3/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 123ms/step - accuracy: 0.8070 - loss: 0.3820
Epoch 4/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 141ms/step - accuracy: 0.9141 - loss: 0.2457
Epoch 5/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 127ms/step - accuracy: 0.9739 - loss: 0.1103
Epoch 6/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 122ms/step - accuracy: 0.9889 - loss: 0.0634
Epoch 7/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 140ms/step - accuracy: 0.9788 - loss: 0.0587
Epoch 8/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 116ms/step - accuracy: 0.9874 - loss: 0.0478
Epoch 9/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

[I 2024-03-31 22:29:30,910] Trial 0 finished with value: 0.903030276298523 and parameters: {'units': 32, 'epochs': 30, 'batch_size': 32, 'dropout': 0.145162878487423}. Best is trial 0 with value: 0.903030276298523.


Epoch 1/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 142ms/step - accuracy: 0.6487 - loss: 0.6668
Epoch 2/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 168ms/step - accuracy: 0.7656 - loss: 0.5520
Epoch 3/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 130ms/step - accuracy: 0.7997 - loss: 0.4838
Epoch 4/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 122ms/step - accuracy: 0.8197 - loss: 0.3872
Epoch 5/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 122ms/step - accuracy: 0.8817 - loss: 0.3088
Epoch 6/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 122ms/step - accuracy: 0.9572 - loss: 0.1926
Epoch 7/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 124ms/step - accuracy: 0.9708 - loss: 0.1288
Epoch 8/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 125ms/step - accuracy: 0.9785 - loss: 0.0851
Epoch 9/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

[I 2024-03-31 22:29:50,436] Trial 1 finished with value: 0.8909090757369995 and parameters: {'units': 32, 'epochs': 20, 'batch_size': 64, 'dropout': 0.33064798001600804}. Best is trial 0 with value: 0.903030276298523.


Epoch 1/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 135ms/step - accuracy: 0.5439 - loss: 0.6776
Epoch 2/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step - accuracy: 0.7972 - loss: 0.6227
Epoch 3/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step - accuracy: 0.8099 - loss: 0.5637
Epoch 4/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step - accuracy: 0.8175 - loss: 0.5160
Epoch 5/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step - accuracy: 0.8091 - loss: 0.4992
Epoch 6/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step - accuracy: 0.8323 - loss: 0.4225
Epoch 7/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - accuracy: 0.8398 - loss: 0.3726
Epoch 8/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step - accuracy: 0.8680 - loss: 0.3416
Epoch 9/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

[I 2024-03-31 22:30:04,806] Trial 2 finished with value: 0.8787878751754761 and parameters: {'units': 32, 'epochs': 20, 'batch_size': 128, 'dropout': 0.13676090564396914}. Best is trial 0 with value: 0.903030276298523.


Epoch 1/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 218ms/step - accuracy: 0.7073 - loss: 0.6368
Epoch 2/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 198ms/step - accuracy: 0.7614 - loss: 0.4814
Epoch 3/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 183ms/step - accuracy: 0.8443 - loss: 0.3691
Epoch 4/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 165ms/step - accuracy: 0.9069 - loss: 0.2194
Epoch 5/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 173ms/step - accuracy: 0.9696 - loss: 0.1191
Epoch 6/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 237ms/step - accuracy: 0.9702 - loss: 0.0688
Epoch 7/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 184ms/step - accuracy: 0.9685 - loss: 0.0899
Epoch 8/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 233ms/step - accuracy: 0.9958 - loss: 0.0459
Epoch 9/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

[I 2024-03-31 22:30:45,336] Trial 3 finished with value: 0.8969696760177612 and parameters: {'units': 128, 'epochs': 20, 'batch_size': 32, 'dropout': 0.48896846326570154}. Best is trial 0 with value: 0.903030276298523.


Epoch 1/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 146ms/step - accuracy: 0.7305 - loss: 0.6297
Epoch 2/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 144ms/step - accuracy: 0.7634 - loss: 0.5063
Epoch 3/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 155ms/step - accuracy: 0.7960 - loss: 0.3923
Epoch 4/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 153ms/step - accuracy: 0.8521 - loss: 0.2930
Epoch 5/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 163ms/step - accuracy: 0.9579 - loss: 0.1137
Epoch 6/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 150ms/step - accuracy: 0.9754 - loss: 0.0595
Epoch 7/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 143ms/step - accuracy: 0.9896 - loss: 0.0345
Epoch 8/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 141ms/step - accuracy: 0.9984 - loss: 0.0185
Epoch 9/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

[I 2024-03-31 22:31:08,034] Trial 4 finished with value: 0.8666666746139526 and parameters: {'units': 64, 'epochs': 10, 'batch_size': 32, 'dropout': 0.21506017509820344}. Best is trial 0 with value: 0.903030276298523.


Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 752ms/step - accuracy: 0.6418 - loss: 0.6495
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 728ms/step - accuracy: 0.7603 - loss: 0.5047
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 715ms/step - accuracy: 0.8110 - loss: 0.3852
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 682ms/step - accuracy: 0.8868 - loss: 0.3061
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 660ms/step - accuracy: 0.9145 - loss: 0.1920
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 662ms/step - accuracy: 0.9621 - loss: 0.1255
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 737ms/step - accuracy: 0.9785 - loss: 0.0884
Epoch 8/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 672ms/step - accuracy: 0.9920 - loss: 0.0414
Epoch 9/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

[I 2024-03-31 22:31:49,755] Trial 5 finished with value: 0.8545454740524292 and parameters: {'units': 256, 'epochs': 10, 'batch_size': 64, 'dropout': 0.4494049270982179}. Best is trial 0 with value: 0.903030276298523.


Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 186ms/step - accuracy: 0.6390 - loss: 0.6614
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 158ms/step - accuracy: 0.7379 - loss: 0.5725
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 168ms/step - accuracy: 0.7895 - loss: 0.4874
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 185ms/step - accuracy: 0.7923 - loss: 0.4074
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 158ms/step - accuracy: 0.8338 - loss: 0.3585
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 153ms/step - accuracy: 0.9347 - loss: 0.1977
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 153ms/step - accuracy: 0.9761 - loss: 0.0959
Epoch 8/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 159ms/step - accuracy: 0.9825 - loss: 0.0610
Epoch 9/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

[I 2024-03-31 22:32:03,714] Trial 6 finished with value: 0.8969696760177612 and parameters: {'units': 64, 'epochs': 10, 'batch_size': 64, 'dropout': 0.4497249394189913}. Best is trial 0 with value: 0.903030276298523.


Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 366ms/step - accuracy: 0.6100 - loss: 0.6621
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 361ms/step - accuracy: 0.7519 - loss: 0.5281
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 385ms/step - accuracy: 0.7775 - loss: 0.4326
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 356ms/step - accuracy: 0.8546 - loss: 0.3472
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 355ms/step - accuracy: 0.8962 - loss: 0.2508
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 343ms/step - accuracy: 0.9734 - loss: 0.1097
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 339ms/step - accuracy: 0.9827 - loss: 0.0812
Epoch 8/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 347ms/step - accuracy: 0.9167 - loss: 0.1978
Epoch 9/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

[I 2024-03-31 22:32:28,640] Trial 7 finished with value: 0.8545454740524292 and parameters: {'units': 128, 'epochs': 10, 'batch_size': 64, 'dropout': 0.21853321855234267}. Best is trial 0 with value: 0.903030276298523.


Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 748ms/step - accuracy: 0.5668 - loss: 0.6488
Epoch 2/10


FrozenTrial(number=1, state=1, values=[0.8969696760177612], datetime_start=datetime.datetime(2024, 3, 31, 22, 19, 58, 558387), datetime_complete=datetime.datetime(2024, 3, 31, 22, 20, 33, 713916), params={'units': 32, 'epochs': 30, 'batch_size': 32, 'dropout': 0.1826917779294587}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'units': CategoricalDistribution(choices=(32, 64, 128, 256)), 'epochs': CategoricalDistribution(choices=(10, 20, 30)), 'batch_size': CategoricalDistribution(choices=(32, 64, 128)), 'dropout': FloatDistribution(high=0.5, log=False, low=0.1, step=None)}, trial_id=1, value=None)