# Note

**You can compare the deep learning models in your web browser by running the following command in a terminal:**

`tensorboard --logdir=./models/dl_models/tensor_logs --port=6006`

# Or run the cell below

In [1]:
# Load the TensorBoard notebook extension, then start TensorBoard on the
# deep learning training logs (same log directory and port as the terminal command above).
%load_ext tensorboard
%tensorboard --logdir=./models/dl_models/tensor_logs --port=6006

In [None]:
import pandas as pd
from data_shuffling_split import *
from features_extraction import *
from data_preprocess import *
from ml_modeling import *
from keras_models import *
from configs import *


In [None]:
def train_val_test_score(model, x_train, y_train, x_val, y_val, x_test, y_test):
    """Score ``model`` on the training, validation and test splits, in that order.

    For each split a header line is printed and ``keras_f1_score_result`` is
    called with the model and that split's features and labels. Consecutive
    splits are separated by a rule of 50 ``=`` characters.
    ``keras_f1_score_result`` is defined outside this notebook; presumably it
    reports the F1 score itself, since its return value is not used here.

    Parameters
    ----------
    model : the fitted model, handed unchanged to ``keras_f1_score_result``.
    x_train, y_train : features and labels of the training split.
    x_val, y_val : features and labels of the validation split.
    x_test, y_test : features and labels of the test split.

    Returns
    -------
    bool
        Always ``True``.
    """
    splits = (
        ("On Training set\n", x_train, y_train),
        ("On Validation set \n", x_val, y_val),
        # Fix: this header used to read "On Training" although the test split is scored.
        ("On Test set \n", x_test, y_test),
    )

    for position, (header, features, labels) in enumerate(splits):
        if position:
            # Rule between splits only, never before the first or after the last.
            print("=" * 50)
        print(header)
        keras_f1_score_result(model, features, labels)

    return True

# Tokenize All data

In [None]:
def _show_tokenization_sample(raw_texts, tokenized_texts, closing_rule=True):
    """Print the first three raw texts next to their tokenized form."""
    print("Before Tokenization : \n", raw_texts[:3])
    print("="*50)
    print("After Tokenization : \n", tokenized_texts[:3])
    if closing_rule:
        print("="*50)


# Training CSV -> train/validation texts and labels via prepare_data.
# NOTE(review): this file is read with a bare `read_csv` and a path without the
# "dataset/" prefix, whereas the test file below uses `pd.read_csv` with it.
# Presumably `read_csv` is a project helper pulled in by a star import - confirm.
strat_train_set = read_csv("train/strat_train_set.csv")
x_train_text, x_val_text, y_train, y_val = prepare_data(strat_train_set)

# Test CSV -> list of texts plus the encoded dialect labels.
strat_test_set = pd.read_csv("dataset/test/strat_test_set.csv")
x_test_text = list(strat_test_set['text'])
y_test = strat_test_set['dialect_l_encoded'].values

# Tokenize every split and preview a few examples of each.
x_train_text_tokenized = tokenize_using_nltk_TreebankWordTokenizer(x_train_text)
_show_tokenization_sample(x_train_text, x_train_text_tokenized)

x_val_text_tokenized = tokenize_using_nltk_TreebankWordTokenizer(x_val_text)
# The validation preview has no closing rule.
_show_tokenization_sample(x_val_text, x_val_text_tokenized, closing_rule=False)

x_test_text_tokenized = tokenize_using_nltk_TreebankWordTokenizer(x_test_text)
_show_tokenization_sample(x_test_text, x_test_text_tokenized)

# Abo Bakr Word2vec

In [None]:
# Settings used for the Abo Bakr word2vec model in this section.
number_of_features = 100
max_len_str = 64

word_to_vec_model = load_word2vec_model("models/word2vec/bakrianoo_unigram_cbow_model/full_uni_cbow_100_twitter.mdl")

# Convert the tokenized train / validation / test texts with the loaded model.
X_train_embed_matrix, X_val_embed_matrix, x_test_embed_matrix = (
    text_to_matrix_using_word2vec(word_to_vec_model, tokenized_texts, max_len_str)
    for tokenized_texts in (x_train_text_tokenized, x_val_text_tokenized, x_test_text_tokenized)
)

# The deep learning models take 3-D input, so each matrix is reshaped to
# (rows, max_len_str, number_of_features).
X_train_embed_matrix, X_val_embed_matrix, x_test_embed_matrix = (
    matrix.reshape(matrix.shape[0], max_len_str, number_of_features)
    for matrix in (X_train_embed_matrix, X_val_embed_matrix, x_test_embed_matrix)
)

In [None]:
# Evaluate the saved Adam_lstm_no_batch model (Abo Bakr word2vec section).

model_path = "models/dl_models/run_with_bakr_word2vec_Adam_lstm_no_batch_learning_rate=0.1__model.h5"
model = keras_load_model(model_path)

# Binding the result to `_` keeps the helper's return value out of the cell output.
_ = train_val_test_score(
    model=model,
    x_train=X_train_embed_matrix, y_train=y_train,
    x_val=X_val_embed_matrix, y_val=y_val,
    x_test=x_test_embed_matrix, y_test=y_test,
)

In [None]:
# Evaluate the saved Adam_lstm_with_batch model (Abo Bakr word2vec section).

model_path = "models/dl_models/run_with_bakr_word2vec_Adam_lstm_with_batch_learning_rate=0.1__model.h5"
model = keras_load_model(model_path)

# Binding the result to `_` keeps the helper's return value out of the cell output.
_ = train_val_test_score(
    model=model,
    x_train=X_train_embed_matrix, y_train=y_train,
    x_val=X_val_embed_matrix, y_val=y_val,
    x_test=x_test_embed_matrix, y_test=y_test,
)

In [None]:
# Evaluate the saved Rmsprob_lstm_with_batch model (Abo Bakr word2vec section).

model_path = "models/dl_models/run_with_bakr_word2vec_Rmsprob_lstm_with_batch_learning_rate=0.1__model.h5"
model = keras_load_model(model_path)

# Binding the result to `_` keeps the helper's return value out of the cell output.
_ = train_val_test_score(
    model=model,
    x_train=X_train_embed_matrix, y_train=y_train,
    x_val=X_val_embed_matrix, y_val=y_val,
    x_test=x_test_embed_matrix, y_test=y_test,
)

In [None]:
# Evaluate the saved RMSprop_lstm_no_batch model (Abo Bakr word2vec section).
# NOTE(review): the file name below contains "no_with_batch", unlike the sibling
# runs ("no_batch" / "with_batch") - confirm it matches the file on disk.

model_path = "models/dl_models/run_with_bakr_word2vec_RMSprop_lstm_no_with_batch_learning_rate=0.1__model.h5"
model = keras_load_model(model_path)

# Binding the result to `_` keeps the helper's return value out of the cell output.
_ = train_val_test_score(
    model=model,
    x_train=X_train_embed_matrix, y_train=y_train,
    x_val=X_val_embed_matrix, y_val=y_val,
    x_test=x_test_embed_matrix, y_test=y_test,
)

In [None]:
# Evaluate the saved sgd_lstm_no_batch model (Abo Bakr word2vec section).

model_path = "models/dl_models/run_with_bakr_word2vec_sgd_lstm_no_batch_learning_rate=0.1__model.h5"
model = keras_load_model(model_path)

# Binding the result to `_` keeps the helper's return value out of the cell output.
_ = train_val_test_score(
    model=model,
    x_train=X_train_embed_matrix, y_train=y_train,
    x_val=X_val_embed_matrix, y_val=y_val,
    x_test=x_test_embed_matrix, y_test=y_test,
)

In [None]:
# Evaluate the saved sgd_lstm_with_batch model (Abo Bakr word2vec section).

model_path = "models/dl_models/run_with_bakr_word2vec_sgd_lstm_with_batch_learning_rate=0.1__model.h5"
model = keras_load_model(model_path)

# Binding the result to `_` keeps the helper's return value out of the cell output.
_ = train_val_test_score(
    model=model,
    x_train=X_train_embed_matrix, y_train=y_train,
    x_val=X_val_embed_matrix, y_val=y_val,
    x_test=x_test_embed_matrix, y_test=y_test,
)

#  Rezk Word2vec

In [None]:
# Settings used for the Rezk word2vec model in this section.
number_of_features = 300
max_len_str = 64

# The model location is kept relative to the shared word2vec folder.
word2vec_path = "rezk_unigram_CBOW_model/train_word2vec_cbow__window_3_min_count_300"
word_to_vec_model = load_word2vec_model("models/word2vec/" + word2vec_path)

# Convert the tokenized train / validation / test texts with the loaded model.
X_train_embed_matrix, X_val_embed_matrix, x_test_embed_matrix = (
    text_to_matrix_using_word2vec(word_to_vec_model, tokenized_texts, max_len_str)
    for tokenized_texts in (x_train_text_tokenized, x_val_text_tokenized, x_test_text_tokenized)
)

# The deep learning models take 3-D input, so each matrix is reshaped to
# (rows, max_len_str, number_of_features).
X_train_embed_matrix, X_val_embed_matrix, x_test_embed_matrix = (
    matrix.reshape(matrix.shape[0], max_len_str, number_of_features)
    for matrix in (X_train_embed_matrix, X_val_embed_matrix, x_test_embed_matrix)
)

In [None]:
# Evaluate the saved Adam_lstm_no_batch model (Rezk word2vec section).

model_path = "models/dl_models/run_with_rezk_word2vec_Adam_lstm_no_batch_learning_rate=0.1__model.h5"
model = keras_load_model(model_path)

# Binding the result to `_` keeps the helper's return value out of the cell output.
_ = train_val_test_score(
    model=model,
    x_train=X_train_embed_matrix, y_train=y_train,
    x_val=X_val_embed_matrix, y_val=y_val,
    x_test=x_test_embed_matrix, y_test=y_test,
)

In [None]:
# Evaluate the saved Adam_lstm_with_batch model (Rezk word2vec section).

model_path = "models/dl_models/run_with_rezk_word2vec_Adam_lstm_with_batch_learning_rate=0.1__model.h5"
model = keras_load_model(model_path)

# Binding the result to `_` keeps the helper's return value out of the cell output.
_ = train_val_test_score(
    model=model,
    x_train=X_train_embed_matrix, y_train=y_train,
    x_val=X_val_embed_matrix, y_val=y_val,
    x_test=x_test_embed_matrix, y_test=y_test,
)

In [None]:
# Evaluate the saved RMSprop_lstm_no_batch model (Rezk word2vec section);
# the file on disk spells the optimizer "Rmsprob".

model_path = "models/dl_models/run_with_rezk_word2vec_Rmsprob_lstm_no_batch_learning_rate=0.1__model.h5"
model = keras_load_model(model_path)

# Binding the result to `_` keeps the helper's return value out of the cell output.
_ = train_val_test_score(
    model=model,
    x_train=X_train_embed_matrix, y_train=y_train,
    x_val=X_val_embed_matrix, y_val=y_val,
    x_test=x_test_embed_matrix, y_test=y_test,
)

In [None]:
# Evaluate the saved Rmsprob_lstm_with_batch model (Rezk word2vec section).

model_path = "models/dl_models/run_with_rezk_word2vec_Rmsprob_lstm_with_batch_learning_rate=0.1__model.h5"
model = keras_load_model(model_path)

# Binding the result to `_` keeps the helper's return value out of the cell output.
_ = train_val_test_score(
    model=model,
    x_train=X_train_embed_matrix, y_train=y_train,
    x_val=X_val_embed_matrix, y_val=y_val,
    x_test=x_test_embed_matrix, y_test=y_test,
)

In [None]:
# Evaluate the saved sgd_lstm_no_batch model (Rezk word2vec section).

model_path = "models/dl_models/run_with_rezk_word2vec_sgd_lstm_no_batch_learning_rate=0.1__model.h5"
model = keras_load_model(model_path)

# Binding the result to `_` keeps the helper's return value out of the cell output.
_ = train_val_test_score(
    model=model,
    x_train=X_train_embed_matrix, y_train=y_train,
    x_val=X_val_embed_matrix, y_val=y_val,
    x_test=x_test_embed_matrix, y_test=y_test,
)

In [None]:
# Evaluate the saved sgd_lstm_with_batch model (Rezk word2vec section).

model_path = "models/dl_models/run_with_rezk_word2vec_sgd_lstm_with_batch_learning_rate=0.1__model.h5"
model = keras_load_model(model_path)

# Binding the result to `_` keeps the helper's return value out of the cell output.
_ = train_val_test_score(
    model=model,
    x_train=X_train_embed_matrix, y_train=y_train,
    x_val=X_val_embed_matrix, y_val=y_val,
    x_test=x_test_embed_matrix, y_test=y_test,
)

# Conclusion

If we compare the models' predictions with human predictions, we may conclude that predicting the dialect is a fairly difficult task even for a human, so it is understandable that it is a hard task for the models as well.

