In [1]:
# Mount Google Drive: every data, vocabulary and model path used in this notebook lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#@title Initial Imports + Helper functions
import tensorflow as tf
from IPython.display import display
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import re
import random
from sklearn.model_selection import train_test_split
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import ModelCheckpoint
import time
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from numpy import dot
from numpy.linalg import norm
from sklearn.metrics import classification_report
from sklearn.utils import shuffle
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
import tensorflow.keras.backend as K

from tensorflow.keras.utils import to_categorical  ## maintain tf.keras always


def cosine_similarity(a, b):
    """Return the cosine similarity of two vectors.

    The value is the inner product of ``a`` and ``b`` divided by the product
    of their norms (as computed by ``numpy.linalg.norm``).

    Reference:
    https://stackoverflow.com/questions/18424228/cosine-similarity-between-2-number-lists
    """
    numerator = np.inner(a, b)
    denominator = norm(a) * norm(b)
    return numerator / denominator

def get_various_metrics_and_print(Y_true, Y_predicted):
    """Print metrics derived from a 2x2 confusion matrix, then a classification report.

    The confusion matrix of ``Y_true`` vs ``Y_predicted`` is unpacked as
    (TN, FP, FN, TP), so both label sets must yield exactly four cells.
    Nothing is returned; every figure is printed as a percentage.
    """
    TN, FP, FN, TP = confusion_matrix(Y_true, Y_predicted).ravel()

    # Denominators shared by several of the ratios below.
    actual_positive = TP + FN
    actual_negative = TN + FP
    predicted_positive = TP + FP
    predicted_negative = TN + FN

    accuracy = (TP + TN) / (TP + TN + FP + FN)
    recall = TP / actual_positive
    specificity = TN / actual_negative  # TNR
    false_positive_rate = FP / actual_negative  # equals 1 - TNR
    precision = TP / predicted_positive
    false_discovery_rate = FP / predicted_positive
    neg_predicted_val = TN / predicted_negative
    f1_score = 2*((precision * recall) / (precision + recall))

    print("TN = ", TN, " FP = ", FP, " FN = ", FN, " TP = ", TP)
    report_rows = [
        ("Accuracy = ", accuracy),
        ("TPR = Sensitivity = Recall = ", recall),
        ("TNR = Specificity = ", specificity),
        ("Precision = PPV = Positive Predictive Value = ", precision),
        ("FDR = False Discovery Rate = ", false_discovery_rate),
        ("FPR = False Positive Rate = ", false_positive_rate),
        ("F1 Score = ", f1_score),
        ("Neg Predicted Val = ", neg_predicted_val),
    ]
    for caption, ratio in report_rows:
        print(caption, ratio*100, "%")
    print("\n")
    print(classification_report(y_true=Y_true, y_pred=Y_predicted))

def plot_confusion_matrix(Y_true=None, Y_predicted=None):
    """Print the confusion matrix and draw it as an annotated 2x2 heatmap.

    Each cell is annotated with its name (TN/FP/FN/TP), its raw count and its
    share of all samples. The annotation grid is reshaped to (2, 2), so the
    function is written for two-class problems.

    Based on https://medium.com/@dtuk81/confusion-matrix-visualization-fc31e3f30fea
    """
    cf_matrix = confusion_matrix(Y_true, Y_predicted)
    print(cf_matrix)

    flat_counts = cf_matrix.flatten()
    flat_shares = cf_matrix.flatten()/np.sum(cf_matrix)

    cell_text = []
    for cell_name, count, share in zip(['TN','FP','FN','TP'], flat_counts, flat_shares):
        count_text = "{0:0.0f}".format(count)
        share_text = "{0:.2%}".format(share)
        cell_text.append(f"{cell_name}\n{count_text}\n{share_text}")
    annotations = np.asarray(cell_text).reshape(2,2)

    # fmt='' because the annotations are already formatted strings.
    sns.heatmap(cf_matrix, annot=annotations, fmt='', cmap=plt.cm.magma)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")


"""
  Loads the stop words into a list.
"""
def load_stop_words_list_and_dict(file):
  """Load stop words from a UTF-8 text file holding one word per line.

  Args:
    file: path of the stop-word file.

  Returns:
    (stop_words_list, stop_words_dict): the whitespace-stripped lines in file
    order, and a dict mapping each of those words to the empty string (its
    "replacement"), which gives cheap membership tests.
  """
  # `with` closes the handle even if reading or decoding fails part-way.
  with open(file, 'r', encoding="utf-8") as fin:
    stop_words_list = [line.strip() for line in fin] # drop new-lines/other spaces

  # dictionary of key-value pairs [to do easy checking]
  stop_words_dict = {word: "" for word in stop_words_list}

  return stop_words_list, stop_words_dict


"""
  Given a sentence, it removes the stop-words and other things [stemming]
  Input: sentence, list of stop-words
  Output: stemmed sentence
"""
def obtain_stemmed_sentence(sentence, stop_words_dict): # Base code: Rafi
  """Clean a sentence and drop stop words and very short tokens.

  Steps:
    1. remove new-line characters;
    2. delete runs of ASCII letters, ASCII and Bangla digits and the listed
       punctuation/symbols;
    3. split on whitespace, dashes, quotes, '.', ',' and ':';
    4. keep tokens that are at least 2 characters long and are not in
       ``stop_words_dict``.

  Args:
    sentence: raw text.
    stop_words_dict: any container supporting ``in`` (the notebook passes a
      dict keyed by stop word).

  Returns:
    The kept tokens, each preceded by a single space (so a non-empty result
    starts with a space); the empty string when nothing is kept.
  """
  stemmed_sentence = sentence.replace("\n", "") # replace new line with empty-string
  # remove english words, digits and symbols
  stemmed_sentence = re.sub(r'[।\\/“”0-9@&$%+_=<>~*#০১২৩৪৫৬৭৮৯…(){}\[\]\?\!\.a-zA-Z\…\|]+','',stemmed_sentence) # replace theese with empty-string [Mahim]
  # Raw string: the earlier non-raw pattern relied on invalid escapes such as
  # '\s' and '\-', which Python reports as a Deprecation/SyntaxWarning.
  # The character class is the same set of delimiters.
  sentences_split = re.split(r"[–—\s.,\-‘’'\":]", stemmed_sentence) # split wrt these delims
  kept_words = [x for x in sentences_split if x not in stop_words_dict and len(x) >= 2]
  # Each kept word is prefixed with a space, matching the original output format.
  return "".join(" " + word for word in kept_words)

  
def read_from_file_and_load_vocab(in_file="/content/drive/My Drive/Data_Initial/Bangla-Vocab-22-Aug-withoutEnglish.txt"):
    """Read the vocabulary file (one word per line) into a list.

    Args:
        in_file: path of the UTF-8 vocabulary file. Defaults to the path the
            notebook has always used, so existing no-argument calls are
            unaffected.

    Returns:
        list[str]: the stripped content of every line whose raw length
        (including its new-line character) is greater than 1, in file order.
    """
    print(f"in_file = {in_file}")
    # `with` closes the handle even if reading or decoding fails part-way.
    with open(in_file, 'r', encoding="utf-8") as fin1:
        return [line.strip() for line in fin1 if len(line) > 1]


## write_to_file_vocab()


def get_vocab_dictionary(vocab_list):
    """Map each word to the position of its first occurrence in ``vocab_list``.

    Positions are counted over the whole list, so a repeated word keeps its
    first index and the later duplicate's position is simply left unused.
    """
    word_to_index = {}
    for position, word in enumerate(vocab_list):
        # setdefault only stores the position the first time a word is seen.
        word_to_index.setdefault(word, position)
    return word_to_index



def get_encoded_content(stemmed_sentence, vocab_dict):
    """Encode a whitespace-separated sentence as a list of integer ids.

    Words missing from ``vocab_dict`` are skipped. Every id is the word's
    vocabulary index plus one, since '0' is used for padding.
    """
    return [
        vocab_dict[token] + 1
        for token in stemmed_sentence.split()
        if token in vocab_dict
    ]


def get_f1(y_true, y_pred): #taken from old keras source code
    """F1 score built from Keras backend ops, usable as a Keras metric.

    Predictions and labels are clipped to [0, 1] and rounded before counting,
    and ``K.epsilon()`` is added to every denominator to avoid division by zero.
    """
    eps = K.epsilon()
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))  # true positives
    actual_pos = K.sum(K.round(K.clip(y_true, 0, 1)))
    flagged_pos = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = hits / (flagged_pos + eps)
    recall = hits / (actual_pos + eps)
    return 2*(precision*recall)/(precision+recall+eps)

def get_recall(y_true, y_pred): #taken from old keras source code
    """Recall built from Keras backend ops, usable as a Keras metric.

    recall = true_positives / (possible_positives + K.epsilon())

    Predictions and labels are clipped to [0, 1] and rounded before counting.
    The precision and F1 terms that used to be computed here were never used
    and have been removed; the returned value is unchanged.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    # epsilon keeps the division defined when there are no positive labels.
    return true_positives / (possible_positives + K.epsilon())

  import pandas.util.testing as tm


In [3]:
RANDOM_STATE = 2245 # for ensuring reproducible results.
tf.random.set_seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)
# NOTE(review): Python's built-in `random` module is imported above but is not seeded here.

# Sequence length: used as the model input length and as `maxlen` when padding.
LEN_DOC_CONTENT = 400
file_stop_words_bangla = "/content/drive/My Drive/Data_Initial/STOP_WORDS_BANGLA.txt"
# NOTE(review): not referenced anywhere else in the visible part of this notebook.
FILE_TRAIN_AND_VALIDATION ="/content/drive/My Drive/Data_Initial/contents-labels-train-val.csv"

# Run tag embedded in every saved model file name (see get_fileModel_and_fileWeights).
DATE_TIME = "7Sept-4am"

def get_fileModel_and_fileWeights(num, model_name="CNN", NUM_MODELS = 10, synth_type="No", BASE_PATH="/content/drive/My Drive/Ensembling/Ens-Models/"):
    """Build the .h5 path under which ensemble member ``num`` is saved.

    The name encodes the architecture, the ensemble size, the synthesis type,
    the module-level ``DATE_TIME`` tag and the member index.

    Returns:
        (file_name, file_weights): the model path, and the fixed placeholder
        string "IGNORE" for the weights path.
    """
    pieces = (
        BASE_PATH, "ens-", model_name,
        "-numModels-", str(NUM_MODELS),
        "-", synth_type, "Synth-", str(DATE_TIME),
        "-", str(num), ".h5",
    )
    return "".join(pieces), "IGNORE"

# Example: show the file name generated for member 0 of a word-synthesis ensemble.
# NOTE(review): "biLSTM" here only affects the printed name; get_model() matches "BiLSTM" (capital B).
fm, fw = get_fileModel_and_fileWeights(num=0, model_name="biLSTM", synth_type="Word")
print(fm)

/content/drive/My Drive/Ensembling/Ens-Models/ens-biLSTM-numModels-10-WordSynth-7Sept-4am-0.h5


In [4]:
# Load the stop words and the vocabulary, then build the word -> index lookup.
stop_words_list, stop_words_dict = load_stop_words_list_and_dict(file_stop_words_bangla)
vocab_list = read_from_file_and_load_vocab()
print("Size of list of vocab", len(vocab_list))
vocab_dict = get_vocab_dictionary(vocab_list)
print(len(vocab_dict))
# +1 because id 0 is reserved for padding (get_encoded_content shifts every index by one).
vocab_size = len(vocab_dict) + 1
print(f"vocab_size = {vocab_size}")

in_file = /content/drive/My Drive/Data_Initial/Bangla-Vocab-22-Aug-withoutEnglish.txt
Size of list of vocab 237931
237931
vocab_size = 237932


In [5]:
## Models with small architectures ##
def getModelLSTM():
    """Build the small stacked-LSTM model (uncompiled) with a 2-way softmax output.

    Reads the module-level ``LEN_DOC_CONTENT`` (input length) and
    ``vocab_size`` (embedding input dimension).
    """
    emb_dim = 16
    layers = tf.keras.layers
    layer_stack = [
        layers.Input(shape=(LEN_DOC_CONTENT,)),
        layers.Embedding(vocab_size, output_dim=emb_dim),
        # return_sequences=True so that a second recurrent layer can be stacked on top.
        layers.LSTM(64, return_sequences=True, dropout=0.3),
        layers.LSTM(16),
        layers.Dense(8, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(2, activation="softmax"),
    ]
    return tf.keras.Sequential(layer_stack)

def getModelCNN():
    """Build the small 1-D CNN model (uncompiled) with a 2-way softmax output.

    Reads the module-level ``LEN_DOC_CONTENT`` (input length) and
    ``vocab_size`` (embedding input dimension).
    """
    emb_dim = 16
    layers = tf.keras.layers
    layer_stack = [
        layers.Input(shape=(LEN_DOC_CONTENT,)),
        layers.Embedding(vocab_size, output_dim=emb_dim),
        layers.Conv1D(filters=16, kernel_size=5, activation='relu'),
        layers.Dropout(0.3),
        # NOTE(review): pool_size=1 presumably leaves the sequence unchanged - confirm it is intended.
        layers.MaxPooling1D(pool_size=1),
        layers.GlobalAveragePooling1D(),
        layers.Dense(4, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(2, activation='softmax'),
    ]
    return tf.keras.Sequential(layer_stack)

def getModelBiLSTM():
    """Build the small bidirectional-LSTM model (uncompiled) with a 2-way softmax output.

    Reads the module-level ``LEN_DOC_CONTENT`` (input length) and
    ``vocab_size`` (embedding input dimension).
    """
    emb_dim = 16
    layers = tf.keras.layers
    # return_sequences=True so that a second recurrent layer can be stacked on top.
    inner_lstm = layers.LSTM(32, return_sequences=True, dropout=0.3)
    layer_stack = [
        layers.Input(shape=(LEN_DOC_CONTENT,)),
        layers.Embedding(vocab_size, output_dim=emb_dim),
        # merge_mode='ave' combines the forward and backward outputs by averaging.
        layers.Bidirectional(inner_lstm, merge_mode='ave'),
        layers.LSTM(16),
        layers.Dense(8, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(2, activation="softmax"),
    ]
    return tf.keras.Sequential(layer_stack)

def getModelBiGRU():
    """Build the small bidirectional-GRU model (uncompiled) with a 2-way softmax output.

    Reads the module-level ``LEN_DOC_CONTENT`` (input length) and
    ``vocab_size`` (embedding input dimension).
    """
    emb_dim = 16
    layers = tf.keras.layers
    # return_sequences=True so that a second recurrent layer can be stacked on top.
    inner_gru = layers.GRU(32, return_sequences=True, dropout=0.3)
    layer_stack = [
        layers.Input(shape=(LEN_DOC_CONTENT,)),
        layers.Embedding(vocab_size, output_dim=emb_dim),
        # merge_mode='ave' combines the forward and backward outputs by averaging.
        layers.Bidirectional(inner_gru, merge_mode='ave'),
        layers.GRU(16),
        layers.Dense(8, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(2, activation="softmax"),
    ]
    return tf.keras.Sequential(layer_stack)


def get_model(model_name):
    """Return a freshly built, uncompiled model for the given architecture name.

    Args:
        model_name: one of "CNN", "BiLSTM", "BiGRU" or "LSTM" (case-sensitive).

    Returns:
        The Keras model produced by the matching builder function.

    Raises:
        ValueError: if ``model_name`` is not a known architecture. Previously
            the function fell through and returned None, which only failed
            later when the caller tried to compile the model.
    """
    print(f"Inside get_model(), model_name = {model_name}")
    if model_name == "CNN":
        return getModelCNN()
    if model_name == "BiLSTM":
        return getModelBiLSTM()
    if model_name == "BiGRU":
        return getModelBiGRU()
    if model_name == "LSTM":
        return getModelLSTM()
    raise ValueError(
        f"Unknown model_name {model_name!r}; expected one of 'CNN', 'BiLSTM', 'BiGRU', 'LSTM'"
    )

In [6]:
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import ModelCheckpoint

In [7]:
def fit_for_one_model(model_num, list_encoded_contents_whole, list_labels_whole, BASE_PATH, model_name="CNN", NUM_MODELS=10, synth_type="No"):
    """Train one ensemble member on its own slice of the data and save it to disk.

    Args:
        model_num: index of the ensemble member; selects the entry of both
            input lists and is embedded in the saved file name.
        list_encoded_contents_whole: per-member collections of integer-encoded
            documents; entry ``model_num`` is padded and used as the input.
        list_labels_whole: per-member label collections; entry ``model_num``
            is label-encoded and one-hot encoded as the target.
        BASE_PATH: directory prefix for the saved model file.
        model_name: architecture name understood by ``get_model``.
        NUM_MODELS: ensemble size (only used in the file name).
        synth_type: synthesis tag (only used in the file name).

    Returns:
        None. The trained model is written to the path produced by
        ``get_fileModel_and_fileWeights``.
    """
    print("Inside fit_for_one_model() for model_num = ", model_num)
    # pad the sequences.
    encoded_content_this_model = list_encoded_contents_whole[model_num]
    padded_docs_this_model = pad_sequences(encoded_content_this_model, maxlen=LEN_DOC_CONTENT, padding='pre')

    # encode the labels.
    labels_as_list = list(list_labels_whole[model_num])
    print(labels_as_list[0:100])
    print(labels_as_list[-100:])

    encoder = LabelEncoder()
    encoder.fit(labels_as_list)
    encoded_Y_WHOLE = encoder.transform(labels_as_list)
    # Use the tf.keras `to_categorical` imported at the top of the notebook
    # ("maintain tf.keras always") instead of the standalone `keras` np_utils.
    dummy_Y_whole = to_categorical(encoded_Y_WHOLE)

    ## obtain dict_weights
    # Each class is weighted by the *other* class's share of the samples, so
    # the rarer class gets the larger weight. This indexes counts[0] and
    # counts[1], i.e. it expects exactly two distinct labels.
    (classes, counts) = np.unique(list_labels_whole[model_num], return_counts=True)
    counts_sum = np.sum(counts)
    dict_weights={
        0: counts[1]/counts_sum,
        1: counts[0]/counts_sum
    }
    print("dict_weights = ", dict_weights)

    ## obtain the file name.
    file_model_this, file_weights = get_fileModel_and_fileWeights(num=model_num, model_name=model_name, NUM_MODELS=NUM_MODELS, synth_type=synth_type, BASE_PATH=BASE_PATH)

    ## fit the model.
    print(f"FILE_MODEL_{model_num} = {file_model_this}")
    print(f"FILE_WEIGHTS_{model_num} = {file_weights}")

    EPOCHS = 20 # 10 gave worse results in CNN-ensemble
    BATCH = 16

    emb_model_content = get_model(model_name=model_name)
    # NOTE(review): binary_crossentropy is applied to a 2-unit softmax with
    # one-hot targets; categorical_crossentropy is the conventional pairing.
    # Left unchanged so results stay comparable with earlier runs.
    emb_model_content.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc', get_f1, get_recall])
    print("Model compiling done. Now, fit the model. Epochs=", EPOCHS, ", Batch-size=", BATCH)

    # No checkpoint callback is used: the model is saved once, after the last
    # epoch. (The ModelCheckpoint object that used to be built here was never
    # passed to fit(), so it has been removed.)
    emb_model_content.fit(padded_docs_this_model, dummy_Y_whole, epochs = EPOCHS, batch_size = BATCH, # batch_size = 32 before.
                          verbose = 1, validation_split = 0.1, class_weight=dict_weights)
    emb_model_content.save(file_model_this)
    del emb_model_content

    print(f"Saved done at file. {file_model_this}")

## Load from pickle files

In [8]:
import pickle

In [9]:
def get_file_names(num_models=10, synth_type="No"):
    """Return the (X, Y) pickle paths for the given ensemble size and synthesis type."""
    data_dir = "/content/drive/My Drive/Ensembling/Data-Labels/"
    # Both files share everything after the "X_train" / "Y_train" prefix.
    shared_tail = "_" + str(num_models) + "-Models_" + synth_type + "Synth.pkl"
    return data_dir + "X_train" + shared_tail, data_dir + "Y_train" + shared_tail

# Sanity check: show the default pair of pickle paths.
print(get_file_names(num_models=10, synth_type="No"))

def load_data(num_models=10, synth_type="No"):
    """Load the pickled per-member training inputs and labels.

    The two paths come from ``get_file_names``. After loading, the number of
    entries in the input list and the ``.shape`` of its first entry are printed.

    Note: ``pickle.load`` can execute arbitrary code, so only point this at
    files you trust.

    Returns:
        (X_train_list, Y_train_list) exactly as stored in the two pickle files.
    """
    file_X, file_Y = get_file_names(num_models=num_models, synth_type=synth_type)
    print(f"In load_data() fileX = {file_X}")

    unpickled = []
    for pickle_path in (file_X, file_Y):
        with open(pickle_path, 'rb') as handle:
            unpickled.append(pickle.load(handle))
    X_train_list, Y_train_list = unpickled

    print(len(X_train_list), X_train_list[0].shape)
    return X_train_list, Y_train_list

('/content/drive/My Drive/Ensembling/Data-Labels/X_train_10-Models_NoSynth.pkl', '/content/drive/My Drive/Ensembling/Data-Labels/Y_train_10-Models_NoSynth.pkl')


# For Model = LSTM

## Fit for NoSynthesis

In [10]:
# Settings shared by the training cells below.
BASE_PATH = "/content/drive/My Drive/Ensembling/Ens-Models/"  # directory the trained models are saved into
MODEL_NAME = "LSTM"  # architecture name passed to get_model()
synth_type = "No"  # synthesis tag: selects the pickle files and is embedded in the model file names

In [11]:
# Train and save one model per ensemble member on the no-synthesis data.
NUM_MODELS_FOR_ENSEMBLING = 29
X_train_list, Y_train_list = load_data(num_models=NUM_MODELS_FOR_ENSEMBLING, synth_type=synth_type)

print(f"NUM_MODELS_FOR_ENSEMBLING = {NUM_MODELS_FOR_ENSEMBLING}")
# Member i is trained on X_train_list[i] / Y_train_list[i].
for model_iter in range(NUM_MODELS_FOR_ENSEMBLING):
    # print(model_iter)
    fit_for_one_model(model_num=model_iter, list_encoded_contents_whole=X_train_list, list_labels_whole=Y_train_list, BASE_PATH=BASE_PATH, 
            model_name=MODEL_NAME, NUM_MODELS=NUM_MODELS_FOR_ENSEMBLING, synth_type=synth_type)
    print("\n\n")

In load_data() fileX = /content/drive/My Drive/Ensembling/Data-Labels/X_train_29-Models_NoSynth.pkl
29 (2993,)
NUM_MODELS_FOR_ENSEMBLING = 29
Inside fit_for_one_model() for model_num =  0
[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0]
[0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 

## Fit for Word-Synthesis

In [12]:
# Switch to the word-synthesis data set for the next training cell.
synth_type = "Word"

In [None]:
# Train and save one model per ensemble member on the word-synthesis data.
NUM_MODELS_FOR_ENSEMBLING = 14
X_train_list, Y_train_list = load_data(num_models=NUM_MODELS_FOR_ENSEMBLING, synth_type=synth_type)

print(f"NUM_MODELS_FOR_ENSEMBLING = {NUM_MODELS_FOR_ENSEMBLING}")
# Member i is trained on X_train_list[i] / Y_train_list[i].
for model_iter in range(NUM_MODELS_FOR_ENSEMBLING):
    # print(model_iter)
    fit_for_one_model(model_num=model_iter, list_encoded_contents_whole=X_train_list, list_labels_whole=Y_train_list, BASE_PATH=BASE_PATH, 
            model_name=MODEL_NAME, NUM_MODELS=NUM_MODELS_FOR_ENSEMBLING, synth_type=synth_type)
    print("\n\n")

In load_data() fileX = /content/drive/My Drive/Ensembling/Data-Labels/X_train_14-Models_WordSynth.pkl
14 (6095,)
NUM_MODELS_FOR_ENSEMBLING = 14
Inside fit_for_one_model() for model_num =  0
[1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0]
[0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

## Fit for Character-Synthesis

In [None]:
# Switch to the character-synthesis data set for the next training cell.
synth_type = "Char"

In [None]:
# Train and save one model per ensemble member on the character-synthesis data.
# NOTE(review): the output saved under this cell reports 10 models
# (X_train_10-Models_CharSynth.pkl), so the cell was edited after its last run.
# Confirm that 14 is intended and that the matching pickle files exist.
NUM_MODELS_FOR_ENSEMBLING = 14
X_train_list, Y_train_list = load_data(num_models=NUM_MODELS_FOR_ENSEMBLING, synth_type=synth_type)

print(f"NUM_MODELS_FOR_ENSEMBLING = {NUM_MODELS_FOR_ENSEMBLING}")
# Member i is trained on X_train_list[i] / Y_train_list[i].
for model_iter in range(NUM_MODELS_FOR_ENSEMBLING):
    # print(model_iter)
    fit_for_one_model(model_num=model_iter, list_encoded_contents_whole=X_train_list, list_labels_whole=Y_train_list, BASE_PATH=BASE_PATH, 
            model_name=MODEL_NAME, NUM_MODELS=NUM_MODELS_FOR_ENSEMBLING, synth_type=synth_type)
    print("\n\n")

In load_data() fileX = /content/drive/My Drive/Ensembling/Data-Labels/X_train_10-Models_CharSynth.pkl
10 (7347,)
NUM_MODELS_FOR_ENSEMBLING = 10
Inside fit_for_one_model() for model_num =  0
[1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0]
[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0

## Fit for SW-Synthesis

In [None]:
# Train and save one model per ensemble member on the SW-synthesis data.
# NOTE(review): the output saved under this cell reports 10 models
# (X_train_10-Models_SWSynth.pkl), so the cell was edited after its last run.
# Confirm that 14 is intended and that the matching pickle files exist.
synth_type = "SW"
NUM_MODELS_FOR_ENSEMBLING = 14
X_train_list, Y_train_list = load_data(num_models=NUM_MODELS_FOR_ENSEMBLING, synth_type=synth_type)

print(f"NUM_MODELS_FOR_ENSEMBLING = {NUM_MODELS_FOR_ENSEMBLING}")
# Member i is trained on X_train_list[i] / Y_train_list[i].
for model_iter in range(NUM_MODELS_FOR_ENSEMBLING):
    # print(model_iter)
    fit_for_one_model(model_num=model_iter, list_encoded_contents_whole=X_train_list, list_labels_whole=Y_train_list, BASE_PATH=BASE_PATH, 
            model_name=MODEL_NAME, NUM_MODELS=NUM_MODELS_FOR_ENSEMBLING, synth_type=synth_type)
    print("\n\n")

In load_data() fileX = /content/drive/My Drive/Ensembling/Data-Labels/X_train_10-Models_SWSynth.pkl
10 (7347,)
NUM_MODELS_FOR_ENSEMBLING = 10
Inside fit_for_one_model() for model_num =  0
[1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0]
[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 

# Now test using Test dataset

In [None]:
# Completion marker: shows that every cell above has finished running.
print("Done")

Done
