# 1. Load Package

In [190]:
import tensorflow as tf #deep learning framework
from tensorflow.keras.layers import Input, Dense, Embedding, SimpleRNN
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
import pandas as pd #load dataset
import ast #change string into list
from collections import Counter #to trace the amount of samples each class
import plotly.express as px #data visualization
import string # handling punctuation

# 2. Load Dataset

In [2]:
from google.colab import drive
drive.mount('/content/drive/', force_remount = True)

Mounted at /content/drive/


In [80]:
df = pd.read_csv(r'/content/drive/MyDrive/ML/Multilabel Classification/arxiv_data_210930-054931.csv')

# 3. Exploratory Data Analysis

In [4]:
df.head()

Unnamed: 0,terms,titles,abstracts
0,['cs.LG'],Multi-Level Attention Pooling for Graph Neural...,Graph neural networks (GNNs) have been widely ...
1,"['cs.LG', 'cs.AI']",Decision Forests vs. Deep Networks: Conceptual...,Deep networks and decision forests (such as ra...
2,"['cs.LG', 'cs.CR', 'stat.ML']",Power up! Robust Graph Convolutional Network v...,Graph convolutional networks (GCNs) are powerf...
3,"['cs.LG', 'cs.CR']",Releasing Graph Neural Networks with Different...,With the increasing popularity of Graph Neural...
4,['cs.LG'],Recurrence-Aware Long-Term Cognitive Network f...,Machine learning solutions for pattern classif...


In [5]:
# there are 3 features, representing the class, titles, and abstract
df.columns

Index(['terms', 'titles', 'abstracts'], dtype='object')

In [6]:
# there are 56.181 samples
df.shape

(56181, 3)

In [7]:
# it seems that the terms feature is saved as string, we need to change it back into list or array
print(df.terms[2])
type(df.terms[2])

['cs.LG', 'cs.CR', 'stat.ML']


str

In [81]:
# change all elements in df.terms from string into list
df['terms'] = df.terms.apply(ast.literal_eval)

In [9]:
df.head()

Unnamed: 0,terms,titles,abstracts
0,[cs.LG],Multi-Level Attention Pooling for Graph Neural...,Graph neural networks (GNNs) have been widely ...
1,"[cs.LG, cs.AI]",Decision Forests vs. Deep Networks: Conceptual...,Deep networks and decision forests (such as ra...
2,"[cs.LG, cs.CR, stat.ML]",Power up! Robust Graph Convolutional Network v...,Graph convolutional networks (GCNs) are powerf...
3,"[cs.LG, cs.CR]",Releasing Graph Neural Networks with Different...,With the increasing popularity of Graph Neural...
4,[cs.LG],Recurrence-Aware Long-Term Cognitive Network f...,Machine learning solutions for pattern classif...


In [82]:
unique_class = []
# listing all unique classes
for i in df.terms:
  for j in i:
    if j not in unique_class:
      unique_class.append(j)
    else:
      pass

In [83]:
# there are so many unique class, there are 1177 unique class in this dataset
print(unique_class)
len(unique_class)

['cs.LG', 'cs.AI', 'cs.CR', 'stat.ML', 'cs.DC', 'cs.IT', 'math.IT', 'physics.data-an', 'cs.SI', 'cs.DS', '68T30', 'I.5.4', 'cs.CG', 'q-bio.QM', 'cs.CV', 'cs.CL', '68T05', 'cond-mat.dis-nn', 'cond-mat.stat-mech', 'cs.SD', 'eess.AS', 'cs.IR', 'I.2.6', 'cs.SY', 'eess.SY', '68T45 (Primary) 68T10, 68T07 (Secondary)', 'I.4.9; I.5.4; I.2.10', '68T07, 68T30, 68R99', 'I.2.0; I.2.4', 'cs.NA', 'math.NA', '68T07, 05C85, 42C40', 'I.2.4; I.2.6', 'math.ST', 'stat.TH', '62H30 (Primary) 54F45 (Secondary)', 'cs.DM', 'G.1.6; I.2.6', 'eess.SP', 'q-bio.NC', 'quant-ph', '68Txx, 81Pxx', 'I.2', 'cs.DB', 'math.AT', 'math.OC', 'eess.IV', 'cs.AR', 'cs.MM', 'physics.app-ph', 'physics.chem-ph', 'physics.optics', 'stat.CO', '62G08', 'cs.NI', 'stat.AP', 'cs.SE', 'cs.NE', 'econ.GN', 'q-fin.EC', 'cs.MS', 'math.GR', 'math.OA', 'cs.RO', '05C99, 62M45', 'G.2.2', 'stat.ME', 'hep-ex', 'cs.GR', 'math.MG', '42C40, 05C85, 11Y16', 'math.DS', 'math.PR', '62-07, 37H99', 'math.SP', 'math.FA', '37N99, 46E22, 47B32', 'astro-ph.IM',

1177

In [84]:
class_counter = Counter()
# counting every class by iterating through df['terms]
for row in df['terms']:
    class_counter.update(row)
class_counter = pd.DataFrame(class_counter.items(), columns=['class','count'])

# due to there are some class name that very very long we truncated into 10 characters only
trunc =[]
for i in range (0, class_counter['class'].shape[0]):
  j = class_counter['class'][i]
  if len(j) >=10:
    trunc.append(j[:10])
  else:
    trunc.append(j)

class_counter['trunc'] = trunc

In [13]:
class_counter.head()

Unnamed: 0,class,count,trunc
0,cs.LG,30939,cs.LG
1,cs.AI,8390,cs.AI
2,cs.CR,739,cs.CR
3,stat.ML,16570,stat.ML
4,cs.DC,279,cs.DC


In [85]:
fig = px.bar(class_counter, x='trunc', y='count')
fig.show()

In [86]:
# The highest amount of samples is cs.CV with 33k samples
# many of the classes have minimal samples and not even have 1k samples
# for now we will take only the class that have more than equal 500 samples

class_cleaned = class_counter.loc[class_counter['count'] >= 500]
class_cleaned.head()

Unnamed: 0,class,count,trunc
0,cs.LG,30939,cs.LG
1,cs.AI,8390,cs.AI
2,cs.CR,739,cs.CR
3,stat.ML,16570,stat.ML
8,cs.SI,684,cs.SI


In [103]:
# there are only 14 class with more than 500
class_cleaned.shape

(14, 3)

In [109]:
# we need to change the dataset into only have the 14 classes
list_classes = list(class_cleaned['class'])
list_classes

['cs.LG',
 'cs.AI',
 'cs.CR',
 'stat.ML',
 'cs.SI',
 'cs.CV',
 'cs.CL',
 'eess.SP',
 'math.OC',
 'eess.IV',
 'cs.MM',
 'cs.NE',
 'cs.RO',
 'cs.GR']

In [108]:
updated_class_list = []
# deleting all classes that are not included in list_classes
for i in df['terms']:
  #iterating_list = []
  for j in i:
    print(i)
    if j not in list_classes:
      i.remove(j)
    print(i)
      #iterating_list.append(j)
  #updated_class_list.append(iterating_list)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
['cs.CV']
['cs.CV']
['cs.LG', 'cs.SI']
['cs.LG', 'cs.SI']
['cs.LG', 'cs.SI']
['cs.LG', 'cs.SI']
['cs.LG', 'stat.ML']
['cs.LG', 'stat.ML']
['cs.LG', 'stat.ML']
['cs.LG', 'stat.ML']
['cs.LG', 'cs.AI', 'stat.ML']
['cs.LG', 'cs.AI', 'stat.ML']
['cs.LG', 'cs.AI', 'stat.ML']
['cs.LG', 'cs.AI', 'stat.ML']
['cs.LG', 'cs.AI', 'stat.ML']
['cs.LG', 'cs.AI', 'stat.ML']
['cs.LG', 'cs.SI', 'stat.ML']
['cs.LG', 'cs.SI', 'stat.ML']
['cs.LG', 'cs.SI', 'stat.ML']
['cs.LG', 'cs.SI', 'stat.ML']
['cs.LG', 'cs.SI', 'stat.ML']
['cs.LG', 'cs.SI', 'stat.ML']
['cs.LG', 'cs.SI', 'stat.ML']
['cs.LG', 'cs.SI', 'stat.ML']
['cs.LG', 'cs.SI', 'stat.ML']
['cs.LG', 'cs.SI', 'stat.ML']
['cs.LG', 'cs.SI', 'stat.ML']
['cs.LG', 'cs.SI', 'stat.ML']
['stat.ML', 'cs.AI', 'cs.LG']
['stat.ML', 'cs.AI', 'cs.LG']
['stat.ML', 'cs.AI', 'cs.LG']
['stat.ML', 'cs.AI', 'cs.LG']
['stat.ML', 'cs.AI', 'cs.LG']
['stat.ML', 'cs.AI', 'cs.LG']
['stat.ML', 'cs.LG']
['stat.ML', 'c

In [111]:
class_counter1 = Counter()
# counting every class by iterating through df['terms]
for row in df['terms']:
    class_counter1.update(row)
class_counter1 = pd.DataFrame(class_counter1.items(), columns=['class','count'])

# due to there are some class name that very very long we truncated into 10 characters only
trunc =[]
for i in range (0, class_counter1['class'].shape[0]):
  j = class_counter1['class'][i]
  if len(j) >=10:
    trunc.append(j[:10])
  else:
    trunc.append(j)

class_counter1['trunc'] = trunc

In [114]:
class_counter1.shape

(14, 3)

In [110]:
# checking if by deleting classes is there any missing value
df.isna().sum()
# there are no missing value

terms        0
titles       0
abstracts    0
dtype: int64

In [115]:
# because this is multi label problem, we need to change the class from ['class a', 'class b'] into separate feature
# similar to one hot encoding. We will create a new DataFrame called df_class that will create new feature based on the 14 classes
df_class = pd.DataFrame([], columns = list_classes)

#next is merge the 2 dataframe into 1 dataframe
df_processed = pd.concat([df,df_class], axis = 1)
df_processed.head()

Unnamed: 0,terms,titles,abstracts,cs.LG,cs.AI,cs.CR,stat.ML,cs.SI,cs.CV,cs.CL,eess.SP,math.OC,eess.IV,cs.MM,cs.NE,cs.RO,cs.GR
0,[cs.LG],Multi-Level Attention Pooling for Graph Neural...,Graph neural networks (GNNs) have been widely ...,,,,,,,,,,,,,,
1,"[cs.LG, cs.AI]",Decision Forests vs. Deep Networks: Conceptual...,Deep networks and decision forests (such as ra...,,,,,,,,,,,,,,
2,"[cs.LG, cs.CR, stat.ML]",Power up! Robust Graph Convolutional Network v...,Graph convolutional networks (GCNs) are powerf...,,,,,,,,,,,,,,
3,"[cs.LG, cs.CR]",Releasing Graph Neural Networks with Different...,With the increasing popularity of Graph Neural...,,,,,,,,,,,,,,
4,[cs.LG],Recurrence-Aware Long-Term Cognitive Network f...,Machine learning solutions for pattern classif...,,,,,,,,,,,,,,


In [75]:
df_processed.shape

(56181, 17)

In [118]:
#iterating each sample to change the NaN into 1 if the class is available for that sample
for i in range(0, df_processed.shape[0]):
  for j in df_processed['terms'][i]:
    df_processed[j][i] = 1

In [119]:
# checking if the processed succesfully or not
df_processed.head()

Unnamed: 0,terms,titles,abstracts,cs.LG,cs.AI,cs.CR,stat.ML,cs.SI,cs.CV,cs.CL,eess.SP,math.OC,eess.IV,cs.MM,cs.NE,cs.RO,cs.GR
0,[cs.LG],Multi-Level Attention Pooling for Graph Neural...,Graph neural networks (GNNs) have been widely ...,1,,,,,,,,,,,,,
1,"[cs.LG, cs.AI]",Decision Forests vs. Deep Networks: Conceptual...,Deep networks and decision forests (such as ra...,1,1.0,,,,,,,,,,,,
2,"[cs.LG, cs.CR, stat.ML]",Power up! Robust Graph Convolutional Network v...,Graph convolutional networks (GCNs) are powerf...,1,,1.0,1.0,,,,,,,,,,
3,"[cs.LG, cs.CR]",Releasing Graph Neural Networks with Different...,With the increasing popularity of Graph Neural...,1,,1.0,,,,,,,,,,,
4,[cs.LG],Recurrence-Aware Long-Term Cognitive Network f...,Machine learning solutions for pattern classif...,1,,,,,,,,,,,,,


In [120]:
# change from NaN into 0
df_processed = df_processed.fillna(0)
df_processed.head()

Unnamed: 0,terms,titles,abstracts,cs.LG,cs.AI,cs.CR,stat.ML,cs.SI,cs.CV,cs.CL,eess.SP,math.OC,eess.IV,cs.MM,cs.NE,cs.RO,cs.GR
0,[cs.LG],Multi-Level Attention Pooling for Graph Neural...,Graph neural networks (GNNs) have been widely ...,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"[cs.LG, cs.AI]",Decision Forests vs. Deep Networks: Conceptual...,Deep networks and decision forests (such as ra...,1,1,0,0,0,0,0,0,0,0,0,0,0,0
2,"[cs.LG, cs.CR, stat.ML]",Power up! Robust Graph Convolutional Network v...,Graph convolutional networks (GCNs) are powerf...,1,0,1,1,0,0,0,0,0,0,0,0,0,0
3,"[cs.LG, cs.CR]",Releasing Graph Neural Networks with Different...,With the increasing popularity of Graph Neural...,1,0,1,0,0,0,0,0,0,0,0,0,0,0
4,[cs.LG],Recurrence-Aware Long-Term Cognitive Network f...,Machine learning solutions for pattern classif...,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [121]:
df_processed.shape

(56181, 17)

# 4. NLP
---
we need to preprocess the text input so that the model can understand better about the context of the text. The NLP process that will be applied in this dataset is:
1. Lowercasing
2. contractions handling
3. punctuation handling
4. Word based tokenization

## 4.1. Lowercasing

In [None]:
# we are going to use the abstract as the input for our model
# we can easily lowercasing all of the text by using python bult in .lower()
for i in range(0,df_processed['abstracts'].shape[0]):
  df_processed['abstracts'][i] = df_processed['abstracts'][i].lower()

In [129]:
df_processed.head()

Unnamed: 0,terms,titles,abstracts,cs.LG,cs.AI,cs.CR,stat.ML,cs.SI,cs.CV,cs.CL,eess.SP,math.OC,eess.IV,cs.MM,cs.NE,cs.RO,cs.GR
0,[cs.LG],Multi-Level Attention Pooling for Graph Neural...,graph neural networks (gnns) have been widely ...,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"[cs.LG, cs.AI]",Decision Forests vs. Deep Networks: Conceptual...,deep networks and decision forests (such as ra...,1,1,0,0,0,0,0,0,0,0,0,0,0,0
2,"[cs.LG, cs.CR, stat.ML]",Power up! Robust Graph Convolutional Network v...,graph convolutional networks (gcns) are powerf...,1,0,1,1,0,0,0,0,0,0,0,0,0,0
3,"[cs.LG, cs.CR]",Releasing Graph Neural Networks with Different...,with the increasing popularity of graph neural...,1,0,1,0,0,0,0,0,0,0,0,0,0,0
4,[cs.LG],Recurrence-Aware Long-Term Cognitive Network f...,machine learning solutions for pattern classif...,1,0,0,0,0,0,0,0,0,0,0,0,0,0


## 4.2. contractions handling

In [None]:
def expand_contractions(text):
    # Dictionary of common English contractions and their expanded forms
    contractions_dict = {
        "ain't": "am not",
        "aren't": "are not",
        "can't": "cannot",
        "couldn't": "could not",
        "didn't": "did not",
        "doesn't": "does not",
        "don't": "do not",
        "hadn't": "had not",
        "hasn't": "has not",
        "haven't": "have not",
        "he'd": "he would",
        "he'll": "he will",
        "he's": "he is",
        "I'd": "I would",
        "I'll": "I will",
        "I'm": "I am",
        "I've": "I have",
        "isn't": "is not",
        "it's": "it is",
        "let's": "let us",
        "mustn't": "must not",
        "shan't": "shall not",
        "she'd": "she would",
        "she'll": "she will",
        "she's": "she is",
        "shouldn't": "should not",
        "that's": "that is",
        "there's": "there is",
        "they'd": "they would",
        "they'll": "they will",
        "they're": "they are",
        "they've": "they have",
        "we'd": "we would",
        "we'll": "we will",
        "we're": "we are",
        "we've": "we have",
        "weren't": "were not",
        "what'll": "what will",
        "what're": "what are",
        "what's": "what is",
        "what've": "what have",
        "where's": "where is",
        "who'd": "who would",
        "who'll": "who will",
        "who're": "who are",
        "who's": "who is",
        "who've": "who have",
        "won't": "will not",
        "wouldn't": "would not",
        "you'd": "you would",
        "you'll": "you will",
        "you're": "you are",
        "you've": "you have"
    }

    # Replace contractions with their expanded forms
    for contraction, expansion in contractions_dict.items():
        text = text.replace(contraction, expansion)

    return text

for i in range(0,df_processed['abstracts'].shape[0]):
  df_processed['abstracts'][i] =expand_contractions(df_processed['abstracts'][i])

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/index

In [133]:
df_processed.head()

Unnamed: 0,terms,titles,abstracts,cs.LG,cs.AI,cs.CR,stat.ML,cs.SI,cs.CV,cs.CL,eess.SP,math.OC,eess.IV,cs.MM,cs.NE,cs.RO,cs.GR
0,[cs.LG],Multi-Level Attention Pooling for Graph Neural...,graph neural networks (gnns) have been widely ...,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"[cs.LG, cs.AI]",Decision Forests vs. Deep Networks: Conceptual...,deep networks and decision forests (such as ra...,1,1,0,0,0,0,0,0,0,0,0,0,0,0
2,"[cs.LG, cs.CR, stat.ML]",Power up! Robust Graph Convolutional Network v...,graph convolutional networks (gcns) are powerf...,1,0,1,1,0,0,0,0,0,0,0,0,0,0
3,"[cs.LG, cs.CR]",Releasing Graph Neural Networks with Different...,with the increasing popularity of graph neural...,1,0,1,0,0,0,0,0,0,0,0,0,0,0
4,[cs.LG],Recurrence-Aware Long-Term Cognitive Network f...,machine learning solutions for pattern classif...,1,0,0,0,0,0,0,0,0,0,0,0,0,0


## 4.3. Punctuation handling

In [136]:
def remove_punctuation(text):
    # Define a translation table with punctuation characters mapped to None
    translator = str.maketrans('', '', string.punctuation)
    # Remove punctuation using translate method
    text_without_punctuation = text.translate(translator)
    return text_without_punctuation

# Example usage
for i in range(0,df_processed['abstracts'].shape[0]):
  df_processed['abstracts'][i] =remove_punctuation(df_processed['abstracts'][i])

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/index

In [137]:
df_processed.head()

Unnamed: 0,terms,titles,abstracts,cs.LG,cs.AI,cs.CR,stat.ML,cs.SI,cs.CV,cs.CL,eess.SP,math.OC,eess.IV,cs.MM,cs.NE,cs.RO,cs.GR
0,[cs.LG],Multi-Level Attention Pooling for Graph Neural...,graph neural networks gnns have been widely us...,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"[cs.LG, cs.AI]",Decision Forests vs. Deep Networks: Conceptual...,deep networks and decision forests such as ran...,1,1,0,0,0,0,0,0,0,0,0,0,0,0
2,"[cs.LG, cs.CR, stat.ML]",Power up! Robust Graph Convolutional Network v...,graph convolutional networks gcns are powerful...,1,0,1,1,0,0,0,0,0,0,0,0,0,0
3,"[cs.LG, cs.CR]",Releasing Graph Neural Networks with Different...,with the increasing popularity of graph neural...,1,0,1,0,0,0,0,0,0,0,0,0,0,0
4,[cs.LG],Recurrence-Aware Long-Term Cognitive Network f...,machine learning solutions for pattern classif...,1,0,0,0,0,0,0,0,0,0,0,0,0,0


## 4.4 Tokenization

In [147]:
# Create a tokenizer instance
tokenizer = tf.keras.preprocessing.text.Tokenizer()

# Fit tokenizer on all texts to build a shared vocabulary
tokenizer.fit_on_texts(df_processed['abstracts'])

# Tokenize each text
tokenized_texts = [tokenizer.texts_to_sequences([text])[0] for text in df_processed['abstracts']]

In [148]:
len(tokenized_texts)

56181

In [150]:
# search the maximum length
# and also creating distribution of the maximum length
max = 0
max_list = []
for i in tokenized_texts:
  max_list.append(len(i))
  if len(i) > max:
    max = len(i)
print(max)

498


In [156]:
#number of vocabulary
len(tokenizer.word_index)

101537

In [152]:
# the majority of the abstract length is 150 - 160 and the length is pretty diverse
# we might need to pad or truncate the input text
fig = px.histogram(x = max_list)
fig.show()

In [153]:
# Pad and truncate sequences
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(tokenized_texts, maxlen=200, padding='post', truncating='post')

In [195]:
tf.convert_to_tensor(df_processed[list_classes])

<tf.Tensor: shape=(56181, 14), dtype=int64, numpy=
array([[1, 0, 0, ..., 0, 0, 0],
       [1, 1, 0, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 1, 0, ..., 0, 0, 0]])>

# 5. Modelling

In [185]:
model = Sequential()
model.add(Embedding(input_dim=len(tokenized_texts)+1, output_dim=1024, input_length=200))
model.add(SimpleRNN(256, return_sequences = True))
model.add(SimpleRNN(64, return_sequences = True))
model.add(SimpleRNN(64, return_sequences = False))
model.add(Dense(14, activation = 'sigmoid'))

In [186]:
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_19 (Embedding)    (None, 200, 1024)         57530368  
                                                                 
 simple_rnn_12 (SimpleRNN)   (None, 200, 256)          327936    
                                                                 
 simple_rnn_13 (SimpleRNN)   (None, 200, 64)           20544     
                                                                 
 simple_rnn_14 (SimpleRNN)   (None, 64)                8256      
                                                                 
 dense_2 (Dense)             (None, 14)                910       
                                                                 
Total params: 57888014 (220.83 MB)
Trainable params: 57888014 (220.83 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [197]:
model.compile(loss='binary_crossentropy',
              optimizer=Adam(learning_rate=0.001),
              metrics=['accuracy'])

In [None]:
r = model.fit(padded_sequences, tf.convert_to_tensor(df_processed[list_classes]), epochs = 3)

Epoch 1/3
Epoch 2/3
Epoch 3/3