In [1]:
from google.colab import drive
# Mount Google Drive at /drive and make the mount point the working directory.
drive.mount('/drive')
%cd /drive

Drive already mounted at /drive; to attempt to forcibly remount, call drive.mount("/drive", force_remount=True).
/drive


In [2]:
%cd MyDrive

/drive/MyDrive


In [3]:
!pip install number_parser

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [4]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
from main_bot import *

In [6]:
# Preprocessing helper reused by the data-loading and evaluation cells.
# DatasetPreprocessor is not defined in this notebook; presumably it is
# provided by `from main_bot import *` — confirm.
dataset_preproc = DatasetPreprocessor()

In [7]:
import tensorflow as tf
from transformers import AutoTokenizer

# Checkpoint name used for the tokenizer here and, as the default value of the
# `model_name` argument, by JointIntentAndSlotFillingModel.
model_name = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [8]:
# Single source of truth for the dataset location; every file read in this
# cell lives under this directory.
SNIPS_DIR = 'dataset/dataset/snips'

# Training utterances and their tokenizer encoding.
text = dataset_preproc.read_file(f'{SNIPS_DIR}/train/seq.in')
encoded_texts = dataset_preproc.encode_texts(tokenizer, text)

# Intent labels -> intent_map, then the training intents encoded with that map.
intent_labels = dataset_preproc.read_intent_labels(f'{SNIPS_DIR}/intent_label.txt')
intent_map = dataset_preproc.get_intent_map(intent_labels)
intents = dataset_preproc.read_file(f'{SNIPS_DIR}/train/label')
encoded_intents = dataset_preproc.encode_intents(intents, intent_map)

# Slot annotations for the training utterances and the slot label map.
slots = dataset_preproc.read_file(f'{SNIPS_DIR}/train/seq.out')
slots_dict = dataset_preproc.get_slot_indexes(text, slots)
slot_label = dataset_preproc.preprocess_slot_labels(f'{SNIPS_DIR}/slot_label.txt')
slot_map = dataset_preproc.get_slot_map(slot_label)

# Length of the first encoded sequence, handed to encode_slots as max_len.
# NOTE(review): assumes encode_texts pads every sequence to the same length — confirm.
max_len = len(encoded_texts["input_ids"][0])
encoded_slots = dataset_preproc.encode_slots(slots_dict, text, tokenizer, slot_map, max_len)

In [9]:
from transformers import TFBertModel
from tensorflow.keras.layers import Dropout, Dense, GlobalAveragePooling1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy

class JointIntentAndSlotFillingModel(tf.keras.Model):
    """BERT encoder shared by two linear heads.

    The encoder's ``last_hidden_state`` feeds the slot classifier and its
    ``pooler_output`` feeds the intent classifier. Both go through the same
    dropout layer first.

    Args:
        intent_num_labels: number of units of the intent head.
        slot_num_labels: number of units of the slot head.
        model_name: checkpoint passed to ``TFBertModel.from_pretrained``;
            defaults to the notebook-level ``model_name`` as it was when the
            class was defined.
        dropout_prob: rate of the shared dropout layer.
    """

    def __init__(self, intent_num_labels=None, slot_num_labels=None,
                 model_name=model_name, dropout_prob=0.1):
        super().__init__(name="joint_intent_slot")
        # Sub-layers are created in a fixed order: encoder, dropout, intent head, slot head.
        self.bert = TFBertModel.from_pretrained(model_name)
        self.dropout = Dropout(dropout_prob)
        self.intent_classifier = Dense(intent_num_labels, name="intent_classifier")
        self.slot_classifier = Dense(slot_num_labels, name="slot_classifier")

    def call(self, inputs, **kwargs):
        """Return ``(slot_logits, intent_logits)`` for ``inputs``.

        All keyword arguments are forwarded to the BERT encoder; ``training``
        (default ``False``) additionally switches the dropout layer.
        """
        is_training = kwargs.get("training", False)
        encoder_out = self.bert(inputs, **kwargs)

        # Slot head: dropout over the encoder's last hidden state, then a linear layer.
        token_states = self.dropout(encoder_out.last_hidden_state, training=is_training)
        slot_logits = self.slot_classifier(token_states)

        # Intent head: dropout over the pooled encoder output, then a linear layer.
        pooled = self.dropout(encoder_out.pooler_output, training=is_training)
        intent_logits = self.intent_classifier(pooled)

        return slot_logits, intent_logits

# Build the model exactly once. This cell used to construct it twice: the
# first, compiled instance was thrown away immediately, so BERT was loaded two
# times and the instance that received the weights was never compiled.
joint_model = JointIntentAndSlotFillingModel(intent_num_labels=len(intent_map),
                                             slot_num_labels=len(slot_map))

opt = Adam(learning_rate=3e-5, epsilon=1e-08)

# The model returns (slot_logits, intent_logits): one loss per output, both
# computed on raw logits.
losses = [SparseCategoricalCrossentropy(from_logits=True),
          SparseCategoricalCrossentropy(from_logits=True)]

metrics = [SparseCategoricalAccuracy("accuracy")]
joint_model.compile(optimizer=opt, loss=losses, metrics=metrics)

# One forward pass on a dummy utterance so the subclassed model creates its
# variables before the saved weights are restored from the HDF5 file.
joint_model(tf.constant(tokenizer.encode("what is the weather in torre del greco"))[None, :])
joint_model.load_weights('joint_model.h5')

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.
Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initiali

In [10]:
# Dialog is defined outside this notebook. It receives the model after its
# weights were loaded, the intent_labels and slot_label values produced by the
# preprocessing cell, and the tokenizer.
# NOTE(review): the meaning of the first two arguments ("user", 0) is not
# visible here — confirm against Dialog's definition.
dialog = Dialog("user", 0, joint_model, intent_labels, slot_label, tokenizer)

In [11]:
from GUI import *
# Wrap the dialog object in the GUI and initialise it. GUI is presumably
# provided by `from GUI import *` — confirm.
view = GUI(dialog)
view.initGUI()

Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h-lv60-self and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


IntSlider(value=5, continuous_update=False, description='Duration (s):', max=20, min=1)

TwoByTwoLayout(children=(Button(description='Record', icon='check', layout=Layout(grid_area='top-left', height…

HBox(children=(Button(description='Export Graph', icon='check', layout=Layout(height='auto', width='60%'), sty…

In [12]:
# Ask the dialog object to save its graph.
# NOTE(review): destination file and format are decided inside Dialog — confirm.
dialog.save_graph()

In [13]:
# List the working directory.
# NOTE(review): this is the Drive root, so the saved output exposes unrelated
# personal file names — clear this cell's output before sharing the notebook.
!ls

'21 anni chiara'
 APA-MobileMockup.bmpr
 audio_ds.wav
 audio.wav
 CD.drawio
'Colab Notebooks'
'Compleanno chiara'
'Copy of Sezione LSTM_rete1.drawio'
 dataset
'diagramma di pert.drawio'
 example.graphml
'Foto compleanno Martina'
 GUI.py
 IMG_1620.JPG
 IMG-20160402-WA0018.jpg
 IMG-20160402-WA0019.jpg
 IMG-20160402-WA0020.jpg
 IMG-20160402-WA0021.jpg
 IMG-20160402-WA0022.jpg
 IMG_4325.png
 joint_model.h5
'Libri Springer.rar'
 main_bot.py
'[PC ITA] Age Of Empire III.zip'
 __pycache__
 Screenshot_2020-04-09-14-38-36.jpg
 sezione_conv_rete1.drawio
'Sezione LSTM.drawio'
'State chart IUM.drawio'
'StatementOfPurpose-Francesco Longobardi.gdoc'
'StatementOfPurpose-Francesco Longobardi.pdf'
 strategia3.drawio
 Tesi
'Untitled Diagram (1).drawio'
'Untitled Diagram (2).drawio'
'Untitled Diagram.drawio'
'Untitled folder'


In [14]:
# Evaluate intent prediction on the first N_EVAL test utterances.
# The test data gets its own names: this cell used to rebind `text` and
# `intent_labels`, so re-running the Dialog cell afterwards would have passed
# the test labels to Dialog instead of the labels read from intent_label.txt.
N_EVAL = 100
test_text = dataset_preproc.read_file('dataset/dataset/snips/test/seq.in')
# NOTE(review): the training intents (train/label) are read with read_file,
# while the test intents are read with read_intent_labels — confirm that this
# difference is intended.
test_intents = dataset_preproc.read_intent_labels('dataset/dataset/snips/test/label')
MetricsCalculator.intentMetrics(test_intents[:N_EVAL], test_text[:N_EVAL], dialog)

(0.12, 0.12, 0.12, 0.12)