## 1. TRENIRANJE

In [4]:
import sys
import os
import tensorflow as tf

sys.path.append("../libs")
from classification import input_data
from classification import models
from classification import trainer
from classification import freeze

# Start from an empty default graph so that re-running the notebook does not
# pile new ops on top of ones created by an earlier run.
tf.reset_default_graph()

**FLAGS**

Modeli strojnog učenja često zahtijevaju da mnogi parametri budu zadani na početku modela te da se ti isti parametri po mogućnosti mogu lako mijenjati; to su npr. broj neurona u neuronskoj mreži, *learning rate*, *batch size* itd. Korištenje zastavica (*flags*) jednostavan je način da se neki parametar u kodu promijeni preko komandne linije. Pri definiranju zastavice prvi argument je njezino ime, drugi je zadana (*default*) vrijednost, a treći opis zastavice.

In [5]:
def del_all_flags(FLAGS):
    """Remove every flag currently registered on ``FLAGS``.

    Args:
        FLAGS: Flag container that exposes ``_flags()`` (iterating it yields
            the registered flag names) and supports ``__delattr__`` per name.
    """
    # Snapshot the names before deleting anything, in case a deletion
    # modifies the live mapping returned by ``_flags()``.
    registered_names = list(FLAGS._flags())
    for flag_name in registered_names:
        FLAGS.__delattr__(flag_name)





# Wipe the flags registered by an earlier run of this notebook; presumably
# needed so the DEFINE_* calls in the next cell can be executed again without
# clashing with already-registered flags.
del_all_flags(tf.flags.FLAGS)

In [6]:

flags = tf.app.flags
FLAGS = flags.FLAGS

# Every DEFINE_* call below takes: flag name, default value, help text.

# --- Folders ---------------------------------------------------------------
flags.DEFINE_string('data_dir', '../data/raw', 'Train Data Folder')
flags.DEFINE_string('summaries_dir', '../summaries', 'Summaries Folder')
flags.DEFINE_string('models_dir', '../models', 'Models Folder')

# --- Vocabulary and dataset split -------------------------------------------
flags.DEFINE_string(
    'wanted_words',
    'yes,no,up,down,left,right,on,off,stop,go',
    'Wanted Words')
flags.DEFINE_float('validation_percentage', 10, 'Validation Percentage')
flags.DEFINE_float('testing_percentage', 10, 'Testing Percentage')

# --- Audio clip and feature-extraction settings ------------------------------
flags.DEFINE_integer('sample_rate', 16000, 'Sample Rate')
flags.DEFINE_integer('clip_duration_ms', 1000, 'Clip Duration in ms')
flags.DEFINE_float(
    'window_size_ms', 40, 'How long each spectogram timeslice is')
flags.DEFINE_float(
    'window_stride_ms', 20,
    'How far to move in time between frequency windows.')
flags.DEFINE_integer(
    'dct_coefficient_count', 40,
    'How many bins to use for the MFCC fingerprint')
flags.DEFINE_float(
    'time_shift_ms', 100.0,
    'Range to randomly shift the training audio by in time.')

**MODEL**

Kao arhitektura modela izabrana je LSTM neuronska mreža, posebna vrsta rekurentnih neuronskih mreža koja je dobra za učenje dugotrajnih ovisnosti i pamti informacije u dužem periodu.
Neki od parametara koji su zadani su:

*silence_percentage* – udio primjera tišine u podacima za treniranje; povećavanjem će model davati više *true positive* rezultata za *silence*, ali po cijenu većeg broja *false negatives* za riječi

*unknown_percentage* – udio podataka za treniranje koji se bira iz *unknown* klase (uobičajeno 10 %, a u ovoj bilježnici 15 %); povećavanjem će model manje vjerojatno zamijeniti *unknown* riječi za željene, ali ako je udio prevelik, model će zaključiti da je najsigurnije sve riječi kategorizirati kao *unknown*

*learning_rate* – zadan je za određeni broj iteracija; kontrolira brzinu ažuriranja težina neuronske mreže

*background_volume* – glasnoća pozadinskog šuma; 0 je tišina, a 1 do kraja pojačana glasnoća

*background_frequency* – udio zvučnih zapisa kojima se dodaje pozadinski šum (*background noise*)

*batch_size* – broj primjera iz skupa za treniranje koji se koristi u jednoj iteraciji treniranja

In [7]:
# Architecture name: passed to models.create_model and reused as the
# model_name given to trainer.train.
model_architecture = 'lstm_l'

# Step intervals forwarded to trainer.train.
logging_interval = 10          # passed as logging_interval
eval_step_interval = 500       # passed positionally as the evaluation interval
save_step_interval = 50000     # passed as checkpoint_interval

# Data-mix percentages handed to input_data.AudioProcessor.
silence_percentage = 10.0
unknown_percentage = 15.0

# Background-noise settings used when fetching training batches.
background_frequency = 1
background_volume = 0.3

# Comma-separated schedules passed to trainer.train as strings; presumably the
# i-th learning rate applies for the i-th entry of train_steps (confirm in
# trainer.train).
learning_rate = '0.0005,0.0001,0.00002,0.0001,0.00002'
train_steps = '10000,10000,10000,15000,5000'

# Number of examples requested from audio_processor.get_data per call.
batch_size = 256


In [8]:
# Jupyter presumably starts the kernel with a "-f <connection file>" argument;
# a dummy 'f' flag is declared so that flag parsing accepts it.
# The guard keeps this cell re-runnable on its own: registering a flag name
# that already exists raises a duplicate-flag error, and the flags are only
# cleared by del_all_flags in an earlier cell.
if 'f' not in FLAGS._flags():
    tf.app.flags.DEFINE_string('f', '', 'kernel')

# Folder holding the training audio clips, relative to the data_dir flag.
train_dir = os.path.join(FLAGS.data_dir, 'train', 'audio')

In [9]:
# Number of labels = length of the list that prepare_words_list builds from
# the comma-separated wanted_words flag.
_label_count = len(
    input_data.prepare_words_list(FLAGS.wanted_words.split(',')))

# Settings object shared by the model and the audio pipeline.
model_settings = models.prepare_model_settings(
    _label_count,
    FLAGS.sample_rate,
    FLAGS.clip_duration_ms,
    FLAGS.window_size_ms,
    FLAGS.window_stride_ms,
    FLAGS.dct_coefficient_count)

# Data source used by the get_*_data helpers below.
audio_processor = input_data.AudioProcessor(
    train_dir,
    silence_percentage,
    unknown_percentage,
    FLAGS.wanted_words.split(','),
    FLAGS.validation_percentage,
    FLAGS.testing_percentage,
    model_settings,
    use_silence_folder=True)

../data/raw\train\audio\*\*.wav
..\data\raw\train\audio\_background_noise_\doing_the_dishes.wav
..\data\raw\train\audio\_background_noise_\dude_miaowing.wav
..\data\raw\train\audio\_background_noise_\exercise_bike.wav
..\data\raw\train\audio\_background_noise_\pink_noise.wav
..\data\raw\train\audio\_background_noise_\running_tap.wav
..\data\raw\train\audio\_background_noise_\white_noise.wav
Tensor("AudioSpectrogram:0", shape=(?, 49, 513), dtype=float32)


In [10]:
def get_train_data(args):
    """Fetch one batch from the 'training' partition.

    The configured background-noise and time-shift settings are passed along
    to ``audio_processor.get_data``.

    Args:
        args: The session object. Despite the plural name it is a single
            value and is forwarded unchanged to ``audio_processor.get_data``.

    Returns:
        A ``(fingerprints, ground_truth)`` tuple unpacked from the result of
        ``audio_processor.get_data``.
    """
    session = args
    # Convert the time-shift range from milliseconds to a number of samples.
    shift_in_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)
    fingerprints, labels = audio_processor.get_data(
        batch_size,
        0,
        model_settings,
        background_frequency,
        background_volume,
        shift_in_samples,
        'training',
        session)
    return fingerprints, labels

In [11]:
def get_val_data(args):
    """Fetch one batch from the 'validation' partition.

    Background noise and time shifting are switched off (all three related
    arguments are passed as zero).

    Args:
        args: A ``(sess, offset)`` pair; ``offset`` is forwarded as the second
            argument of ``audio_processor.get_data``.

    Returns:
        A ``(fingerprints, ground_truth)`` tuple unpacked from the result of
        ``audio_processor.get_data``.
    """
    session, offset = args
    fingerprints, labels = audio_processor.get_data(
        batch_size,
        offset,
        model_settings,
        0.0,
        0.0,
        0,
        'validation',
        session)
    return fingerprints, labels

In [12]:
def get_test_data(args):
    """Fetch one batch from the 'testing' partition.

    Background noise and time shifting are switched off (all three related
    arguments are passed as zero).

    Args:
        args: A ``(sess, offset)`` pair; ``offset`` is forwarded as the second
            argument of ``audio_processor.get_data``.

    Returns:
        A ``(fingerprints, ground_truth)`` tuple unpacked from the result of
        ``audio_processor.get_data``.
    """
    session, offset = args
    fingerprints, labels = audio_processor.get_data(
        batch_size,
        offset,
        model_settings,
        0.0,
        0.0,
        0,
        'testing',
        session)
    return fingerprints, labels

**PLACEHOLDER**  
Dosad smo upotrebljavali varijable za upravljanje podacima, no postoji i osnovnija struktura – *placeholder*. *Placeholder* je jednostavno varijabla kojoj ćemo vrijednost dodijeliti naknadno. To nam omogućuje da rezerviramo mjesto u memoriji u koje ćemo kasnije pohraniti vrijednosti.

In [13]:
def main(_):
    """Build the input placeholders and the model, then run the trainer.

    Args:
        _: Ignored. The parameter exists because ``tf.app.run`` invokes
            ``main`` with one argument.
    """
    # NOTE(review): this session is never closed inside this function;
    # confirm whether trainer.train takes care of that.
    session = tf.InteractiveSession()

    n_features = model_settings['fingerprint_size']
    n_labels = model_settings['label_count']

    # Placeholders: the first dimension is left open (None).
    fingerprint_input = tf.placeholder(
        tf.float32, [None, n_features], name='fingerprint_input')
    ground_truth_input = tf.placeholder(
        tf.float32, [None, n_labels], name='groundtruth_input')

    validation_size = audio_processor.set_size('validation')

    # Model
    logits, dropout_prob = models.create_model(
        fingerprint_input,
        model_settings,
        model_architecture,
        is_training=True)

    # Extra values bundled into the trainer's ``args`` parameter.
    trainer_extras = (dropout_prob, n_labels, batch_size, validation_size)
    trainer.train(
        session,
        logits,
        fingerprint_input,
        ground_truth_input,
        get_train_data,
        get_val_data,
        train_steps,
        learning_rate,
        eval_step_interval,
        logging_interval=logging_interval,
        checkpoint_interval=save_step_interval,
        model_name=model_architecture,
        summaries_dir=FLAGS.summaries_dir,
        args=trainer_extras)

**TRENIRANJE**

Tijekom treniranja vidimo na kojem smo koraku treniranja. Svakih deset koraka prikazuju nam se vrijednosti *accuracy* (točnost) i *cross entropy*. Točnost nam pokazuje koliki je udio primjera točno klasificiran na određenom koraku treniranja. Ovaj rezultat često varira, ali se u pravilu povećava kako se treniranje mreže bliži kraju. *Cross entropy* je vrijednost funkcije gubitka i trebala bi padati tijekom treniranja. Nakon svakih 500 koraka dobivamo matricu konfuzije.

In [None]:
# Hand control to TensorFlow's app runner, which calls main().
# NOTE(review): tf.app.run presumably exits via sys.exit once main returns,
# which would surface as SystemExit in a notebook — confirm this is acceptable.
tf.app.run(main=main)