# Image vs Math

In [None]:
# Switching to a dark theme - used once, then commented out.  # https://github.com/dunovank/jupyter-themes

In [None]:
# ! pip install jupyterthemes

In [None]:
# ! jt -t monokai
# ! jt -t onedork   # nicer colors for plotting

In [None]:
# new imports 
import os
import pandas as pd
import numpy as np
import IPython.display as ipd
from glob import glob
import matplotlib.pyplot as plt
%matplotlib inline

# imports from prev proj
import sys
import librosa
import librosa.display
from pathlib import Path
import csv
import warnings  # record warnings from librosa
from tqdm import tqdm
from audioread import NoBackendError
import datetime
import pickle as pkl
from pathlib import Path

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import train_test_split
from pickle import dump
from pickle import load
from sklearn.metrics import accuracy_score

from pydub.utils import make_chunks
from pydub import AudioSegment



from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dropout, MaxPooling2D, Activation,Dense
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.models import load_model

    
    
from shutil import copyfile



In [None]:
#  global objects
# todo add private/public inside class
class global_For_Clf():
    """
    Settings and bookkeeping values for one classifier.

    An instance is created per target label (for example 'scream'); the label is baked into
    every csv / pickle / results path stored on the instance.
    """

    def __init__(self, clf_label):
        # the label changes for every class (for example: scream, cry, ...)
        self.clf_label = clf_label  # a clf cannot be created without a label
        label = str(self.get_clf_label())

        # label dependent paths
        # the "20" in the file name is hardcoded (kept until project submission), it mirrors n_mfcc below
        self.data_file_path = f'csv/{label}/data_{label}_mfcc_20.csv'
        # relevant to modular file TODO currently this is only for scream
        self.csv_to_pkl_path = f'pickle/{label}/combined_lower_amount.pkl'
        # chosen 1:1 ratio data, selected from data.csv
        self.path_csv_train_test_data = f'csv/{label}/train_test_data.csv'
        self.resultsPath = f'results/{label}/experiments_results.csv'

        # feature extraction
        self.n_mfcc = 20  # feature resolution, usually between 12-40 (initial value here was 40)
        self.k_folds = 5  # amount of folds in k-fold
        # create_csv() appends the mfcc columns and the label column to this head
        # TODO lev-future_improvement edit/add to get better results
        self.csv_initial_head = 'filename spectral_centroid zero_crossings spectral_rolloff chroma_stft rms mel_spec'

        self.min_wav_duration = 0.5  # shorter wav files are not used

        # near-miss handling: 2 means <positives amount>/2,
        # i.e. 50% of the negatives are taken from nearMiss_<clf label>
        self.nearMissRatio = 2
        self.nearMiss_samples = -1  # -1 is an invalid placeholder, set later by the relevant functions
        self.nearMissLabel = "NearMiss_" + str(self.clf_label)

        self.Kfold_testSize = 0.2

        self.sampling_data_repetitions = 5  # random re-sampling of the data to create a 1:1 ratio
        self.k_fold_repetitions = 5  # repeated k-fold for better evaluation

        # -1 represents an invalid initial value
        self.positives = -1
        self.negatives = -1

        self.try_lower_amount = np.inf

        self.model = None  # a model is stored here; the stored model shouldn't be trained
        self.finalModelsPath = 'models/final_models'
        self.isTrained = False

        self.userInput = ''

        self.split_by_sec = 3  # split every X seconds
        self.tested_file_name = ''  # name of file for prediction
        # above 0.XX counts as a positive prediction; 0 means dont use this
        self.predictor_pos_percent_condition = 0

    def getInputDim(self):
        """Number of feature columns: the initial head without 'filename', plus the mfcc columns."""
        head_columns = self.csv_initial_head.split()
        return len(head_columns) + self.n_mfcc - 1  # -1 because filename isnt a feature

    def get_total_samples(self):
        """Sum of the stored positives and negatives counters."""
        return self.positives + self.negatives

    def get_model_name(self):
        """Class name of the stored model object ('NoneType' while no model is stored)."""
        return type(self.model).__name__

    def get_clf_label(self):
        """Label this classifier was created for."""
        return self.clf_label


#  exceptions
class NotEnoughPositiveSamples(Exception):
    """
    Exception carrying an arbitrary ``value``; its string form is ``repr(value)``.

    NOTE(review): presumably raised when there are too few positive samples - no raise site is
    visible in this part of the notebook.
    """

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return f'{self.value!r}'


In [None]:
def extract_feature_to_csv(wav_path, label, data_file_path, min_wav_duration, fcc_amount):
    """
    Extract summary features from one wav file and append them as a single row to a csv file.

    Row layout: file name, mean spectral centroid, mean zero crossing rate, mean spectral rolloff,
    mean chroma_stft, mean rms, mean mel spectrogram, one mean per MFCC coefficient, label.

    :param wav_path: pathlib.Path of the wav file; at most its first 5 seconds are loaded at 22050 Hz
    :param label: value written to the last column of the row
    :param data_file_path: csv file the row is appended to
    :param min_wav_duration: minimum duration in seconds; shorter files are reported and skipped
    :param fcc_amount: number of MFCC coefficients to compute
    :return: None; the result is the row appended to data_file_path
    """
    wav_name = wav_path.name  # 110142__ryding__scary-scream-4.wav
    wav_name = wav_name.replace(" ", "_")  # keep file names free of spaces, as in existing csv files

    wav_data, sampling_rate = librosa.load(wav_path, duration=5, sr=22050)
    wav_duration = librosa.get_duration(y=wav_data, sr=sampling_rate)

    # dont use really short audio
    if wav_duration < min_wav_duration:
        print("skipping " + wav_name + " ,duration= " + str(wav_duration))
        return

    with warnings.catch_warnings(record=True) as feature_warnings:
        feature_wav_spec_cent = librosa.feature.spectral_centroid(y=wav_data, sr=sampling_rate)
        zcr = librosa.feature.zero_crossing_rate(wav_data)
        rolloff = librosa.feature.spectral_rolloff(y=wav_data, sr=sampling_rate)
        chroma_stft = librosa.feature.chroma_stft(y=wav_data, sr=sampling_rate)

        # rms and mfccs
        mfccs = librosa.feature.mfcc(y=wav_data, sr=sampling_rate, n_mfcc=fcc_amount)
        # NOTE(review): rms is computed from the magnitude of the MFCC matrix, not from an STFT
        # spectrogram. Kept as is so new rows stay comparable with existing csv files - confirm intent.
        S, _ = librosa.magphase(mfccs)
        rms = librosa.feature.rms(S=S)

        mel_spec = librosa.feature.melspectrogram(y=wav_data, sr=sampling_rate)

        # Warnings are reported but do not stop the extraction. For example
        # "Trying to estimate tuning from empty frequency set" only means the clip is very quiet
        # (street ambient in the evening), which is still a good negative example.
        for feature_warning in feature_warnings:
            print("Warning: {} Triggered in:\n {}\nwith a duration of {} seconds.\n".format(
                feature_warning.message, wav_path, wav_duration))

        # Build the row as a list of fields instead of joining by spaces and splitting again,
        # so a label (or file name) containing whitespace cannot shift the csv columns.
        row = [wav_name]
        row += [f'{np.mean(feature)}'
                for feature in (feature_wav_spec_cent, zcr, rolloff, chroma_stft, rms, mel_spec)]
        row += [f'{np.mean(coefficient)}' for coefficient in mfccs]
        row.append(str(label))

        #  save to csv (append new line)
        with open(data_file_path, 'a', newline='') as file:
            csv.writer(file).writerow(row)


def create_csv(data_file_path, min_wav_duration, n_mfcc_number, header, fcc_amount,source="train"):
    """
    Create the feature csv for every wav file under ``source``, or validate an existing csv.

    Expected folder layout: <source>/<positive|negative>/<class name>/**/*.wav . The class folder
    name is written as the label of each row.

    If ``data_file_path`` already exists it is never overwritten: its header is checked to contain
    exactly ``n_mfcc_number`` columns starting with "mfcc" and the function returns.

    :param data_file_path: csv file to create (or validate)
    :param min_wav_duration: passed to extract_feature_to_csv; shorter wav files are skipped
    :param n_mfcc_number: mfcc column count an already existing csv must have
    :param header: space separated initial column names; mfcc_1..mfcc_<fcc_amount> and label are appended
    :param fcc_amount: number of MFCC coefficients extracted per wav file
    :param source: root folder of the wav dataset
    :raises Exception: when an existing csv holds a different number of mfcc columns
    """
    # never override an existing file - only verify that it fits the requested mfcc amount
    if os.path.exists(data_file_path):
        with open(data_file_path, newline='') as csv_file:
            # an empty file has no header row; it then counts as zero mfcc columns
            field_names_list = next(csv.reader(csv_file), [])
        len_actual_mfcc_features = sum(1 for name in field_names_list if name.startswith("mfcc"))
        if len_actual_mfcc_features == n_mfcc_number:
            print(f'OK: {len_actual_mfcc_features} ==  n_mfcc_number={n_mfcc_number}')
            return
        raise Exception(f'len_actual_mfcc_features'
                        f'(mfcc inside {data_file_path}={len_actual_mfcc_features},'
                        f' but n_mfcc_number(inside globals class of this script)={n_mfcc_number},'
                        f' values must be equal.')

    # header row: initial columns, then mfcc_1..mfcc_n, then label
    columns = header.split()  # split by spaces as default
    columns += [f'mfcc_{i}' for i in range(1, fcc_amount + 1)]
    columns.append('label')
    with open(data_file_path, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerow(columns)

    # reaching each wav file
    for path_label in sorted(Path(source).iterdir()):
        if not path_label.is_dir():
            # stray files (notes, OS metadata, ...) next to the label folders are not part of the dataset
            print("skipping non-directory: " + str(path_label))
            continue
        print("currently in : " + str(path_label))  # train\negative
        for path_class in tqdm(sorted(path_label.iterdir())):
            if not path_class.is_dir():
                print("skipping non-directory: " + str(path_class))
                continue
            print("currently in class: " + str(path_class))
            # amount of files directly inside the class folder (any extension)
            wav_amount = len(next(os.walk(path_class))[2])
            print("wav amount= " + str(wav_amount))

            # positives and negatives both use the class folder name as label,
            # which keeps near misses and far misses apart (scream / NearMiss_scream / ...)
            label = path_class.name

            print('covered WAV files: ')
            for count, wav_path in enumerate(sorted(path_class.glob('**/*.wav')), start=1):
                if count % 50 == 0:
                    print(str(count), end=' ', flush=True)  # progress tracking
                try:
                    extract_feature_to_csv(wav_path, label, data_file_path, min_wav_duration, fcc_amount)
                except NoBackendError:
                    # one file didnt work, continue to next one
                    print("audioread.NoBackendError " + "for wav path " + str(wav_path))

                    
def create_lower_bound_data_panda(csv_path, label, clf_lowerAmount, clf_train_test_data, random_state=None):
    """
    Build a balanced (1:1) sample of positives and negatives from the feature csv.

    Rows whose ``label`` column equals ``label`` are the positives, all other rows are treated as
    one negative group. ``lower_amount = min(#positives, #negatives, clf_lowerAmount)`` rows are
    drawn randomly (without replacement) from each group.

    note(lev): the previous project stratified the negatives so that up to 50% were near misses
    (NearMiss_<label>). "Image vs Math" starts with simplified experiments, so that logic is
    disabled here and the negatives are drawn from all non-positive rows.

    :param csv_path: feature csv created by create_csv()
    :param label: label of the positive class
    :param clf_lowerAmount: upper bound for the rows taken per group (np.inf for no bound)
    :param clf_train_test_data: path the balanced sample is written to as csv (index included)
    :param random_state: optional seed forwarded to DataFrame.sample for reproducible sampling
    :return: DataFrame with lower_amount positives followed by lower_amount negatives
    """
    print(f'choosing max samples randomly while preserving 1:1 ratio for {label}:<all the rest as one group>')
    data_csv = pd.read_csv(csv_path)

    is_positive = data_csv.label == label
    data_csv_positives = data_csv[is_positive]
    negatives = data_csv[~is_positive]

    # find lower amount from types of labels
    pos_amount = len(data_csv_positives)
    neg_amount = len(negatives)
    print("positives: " + str(pos_amount) + " negatives: " + str(neg_amount))
    # int(): DataFrame.sample only accepts an integer n, while the bound may be given as a float
    lower_amount = int(min(pos_amount, neg_amount, clf_lowerAmount))
    print("lower bound: " + str(lower_amount))

    positives_lower_amount_samples = data_csv_positives.sample(n=lower_amount, random_state=random_state)
    negatives_lower_amount = negatives.sample(n=lower_amount, random_state=random_state)

    # DataFrame.append was removed in pandas 2.0; pd.concat keeps the original row index the same way
    combined_lower_amount = pd.concat([positives_lower_amount_samples, negatives_lower_amount])

    assert (len(combined_lower_amount) == lower_amount * 2)
    combined_lower_amount.to_csv(clf_train_test_data)
    return combined_lower_amount

def get_scaled(np_df):
    """Standardize ``np_df`` with a StandardScaler that is fitted on ``np_df`` itself."""
    # fit() has to run before transform(); here both use the very same data
    return StandardScaler().fit(np_df).transform(np_df)


def plot_math_medians():
    """
    Scatter-plot the mean of every standardized feature for the positives (green) against the
    negatives (red) and save the figure as 'features_mean-Pos_vs_Neg'.

    Reads the balanced csv at ``clfGlobals.path_csv_train_test_data`` (notebook-level ``clfGlobals``).
    Despite the function name, the plotted statistic is the mean, not the median.
    """
    samples = pd.read_csv(clfGlobals.path_csv_train_test_data)
    feature_frame = samples.loc[:, ~samples.columns.isin(['Unnamed: 0', 'filename', 'label'])]  # take only features
    is_positive = (samples.label == clfGlobals.get_clf_label()).to_numpy()

    # Standardize positives and negatives TOGETHER. Standardizing each group on its own centers
    # every feature of that group at 0, so both plotted means would be ~0 for every feature.
    scaled = get_scaled(feature_frame.to_numpy())
    np_pos_scaled_mean = np.mean(scaled[is_positive], axis=0, dtype=np.float64)
    np_neg_scaled_mean = np.mean(scaled[~is_positive], axis=0, dtype=np.float64)

    features = feature_frame.columns

    # common y range covering both groups
    min_t = np.minimum(np.min(np_neg_scaled_mean), np.min(np_pos_scaled_mean))
    max_t = np.maximum(np.max(np_neg_scaled_mean), np.max(np_pos_scaled_mean))

    plt.figure(figsize=(50, 5))
    ax = plt.subplot(132)
    ax.set_xticklabels(features, rotation=30)
    ax.set_ylim([min_t, max_t])

    plt.scatter(features, np_pos_scaled_mean, c='green')
    plt.scatter(features, np_neg_scaled_mean, c='red')
    plt.suptitle('Mean of Features: Positive vs Negative')
    ax.set_xlabel('Features', fontsize=12)
    ax.set_ylabel('Mean value', fontsize=12)
    plt.tight_layout()
    plt.savefig('features_mean-Pos_vs_Neg')
    plt.show()
    
def normalize(x, axis=0):
    """Min-max scale ``x`` along ``axis`` using sklearn's ``minmax_scale`` with its default range."""
    scaled = minmax_scale(x, axis=axis)
    return scaled


def create_image_dataset_from_wav_dataset(source="train"):
    """
    Render every wav file under ``source`` as feature image(s) next to the wav files.

    Expected folder layout: <source>/<positive|negative>/<class name>/**/*.wav . Images are saved
    according to the next structure:
        <class folder>/<class name>_images/<name of wav file>_<feature type>.jpeg
    for example: train/positive/scream/1.wav ==> train/positive/scream/scream_images/1_MFCC.jpeg

    :param source: root folder of the wav dataset
    :return: None; the images are written by extract_features()
    """
    # reaching each wav file
    for path_label in sorted(Path(source).iterdir()):
        if not path_label.is_dir():
            # stray files next to the label folders are not part of the dataset
            print("skipping non-directory: " + str(path_label))
            continue
        print("currently in : " + str(path_label))  # train\negative
        for path_class in tqdm(sorted(path_label.iterdir())):
            if not path_class.is_dir():
                print("skipping non-directory: " + str(path_class))
                continue
            print("currently in class: " + str(path_class))
            # amount of files directly inside the class folder (any extension)
            wav_amount = len(next(os.walk(path_class))[2])
            print("wav amount= " + str(wav_amount))

            wav_paths = sorted(path_class.glob('**/*.wav'))
            # pathlib builds the path with the separator of the current OS (the former "\\" literals
            # only worked on Windows); the folder has to exist before savefig can write into it
            images_dir = path_class / (path_class.name + "_images")
            if wav_paths:
                images_dir.mkdir(exist_ok=True)

            print('covered WAV files to images: ')
            for count, wav_path in enumerate(wav_paths, start=1):
                if count % 50 == 0:
                    print(str(count), end=' ', flush=True)  # progress tracking
                try:
                    wav_data, sampling_rate = librosa.load(wav_path, duration=5, sr=22050)
                    img_path = str(images_dir / wav_path.stem)
                    extract_features(wav_data, sampling_rate, img_path)
                except NoBackendError:
                    # one file didnt work, continue to next one
                    print("audioread.NoBackendError " + "for wav path " + str(wav_path))
                    

def extract_features(wav_data,sampling_rate,img_path):
    """
    Wrapper for extracting features as images; currently it only saves the MFCC image.

    :param wav_data: audio samples, forwarded unchanged to extract_save_mfccs()
    :param sampling_rate: sampling rate of wav_data
    :param img_path: image path prefix; extract_save_mfccs() appends "_MFCC.jpeg" to it
    """
    extract_save_mfccs(wav_data,sampling_rate,img_path)

def extract_save_mfccs(wav_data,sampling_rate,img_path):
    """
    Plot the min-max normalized MFCCs of ``wav_data`` and save the plot as <img_path>_MFCC.jpeg.

    :param wav_data: audio samples
    :param sampling_rate: sampling rate of wav_data
    :param img_path: image path prefix (folder + wav file stem, without suffix)
    """
    img_path_save = img_path + "_MFCC.jpeg"

    fig = plt.figure(figsize=(10, 8))
    try:
        plt.subplot(2, 1, 1)
        mfccs = librosa.feature.mfcc(y=wav_data, sr=sampling_rate)
        librosa.display.specshow(normalize(mfccs), x_axis='time', sr=sampling_rate)

        plt.suptitle('Normalized MFCCS',y=1.03,x=0.43)
        plt.tight_layout()

        plt.savefig(img_path_save, bbox_inches='tight')
    finally:
        # this function runs once per wav file; without closing, every figure stays in memory
        plt.close(fig)
    
    
def create_entire_dataset():
    """
    Build the whole dataset for the 'scream' classifier: the feature csv, the balanced
    train/test csv and the feature images.
    """
    settings = global_For_Clf('scream')
    mfcc_count = settings.n_mfcc

    # 1. one csv row of features per wav file
    create_csv(settings.data_file_path, settings.min_wav_duration, mfcc_count,
               settings.csv_initial_head, mfcc_count)
    # 2. balanced 1:1 sample of positives / negatives
    create_lower_bound_data_panda(settings.data_file_path, settings.get_clf_label(),
                                  settings.try_lower_amount, settings.path_csv_train_test_data)
    # 3. one image per wav file
    create_image_dataset_from_wav_dataset()
    
    

# Exploring the data

In [None]:
# create csv with features
clfGlobals = global_For_Clf('scream')
# create_csv() has five required parameters; they all come from the classifier settings
create_csv(clfGlobals.data_file_path, clfGlobals.min_wav_duration, clfGlobals.n_mfcc,
           clfGlobals.csv_initial_head, clfGlobals.n_mfcc)

In [None]:
# balanced 1:1 sample; the upper bound and the output path are required parameters as well
create_lower_bound_data_panda(clfGlobals.data_file_path, clfGlobals.get_clf_label(),
                              clfGlobals.try_lower_amount, clfGlobals.path_csv_train_test_data)

In [None]:
# plot_math_medians() includes this and the next cell

samples = pd.read_csv(clfGlobals.path_csv_train_test_data)
feature_columns = ~samples.columns.isin(['Unnamed: 0','filename','label'])  # take only features
is_positive = (samples.label == clfGlobals.get_clf_label()).to_numpy()

pos = samples.loc[is_positive, feature_columns]
neg = samples.loc[~is_positive, feature_columns]
np_pos = pos.to_numpy()
np_neg = neg.to_numpy()

# Standardize positives and negatives TOGETHER. Standardizing each group on its own centers
# every feature of that group at 0, so both means would be ~0 for every feature.
scaled_features = get_scaled(samples.loc[:, feature_columns].to_numpy())
np_pos_scaled_mean = np.mean(scaled_features[is_positive], axis=0, dtype=np.float64)
np_neg_scaled_mean = np.mean(scaled_features[~is_positive], axis=0, dtype=np.float64)

features = pos.columns


In [None]:
# plot: per-feature mean of the scaled positives (green) vs the scaled negatives (red)
# uses features, np_pos_scaled_mean and np_neg_scaled_mean assigned earlier in the notebook

# common y range covering both groups
min_a=np.min(np_pos_scaled_mean)
max_a=np.max(np_pos_scaled_mean)
min_b=np.min(np_neg_scaled_mean)
max_b=np.max(np_neg_scaled_mean)
min_t=np.minimum(min_b,min_a )
max_t=np.maximum(max_b,max_a )


# very wide figure; subplot(132) is the middle cell of a 1x3 grid
plt.figure(figsize=(50, 5))
ax = plt.subplot(132)
ax.set_xticklabels(features, rotation=30)
axes = plt.gca()  # the subplot created above is the current axes
axes.set_ylim([min_t, max_t])

# feature names are used as the x values
plt.scatter(features, np_pos_scaled_mean,c='green')
plt.scatter(features, np_neg_scaled_mean,c='red')
plt.suptitle('Mean of Features: Positive vs Negative')
ax.set_xlabel('Features', fontsize=12)
ax.set_ylabel('Mean value', fontsize=12)
plt.tight_layout()
# no extension is given, so matplotlib picks its default image format
plt.savefig('features_mean-Pos_vs_Neg')
plt.show()

Show the differences between images of positives and negatives (the image aspect of Image vs Math).

In [None]:
# quick check of audio- how it sounds
# the paths are built with pathlib: valid on every OS and free of the invalid "\p" / "\s" escapes
pos_wav_path = str(Path("train", "positive", "scream", "1_scream_female_room.wav"))
#ipd.Audio(filename=pos_wav_path,rate=22050)
neg_wav_path = str(Path("train", "negative", "Far_miss", "0d20191008163744pnull_sec_start_0.wav"))
ipd.Audio(filename=neg_wav_path,rate=22050)



In [None]:
# both example recordings, positive first
wav_paths = [pos_wav_path, neg_wav_path]

In [None]:
# normalized spectral centroids over the waveform: negative example in red, positive in green
pos_wav_data, pos_sampling_rate = librosa.load(pos_wav_path, duration=5, sr=22050)
neg_wav_data, neg_sampling_rate = librosa.load(neg_wav_path, duration=5, sr=22050)

plt.figure(figsize=(25,5))
ax_2 = plt.subplot(133)

# negative example: every call uses the negative clip together with ITS sampling rate
feature_spectralCentroid = librosa.feature.spectral_centroid(y=neg_wav_data, sr=neg_sampling_rate)[0]
# compute time for visualization
frames = range(len(feature_spectralCentroid))
t = librosa.frames_to_time(frames, sr=neg_sampling_rate)
librosa.display.waveplot(neg_wav_data, sr=neg_sampling_rate,
                         alpha=0.4, label='Negative(Chat)', color='r')
# the centroid is min-max normalized (normalize() is defined with the helper functions above)
# so it fits the amplitude range of the waveform
plt.plot(t, normalize(feature_spectralCentroid), color='r',
         label='Spec_cent (Negative)')

# positive example
feature_spectralCentroid = librosa.feature.spectral_centroid(y=pos_wav_data, sr=pos_sampling_rate)[0]
frames = range(len(feature_spectralCentroid))
t = librosa.frames_to_time(frames, sr=pos_sampling_rate)
librosa.display.waveplot(pos_wav_data, sr=pos_sampling_rate, alpha=0.6,
                         label='Positive(Scream)', color='g')
plt.plot(t, normalize(feature_spectralCentroid), color='g',
         label='Spec_cent (Positive)')

ax_2.set_title('Normalized Spectral Centroids- Positive vs Negative')
ax_2.set_ylabel('Normalized value', fontsize=12)
ax_2.set_xlabel('Time(sec)', fontsize=12)

plt.legend(loc='lower right')
plt.tight_layout()

os.makedirs('images', exist_ok=True)  # savefig does not create missing folders
plt.savefig('images/spectral_centroids-Pos_vs_Neg.jpeg', bbox_inches='tight')
plt.show()


Zero crossing rate experiments...

In [None]:
# zero crossings
# first figure: waveform of the whole negative example
plt.figure(figsize=(14, 5))
librosa.display.waveplot(neg_wav_data, sr=neg_sampling_rate)
# second figure: zoom into 100 samples (n0..n1) of the negative example
n0 = 9000
n1 = 9100
plt.figure(figsize=(14, 5))
plt.plot(neg_wav_data[n0:n1])
plt.grid()



In [None]:
# compare the same 1500-sample window of the positive and the negative example
n0 = 17000
n1 = 18500
plt.figure(figsize=(14, 5))
plt.plot(pos_wav_data[n0:n1])
plt.suptitle('pos')
plt.grid()
plt.figure(figsize=(14, 5))
plt.suptitle('neg')
plt.plot(neg_wav_data[n0:n1])
plt.grid()

# spectral centroid of the negative window, computed with the negative clip's own sampling rate
feature_spectralCentroid = librosa.feature.spectral_centroid(y=neg_wav_data[n0:n1], sr=neg_sampling_rate)[0]
# compute time for visualization
frames = range(len(feature_spectralCentroid))
t = librosa.frames_to_time(frames, sr=neg_sampling_rate)

# Plotting the (not normalized) spectral centroid along the waveform, on the 'neg' figure
# NOTE(review): the centroid is drawn without normalize() here, unlike the other cells - confirm intent
librosa.display.waveplot(neg_wav_data[n0:n1], sr=neg_sampling_rate, alpha=0.4)
plt.plot(t, feature_spectralCentroid, color='r')

In [None]:
# amount of zero crossings inside the window n0..n1, positive example first
zero_crossings = librosa.zero_crossings(pos_wav_data[n0:n1], pad=False)
print('pos= ', zero_crossings.sum())

zero_crossings = librosa.zero_crossings(neg_wav_data[n0:n1], pad=False)
print('neg= ', zero_crossings.sum())

In [None]:
# listen to the positive example from sample n0 up to sample 17500
ipd.Audio(pos_wav_data[n0:17500], rate=22050)

In [None]:
# listen to the negative example from sample n0 up to sample 17500
ipd.Audio(neg_wav_data[n0:17500], rate=22050)

Combine the positive and negative examples into one image

In [None]:
# normalized zero crossing rate of the positive example, drawn along its waveform
feature_ZCR = librosa.feature.zero_crossing_rate(y=pos_wav_data)  # 2-D: (1, n_frames)
# compute time for visualization; the frames are in row 0, len(feature_ZCR) itself is 1
frames = range(len(feature_ZCR[0]))
t = librosa.frames_to_time(frames, sr=pos_sampling_rate)

# create the figure first so the waveform and the rate land on the same axes
plt.figure(figsize=(25,5))
ax_3 = plt.subplot(133)

librosa.display.waveplot(pos_wav_data, sr=pos_sampling_rate, alpha=0.6,
                         label='Positive(Scream)', color='g')
plt.plot(t, normalize(feature_ZCR[0]), color='g',
         label='ZCR (Positive)')

ax_3.set_title('Zero crossing rate- Positive')
ax_3.set_ylabel('Normalized value', fontsize=12)
ax_3.set_xlabel('Time (sec)', fontsize=12)

#plt.legend(loc='lower right')
plt.tight_layout()

plt.show()


In [None]:
# Normalized zero crossing rate drawn over the waveform: negative clip first, then positive.
plt.figure(figsize=(15,5))
ax_3 = plt.subplot(111)

zcr_series = (
    # clip, sampling rate, colour, waveform alpha, curve label, waveform label
    (neg_wav_data, neg_sampling_rate, 'r', 0.4, 'ZCR (Negative)', 'Negative(Chat)'),
    (pos_wav_data, pos_sampling_rate, 'g', 0.6, 'ZCR (Positive)', 'Positive(Scream)'),
)
for clip, clip_sr, shade, opacity, curve_label, wave_label in zcr_series:
    feature_ZCR = librosa.feature.zero_crossing_rate(y=clip)
    frames = range(len(feature_ZCR[0]))
    t = librosa.frames_to_time(frames)
    plt.plot(t, normalize(feature_ZCR[0]), color=shade, label=curve_label)
    librosa.display.waveplot(clip, sr=clip_sr, alpha=opacity, label=wave_label, color=shade)

# finishing visualization
ax_3.set_title('Normalized Zero Crossing Rate- Positive vs Negative')
ax_3.set_xlabel('Time (sec)', fontsize=12)
ax_3.set_ylabel('Normalized rate', fontsize=12)
plt.legend(loc='lower right')
plt.tight_layout()
plt.savefig('images/ZCR-Pos_vs_Neg.jpeg', bbox_inches='tight')
plt.show()


spectral rolloff

In [None]:
# Normalized spectral rolloff drawn over the waveform: negative clip first, then positive.
plt.figure(figsize=(15,5))
ax_4 = plt.subplot(122)

rolloff_series = [
    dict(clip=neg_wav_data, rate=neg_sampling_rate, shade='r', opacity=0.4,
         curve_label='SRO (Negative)', wave_label='Negative(Chat)'),
    dict(clip=pos_wav_data, rate=pos_sampling_rate, shade='g', opacity=0.6,
         curve_label='SRO (Positive)', wave_label='Positive(Scream)'),
]
for series in rolloff_series:
    feature_SRO = librosa.feature.spectral_rolloff(y=series['clip'])
    frames = range(len(feature_SRO[0]))
    t = librosa.frames_to_time(frames)
    plt.plot(t, normalize(feature_SRO[0]), color=series['shade'], label=series['curve_label'])
    librosa.display.waveplot(series['clip'], sr=series['rate'], alpha=series['opacity'],
                             label=series['wave_label'], color=series['shade'])

# finishing visualization
ax_4.set_title('Normalized Spectral rolloff- Positive vs Negative')
ax_4.set_xlabel('Time (sec)', fontsize=12)
ax_4.set_ylabel('Normalized value', fontsize=12)
plt.legend(loc='lower right')
plt.tight_layout()
plt.savefig('images/SRO-Pos_vs_Neg.jpeg', bbox_inches='tight')
plt.show()

chromagram

In [None]:
plt.figure(figsize=(15,5))
# Both panels must come from the same 1x2 grid; mixing 211 with 122 makes the axes overlap.
plt.subplot(121)

feature_chromagram = librosa.feature.chroma_stft(y=neg_wav_data, sr=neg_sampling_rate)
librosa.display.specshow(feature_chromagram, x_axis='time', y_axis='chroma')
plt.colorbar()
plt.ylabel('Scales')
# finishing visualization
plt.subplot(122)
feature_chromagram_true = librosa.feature.chroma_stft(y=pos_wav_data, sr=pos_sampling_rate)
librosa.display.specshow(feature_chromagram_true, x_axis='time', y_axis='chroma')
plt.colorbar()
plt.ylabel('Scales')
plt.tight_layout()

# ax_5[0].set_title('Cromagram scales intensity- Negative')
# ax_5[0].set_ylabel(, fontsize=12)
# ax_5[0].set_xlabel('Time (sec)', fontsize=12)

# # finishing visualization

# ax_5[1].set_title('Cromagram scales intensity- Positive')
# ax_5[1].set_ylabel('Scales', fontsize=12)
# ax_5[1].set_xlabel('Time (sec)', fontsize=12)
# plt.colorbar()

# plt.tight_layout()
# # plt.savefig('images/Cromagram-Pos_vs_Neg.jpeg', bbox_inches='tight')
# plt.show()

In [None]:
# Stacked chromagrams: negative clip on top, positive clip below.
plt.figure(figsize=(10, 8))

chroma_axes = []
chroma_panels = (
    (feature_chromagram, 'Negative', 'r'),
    (feature_chromagram_true, 'Positive', 'g'),
)
for row, (chroma, heading, shade) in enumerate(chroma_panels, start=1):
    panel = plt.subplot(2, 1, row)
    panel.set_title(heading, color=shade)
    librosa.display.specshow(chroma, x_axis='time', y_axis='chroma')
    plt.colorbar()
    plt.ylabel('Scale')
    chroma_axes.append(panel)
ax_6, ax_7 = chroma_axes

plt.suptitle('Chromagram scales - Positive vs Negative',y=1.03,x=0.43)
plt.tight_layout()
plt.savefig('images/Cromagram-Pos_vs_Neg.jpeg', bbox_inches='tight')
plt.show()


In [None]:
#  rms (and mfccs)
n_mfcc = 20  # resolution amount


def _rms_from_mfcc(clip, clip_sr):
    """Return (mfccs, magnitude, phase, rms) for one clip.

    NOTE(review): the "RMS" here is computed from MFCC magnitudes rather than
    from an STFT magnitude spectrogram - confirm this is intended.
    """
    coefficients = librosa.feature.mfcc(y=clip, sr=clip_sr, n_mfcc=n_mfcc)
    magnitude, angle = librosa.magphase(coefficients)
    return coefficients, magnitude, angle, librosa.feature.rms(S=magnitude)


rms = _rms_from_mfcc(neg_wav_data, neg_sampling_rate)[-1]
mfccs, S, phase, rms_pos = _rms_from_mfcc(pos_wav_data, pos_sampling_rate)

plt.figure(figsize=(10, 8))
ax_6 = plt.subplot(2, 1, 1)
for curve, legend_text, shade in ((rms, 'RMS Negative Energy', 'r'),
                                  (rms_pos, 'RMS Positive Energy', 'g')):
    plt.semilogy(curve.T, label=legend_text, color=shade)
    plt.xticks([])
    plt.xlim([0, curve.shape[-1]])

plt.legend()
plt.ylabel('Value')
plt.xlabel('Time')
plt.suptitle('RMS energy - Positive vs Negative',y=1.03,x=0.43)
plt.tight_layout()
plt.savefig('images/RMS_energy-Pos_vs_Neg.jpeg', bbox_inches='tight')
plt.show()

In [None]:
# Stacked mel spectrograms (in dB): negative clip on top, positive clip below.
plt.figure(figsize=(10, 8))

mel_axes = []
mel_inputs = ((neg_wav_data, neg_sampling_rate), (pos_wav_data, pos_sampling_rate))
for row, (clip, clip_sr) in enumerate(mel_inputs, start=1):
    mel_axes.append(plt.subplot(2, 1, row))
    S = librosa.feature.melspectrogram(y=clip, sr=clip_sr)
    S_dB = librosa.power_to_db(S, ref=np.max)
    librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=clip_sr, fmax=8000)
    plt.colorbar(format='%+2.0f dB')

ax_6, ax_7 = mel_axes
ax_6.set_title('Negative', color='r')
ax_7.set_title('Positive', color='g')
plt.suptitle('Mel-frequency spectrogram- Positive vs Negative',y=1.03,x=0.43)
plt.tight_layout()
plt.savefig('images/Mel_spec-Pos_vs_Neg.jpeg', bbox_inches='tight')
plt.show()


In [None]:
# Normalized MFCCs: negative clip on top, positive clip below.
plt.figure(figsize=(10, 8))
ax_6 = plt.subplot(2, 1, 1)
ax_6.set_title('Negative', color='r')
mfccs = librosa.feature.mfcc(y=neg_wav_data, sr=neg_sampling_rate)
librosa.display.specshow(normalize(mfccs), x_axis='time', sr=neg_sampling_rate)
plt.colorbar()

ax_7 = plt.subplot(2, 1, 2,)
# The positive panel must show MFCCs as well (not a mel spectrogram) so both panels are comparable.
mfccs_pos = librosa.feature.mfcc(y=pos_wav_data, sr=pos_sampling_rate)
librosa.display.specshow(normalize(mfccs_pos), x_axis='time', sr=pos_sampling_rate)
plt.colorbar()
ax_7.set_title('Positive', color='g')
plt.suptitle('Normalized MFCCS- Positive vs Negative',y=1.03,x=0.43)
plt.tight_layout()
plt.savefig('images/Normalized_MFCCS-Pos_vs_Neg.jpeg', bbox_inches='tight')
plt.show()


my code snippet experiments

In [None]:
import os
import IPython.display as ipd
# Build the path from its parts: avoids invalid "\p" / "\s" escape sequences and works on any OS.
wav_name = os.path.join("train", "positive", "scream", "1_scream_female_room.wav")
print(wav_name)

In [None]:
ipd.Audio(filename=wav_name,rate=22050)

In [None]:
import librosa
import librosa.display

# Load (at most) the first 5 seconds of the example file and draw its chromagram.
wav_data, sampling_rate = librosa.load(wav_name, sr=22050, duration=5)
chroma_stft = librosa.feature.chroma_stft(y=wav_data, sr=sampling_rate)

chroma_axes_kwargs = dict(y_axis='chroma', x_axis='time')
plt.figure(figsize=(10, 4))
librosa.display.specshow(chroma_stft, **chroma_axes_kwargs)
plt.colorbar()
plt.title('Chromagram')
plt.tight_layout()
plt.savefig('chroma')
plt.show()


In [None]:
chroma_stft.shape

## Experiment 1

Create an image dataset from wav files

use mfcc's first

In [None]:
create_image_dataset_from_wav_dataset()

In [None]:
create_entire_dataset() # using same images 1:1 ratio POS : NEG 

now let's create the models and try to train_test and see precision

image models...based on (edited, not copied!)
https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

In [None]:
# experimenting with image datagenerator

datagen = ImageDataGenerator(
        shear_range=0.2,
        zoom_range=0.2)
# options that were considered but left out:
#         rotation_range=40,
#         width_shift_range=0.2,
#         # height_shift_range=0.2,  # not logical for audio
#         # horizontal_flip=True, # not logical for audio- reverse scream?
#         fill_mode='nearest')

# Build the path from its parts: avoids invalid "\p" / "\s" escape sequences and works on any OS.
wav_p = os.path.join("train", "positive", "scream", "scream_images",
                     "1_scream_female_room_MFCC.jpeg")
img = load_img(wav_p)  # this is a PIL image (loaded at its original size)
x = img_to_array(img)  # Numpy array of the image; presumably (height, width, channels) - depends on the Keras image data format
x = x.reshape((1,) + x.shape)  # add a leading batch dimension of size 1

# the .flow() command below generates batches of randomly transformed images
# and saves the results to the `preview/` directory, which has to exist beforehand
Path('preview').mkdir(exist_ok=True)
i = 0
for batch in datagen.flow(x, batch_size=1,
                          save_to_dir='preview', save_prefix='', save_format='jpeg'):
    i += 1
    if i > 20:
        break  # otherwise the generator would loop indefinitely

In [None]:
experiment_1_images()

# Experiments functions

## Experiment 3 (preparations)
preparations for experiment 3

goal: compare different feature images and see who gives the best results

In [None]:
from pathlib import Path

# Folder layout for one feature's test images: one sub-folder per class.
feature = "zcr"
feature_path = "data\\test\\test_" + feature
scream_path = feature_path + "\\scream"
Far_Miss_path = feature_path + "\\Far_Miss"

for folder in (feature_path, scream_path, Far_Miss_path):
    # dont create new parents, dont raise error if destination exists
    Path(folder).mkdir(parents=False, exist_ok=True)


In [None]:
# exp_3 - goal: compare different feature images and see which gives the best results
clfGlobals = global_For_Clf('scream')
feature_accuracies = []
feature_names = "SpecCent Zcr SpecRol Chro Rms MelSpec".split()
# create the train/validation image dataset for all the above features
create_image_train_test_dataset_by_features(
    feature_names,
    clfGlobals.n_mfcc,
    source="train",
)

In [None]:
# create the test image dataset as well, from the wav files under "test_wav"
clfGlobals = global_For_Clf('scream')
feature_names = "SpecCent Zcr SpecRol Chro Rms MelSpec".split()
create_image_train_test_dataset_by_features(
    feature_names,
    clfGlobals.n_mfcc,
    source="test_wav",
)

In [None]:
# compare results: same "image receiving" model, but every time it will learn from a single feature

feature_names = "SpecCent Zcr SpecRol Chro Rms MelSpec".split()
epochs_vector = [50]
for train_epochs in epochs_vector:
    accuracies = []
    acc_validation = []
    acc_test = []
    for feature in feature_names:
        # each result is [validation accuracy, test accuracy]
        val_and_test = train_save_getRes_val_test(feature, train_epochs=train_epochs)
        accuracies.append(val_and_test)
        acc_validation.append(val_and_test[0])
        acc_test.append(val_and_test[1])

    plot_exp_3(feature_names, acc_validation, acc_test, train_epochs)

    
    

In [None]:
acc_test

In [None]:
# chose not to run this because my goal was to see if images can win in accuracy when given the
# same epoch amount.
clfGlobals = global_For_Clf('scream')
feature_accuracies= []
# create the MFCC image datasets; the builder iterates over its first argument,
# so a single feature has to be passed as a one-element list (a bare string is walked per character)
create_image_train_test_dataset_by_features(["MFCC"],clfGlobals.n_mfcc,source="train")
create_image_train_test_dataset_by_features(["MFCC"],clfGlobals.n_mfcc,source="test_wav")
feature_names = ["SpecCent","Zcr","SpecRol","Chro","Rms","MelSpec","MFCC"]
epochs_vector= [100]
for train_epochs in epochs_vector:
    accuracies= []
    for feature in feature_names:
        accuracies.append(train_save_getRes_val_test(feature,train_epochs=train_epochs))

    acc_validation= [x[0] for x in accuracies]
    acc_test= [x[1] for x in accuracies]

    plot_exp_3(feature_names,acc_validation, acc_test, train_epochs)

In [None]:
# checking to see that logic is working. success.

# compare results: same "image receiving" model, but every time it will learn from a single feature
train_epochs = 1
feature_names = "SpecCent Zcr SpecRol Chro Rms MelSpec".split()

accuracies = []
for feature in feature_names:
    accuracies += [train_save_getRes_val_test(feature, train_epochs=train_epochs)]

# split the [validation, test] pairs into two parallel lists
acc_validation = [pair[0] for pair in accuracies]
acc_test = [pair[1] for pair in accuracies]

plot_exp_3(feature_names, acc_validation, acc_test, train_epochs)

    

In [None]:
def plot_exp_3(feature_names,acc_validation, acc_test, train_epochs):
    """Plot validation and test accuracy per feature and save the figure.

    Args:
        feature_names: x-axis categories, one per trained model.
        acc_validation: validation accuracies, aligned with ``feature_names``.
        acc_test: test accuracies, aligned with ``feature_names``.
        train_epochs: epoch count, used in the title and in the saved file name.

    The figure is written to ``images/exp_3_Image_feature_accuracies_Epochs_<train_epochs>``
    and then shown.
    """
    plt.figure(figsize=(30, 5))
    ax = plt.subplot(132)
    # Markers make every point visible; a bare line draws nothing when there is a single feature.
    plt.plot(feature_names,acc_validation,label='Validation', color='yellow', marker='o')
    plt.plot(feature_names,acc_test,label='Test', color='purple', marker='o')

    header= 'Image models accuracies for '+ str(train_epochs) + ' training epochs'
    plt.suptitle(header)
    plt.legend(loc='lower right')
    ax.set_xlabel('Feature', fontsize=12)
    ax.set_ylabel('Accuracy', fontsize=12)

    p_save='images/exp_3_Image_feature_accuracies_Epochs_'+str(train_epochs)
    plt.savefig(p_save)
    plt.show()
    

In [None]:
# proof of concept: draw the accuracy plot from two made-up [validation, test] pairs.
feature_names = ["SpecCent","Zcr"]
train_epochs = 1
ti = [[10,0],[11,1]]
acc_validation, acc_test = (list(column) for column in zip(*ti))
print(acc_validation,acc_test)

plt.figure(figsize=(30, 5))
ax = plt.subplot(132)
for curve, curve_label, shade in ((acc_validation, 'validation', 'yellow'),
                                  (acc_test, 'test', 'purple')):
    plt.plot(feature_names, curve, label=curve_label, color=shade)

header = 'Image models accuracies for ' + str(train_epochs) + ' training epochs'
plt.suptitle(header)
plt.legend(loc='lower right')
ax.set_ylabel('Accuracy', fontsize=12)
ax.set_xlabel('Feature', fontsize=12)

plt.savefig('images/exp_3_Image_feature_accuracies')
plt.show()


In [None]:
def train_save_getRes_val_test(feature,train_epochs=50):
    """Train the image model on one feature's images and return its accuracies.

    Images are read from ``data/train/train_<feature>``,
    ``data/validation/validation_<feature>`` and ``data/test/test_<feature>``
    (class sub-folders, binary labels). The model comes from
    ``experiment_1_get_model``; its weights are saved to
    ``weights/exp_3/weights_<feature>.h5`` after training.

    Args:
        feature: feature name used to build the directory and weight-file names.
        train_epochs: number of training epochs.

    Returns:
        ``[validation_accuracy, test_accuracy]``: the validation accuracy of the
        last epoch and the accuracy measured on the test directory.
    """
    val_acc_slot = 0
    test_acc_slot = 1
    acc_val_test = [None, None]

    batch_size = 5
    image_size = (150, 150)  # all images will be resized to 150x150

    # this is the augmentation configuration we will use for training
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=False)  # no flipping: reversed audio isnt relevant

    # validation and test images are only rescaled
    test_datagen = ImageDataGenerator(rescale=1./255)

    # generator that reads pictures found in the sub-folders of the train
    # directory and indefinitely yields batches of augmented image data
    path_train = 'data/train/train_' + str(feature)
    train_generator = train_datagen.flow_from_directory(
        path_train,
        target_size=image_size,
        batch_size=batch_size,
        class_mode='binary')  # binary labels, as used with a binary_crossentropy loss

    # this is a similar generator, for validation data
    path_val = 'data/validation/validation_' + str(feature)
    validation_generator = test_datagen.flow_from_directory(
        path_val,
        target_size=image_size,
        batch_size=batch_size,
        class_mode='binary')

    input_shape_image = (150, 150, 3)
    model = experiment_1_get_model(input_shape_image)
    history = model.fit_generator(
        train_generator,
        steps_per_epoch=2000 // batch_size,
        epochs=train_epochs,
        validation_data=validation_generator,
        validation_steps=800 // batch_size)

    # always save the weights after training; make sure the target folder exists first
    path_model = 'weights/exp_3/weights_' + str(feature) + '.h5'
    Path(path_model).parent.mkdir(parents=True, exist_ok=True)
    model.save_weights(path_model)

    # 'accuracy' is the training accuracy; the validation figure is stored under 'val_accuracy'
    acc_val_test[val_acc_slot] = history.history['val_accuracy'][-1]

    # test set
    path_test = 'data/test/test_' + str(feature)
    test_generator = test_datagen.flow_from_directory(
        path_test,
        target_size=image_size,
        batch_size=batch_size,
        class_mode='binary')

    # one pass over every test image, whatever the folder size (ceil division)
    test_steps = -(-test_generator.samples // batch_size)
    # assumes the model reports [loss, accuracy] - TODO confirm against experiment_1_get_model
    test_loss_and_metrics = model.evaluate_generator(generator=test_generator, steps=test_steps)
    acc_val_test[test_acc_slot] = test_loss_and_metrics[1]
    print("feature: ", feature,". acc: [validation, test]: ", acc_val_test)
    return acc_val_test


In [None]:
# `acc_val_test` only exists inside train_save_getRes_val_test; the latest result is the last entry of `accuracies`.
print("feature: ", feature,". acc: [validation, test]: ", accuracies[-1])

In [None]:
from shutil import copyfile

def create_image_train_test_dataset_by_features(feature_names,mfcc_amount,source="train"):
    """Render feature images for every wav file under ``source`` and sort them into dataset folders.

    ``source`` is walked as ``<source>/<label dir>/<class dir>/**/*.wav``. For each wav
    file and each feature, ``extract_save_by_feature`` draws an image into
    ``<class dir>/<class name>_images_<feature>/`` and the image is then copied to
    ``data/<split>/<split>_<feature>/<class name>/<wav stem>.jpeg``.

    When ``source == "train"`` the first files of every class (in sorted order) go to
    the ``train`` split and the rest to ``validation``; any other source goes to ``test``.

    Args:
        feature_names: feature names to render; a single name may be given as a string.
        mfcc_amount: forwarded to ``extract_save_by_feature``.
        source: root directory of the wav files.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(feature_names, str):
        feature_names = [feature_names]

    # TODO can improve copying logic of files. for now its hard coded
    # NOTE(review): with the "<=" test below, train_amount + 1 files per class land in train.
    train_amount = 300

    # reaching each wav file
    for path_label in sorted(Path(source).iterdir()):
        print("currently in : " + str(path_label))  # train\negative
        for path_class in tqdm(sorted(path_label.iterdir())):
            # print info
            print("currently in class: " + str(path_class))
            # amount of files directly inside the class directory
            onlyfiles = next(os.walk(path_class))[2]
            print("wav amount= " + str(len(onlyfiles)))

            # positive and negative classes are both labelled by their directory name
            # (e.g. scream, NearMiss_scream)
            label = path_class.name

            count = 0  # for progress tracking
            current_copied = 0  # once this passes train_amount, files go to validation
            print('covered WAV files to images: ')
            for wav_path in sorted(path_class.glob('**/*.wav')):
                count += 1
                if (count % 50) == 0:
                    print(str(count), end=' ', flush=True)

                try:
                    wav_data, sampling_rate = librosa.load(wav_path, duration=5, sr=22050)
                except NoBackendError:
                    print("audioread.NoBackendError " + "for wav path " + str(wav_path))
                    continue  # one file didnt work, continue to next one

                if source == "train":
                    split = "train" if current_copied <= train_amount else "validation"
                else:
                    split = "test"

                for feature in feature_names:
                    # raw images are kept next to the wav files, inside a per-feature folder
                    images_dir = path_class / (str(path_class.name) + "_images_" + str(feature))
                    images_dir.mkdir(parents=False, exist_ok=True)
                    img_path = str(images_dir / wav_path.stem)
                    img_saved_path = extract_save_by_feature(wav_data,sampling_rate,img_path,feature,mfcc_amount)

                    # copy into the dataset layout read by flow_from_directory
                    dest_dir = Path("data") / split / (split + "_" + str(feature)) / label
                    dest_dir.mkdir(parents=True, exist_ok=True)  # create missing parent directories
                    copyfile(img_saved_path, str(dest_dir / (str(wav_path.stem) + ".jpeg")))

                current_copied += 1



In [None]:
def extract_save_by_feature(wav_data,sampling_rate,img_path,feature,mfcc_amount):
    """Draw one audio feature of ``wav_data`` and save it as a JPEG.

    Args:
        wav_data: audio samples, passed to librosa as ``y``.
        sampling_rate: sampling rate of ``wav_data``.
        img_path: output path without suffix; ``_<feature>.jpeg`` is appended.
        feature: one of "MFCC", "SpecCent", "Zcr", "SpecRol", "Chro", "Rms", "MelSpec".
            Any other value only triggers a warning and produces an image without data.
        mfcc_amount: number of MFCCs used by the "MFCC" and "Rms" features.

    Returns:
        The path of the saved image.
    """
    img_path_save = img_path +"_"+str(feature)+".jpeg"
    fig = plt.figure(figsize=(10, 8))
    plt.subplot(2, 1, 1)

    try:
        if feature == "MFCC":
            mfccs = librosa.feature.mfcc(y=wav_data, sr=sampling_rate, n_mfcc=mfcc_amount)
            librosa.display.specshow(normalize(mfccs), x_axis='time', sr=sampling_rate)

        elif feature == "SpecCent":
            feature_spectralCentroid = librosa.feature.spectral_centroid(y=wav_data, sr=sampling_rate)[0]
            t = librosa.frames_to_time(range(len(feature_spectralCentroid)), sr=sampling_rate)
            plt.plot(t, normalize(feature_spectralCentroid))

        elif feature == "Zcr":
            feature_ZCR = librosa.feature.zero_crossing_rate(y=wav_data)
            t = librosa.frames_to_time(range(len(feature_ZCR[0])), sr=sampling_rate)
            plt.plot(t, normalize(feature_ZCR[0]))

        elif feature == "SpecRol":
            feature_SRO = librosa.feature.spectral_rolloff(y=wav_data, sr=sampling_rate)
            t = librosa.frames_to_time(range(len(feature_SRO[0])), sr=sampling_rate)
            plt.plot(t, normalize(feature_SRO[0]))

        elif feature == "Chro":
            feature_chromagram = librosa.feature.chroma_stft(y=wav_data, sr=sampling_rate)
            librosa.display.specshow(feature_chromagram, x_axis='time', y_axis='chroma')

        elif feature == "Rms":
            # NOTE(review): RMS is computed from MFCC magnitudes, not from an STFT
            # magnitude spectrogram - confirm this is intended.
            mfccs = librosa.feature.mfcc(y=wav_data, sr=sampling_rate, n_mfcc=mfcc_amount)
            S, phase = librosa.magphase(mfccs)
            rms = librosa.feature.rms(S=S)
            plt.semilogy(rms.T)
            plt.xticks([])
            plt.xlim([0, rms.shape[-1]])

        elif feature == "MelSpec":
            S = librosa.feature.melspectrogram(y=wav_data, sr=sampling_rate)
            S_dB = librosa.power_to_db(S, ref=np.max)
            librosa.display.specshow(S_dB, x_axis='time',
                                     y_axis='mel', sr=sampling_rate,
                                     fmax=8000)

        else:
            # keep the best-effort behaviour (an image is still written) but make it visible
            warnings.warn("unknown feature '" + str(feature) + "': the saved image contains no data")

        plt.suptitle(feature,y=1.03,x=0.43)
        plt.tight_layout()
        plt.savefig(img_path_save, bbox_inches='tight')
    finally:
        # this runs once per wav file and feature; unclosed figures would pile up in memory
        plt.close(fig)
    return img_path_save
    

Verifying the appearance of the images produced by the function in the cell above

In [None]:
# Mel spectrogram (in dB) of the example clip.
mel_axes_kwargs = dict(x_axis='time', y_axis='mel', sr=sampling_rate, fmax=8000)
S = librosa.feature.melspectrogram(y=wav_data, sr=sampling_rate)
S_dB = librosa.power_to_db(S, ref=np.max)
librosa.display.specshow(S_dB, **mel_axes_kwargs)

plt.suptitle("Mel", x=0.43, y=1.03)
plt.tight_layout()
plt.savefig("testing_7.jpeg", bbox_inches='tight')
plt.show()

In [None]:
# "RMS" curve of the example clip, computed from the magnitudes of its MFCCs.
mfcc_amount = 20  # resolution amount
mfccs = librosa.feature.mfcc(y=wav_data, sr=sampling_rate, n_mfcc=mfcc_amount)
S, phase = librosa.magphase(mfccs)
rms = librosa.feature.rms(S=S)

frame_total = rms.shape[-1]
plt.semilogy(rms.T)
plt.xticks([])
plt.xlim([0, frame_total])

plt.suptitle("RMS", x=0.43, y=1.03)
plt.tight_layout()
plt.savefig("testing_5.jpeg", bbox_inches='tight')
plt.show()

In [None]:
# Chromagram of the example clip.
chroma_axes_kwargs = dict(x_axis='time', y_axis='chroma')
feature_chromagram = librosa.feature.chroma_stft(y=wav_data, sr=sampling_rate)
librosa.display.specshow(feature_chromagram, **chroma_axes_kwargs)

plt.suptitle("CHRO", x=0.43, y=1.03)
plt.tight_layout()
plt.savefig("testing_4.jpeg", bbox_inches='tight')
plt.show()

In [None]:
# Normalized spectral rolloff of the example clip over time.
feature_SRO = librosa.feature.spectral_rolloff(y=wav_data)
rolloff_curve = feature_SRO[0]
frames = range(len(rolloff_curve))
t = librosa.frames_to_time(frames)
plt.plot(t, normalize(rolloff_curve))

plt.suptitle("SPECT", x=0.43, y=1.03)
plt.tight_layout()
plt.savefig("testing_3.jpeg", bbox_inches='tight')
plt.show()

In [None]:
# Normalized zero crossing rate of the example clip over time (no title on purpose).
feature_ZCR = librosa.feature.zero_crossing_rate(y=wav_data)
zcr_curve = feature_ZCR[0]
frames = range(len(zcr_curve))
t = librosa.frames_to_time(frames)
plt.plot(t, normalize(zcr_curve))

plt.tight_layout()
plt.savefig("testing_2.jpeg", bbox_inches='tight')
plt.show()


In [None]:
# Normalized spectral centroid of the example clip over time.
centroid_rows = librosa.feature.spectral_centroid(y=wav_data, sr=sampling_rate)
feature_spectralCentroid = centroid_rows[0]
frames = range(len(feature_spectralCentroid))
t = librosa.frames_to_time(frames)
plt.plot(t, normalize(feature_spectralCentroid))

plt.suptitle("SpecCent", x=0.43, y=1.03)
plt.tight_layout()
plt.savefig("testing.jpeg", bbox_inches='tight')
plt.show()


In [None]:
# Spectral centroid of the negative clip; the sampling rate has to belong to the same clip.
feature_spectralCentroid = librosa.feature.spectral_centroid(y=neg_wav_data, sr=neg_sampling_rate)[0]
# compute time for visualization
frames = range(len(feature_spectralCentroid))
t = librosa.frames_to_time(frames, sr=neg_sampling_rate)

plt.figure(figsize=(25,5))
ax_2 = plt.subplot(133)
# Plotting the normalized spectral centroid along the waveform
librosa.display.waveplot(neg_wav_data, sr=neg_sampling_rate,
                         alpha=0.4,label='Negative(Chat)',color= 'r')
plt.plot(t, normalize(feature_spectralCentroid), color='r'
        ,label='Spec_cent (Negative)')

# Experiment 3

In [None]:
# run in the above cells to show a visual process...
# exp_3()


# Experiment 4

preparations- draw one image with 3 features in 2 graphs.

In [None]:
plt.figure(figsize=(10, 10))

# top panel: mel spectrogram (in dB)
ax_6 = plt.subplot(2, 1, 1)
ax_6.set_title('MelSpec')
S = librosa.feature.melspectrogram(y=wav_data, sr=sampling_rate)
S_dB = librosa.power_to_db(S, ref=np.max)
librosa.display.specshow(S_dB, x_axis='time',
              y_axis='mel', sr=sampling_rate,
              fmax=8000)

# bottom panel: normalized Rms and spectral rolloff curves
ax_7 = plt.subplot(2, 1, 2)
ax_7.set_title('SpecRol(Red), Rms(Green)')
mfccs = librosa.feature.mfcc(y=wav_data, sr=sampling_rate, n_mfcc=mfcc_amount)
S, phase = librosa.magphase(mfccs)
rms = librosa.feature.rms(S=S)
# the time axis is computed here, before plotting, instead of reusing a `t` left over from another cell
frames = range(rms.shape[-1])
t = librosa.frames_to_time(frames)
plt.plot(t, normalize(rms[0]), color='g'
        ,label='Rms')
feature_SRO = librosa.feature.spectral_rolloff(y=wav_data)
frames = range(len(feature_SRO[0]))
t = librosa.frames_to_time(frames)
plt.plot(t, normalize(feature_SRO[0]), color='r'
        ,label='SpecRol')
ax_7.set_ylabel('Normalized value', fontsize=12)
ax_7.set_xlabel('Time (sec)', fontsize=12)

plt.suptitle('Exp_4- best 3 features',y=0.95,x=0.50)
plt.savefig('images/exp_4_test.jpeg', bbox_inches='tight')
plt.show()

In [None]:
    # create the train/validation image dataset for the combined feature
    clfGlobals = global_For_Clf('scream')
    feature_names = ["MelSpec_Rms_SpecRol"]
    create_image_train_test_dataset_by_features(
        feature_names,
        clfGlobals.n_mfcc,
        source="train",
    )

In [None]:
   # because i didn't use scatter while plotting a single feature doesnt draw anything,
    # that's why i printed afterwards the results...
    # for a small experiment that's ok but for future enhancement
    # of course i'll need to fix this..
    
    clfGlobals = global_For_Clf('scream')
    feature_names = ["MelSpec_Rms_SpecRol"]
    # create test dataset as well
    create_image_train_test_dataset_by_features(
        feature_names,
        clfGlobals.n_mfcc,
        source="test_wav",
    )
    # compare results: same "image receiving" model, but every time it will learn from a single feature
    epochs_vector = [50]
    for train_epochs in epochs_vector:
        accuracies = []
        acc_validation = []
        acc_test = []
        for feature in feature_names:
            # each result is [validation accuracy, test accuracy]
            val_and_test = train_save_getRes_val_test(feature, train_epochs=train_epochs)
            accuracies.append(val_and_test)
            acc_validation.append(val_and_test[0])
            acc_test.append(val_and_test[1])

        plot_exp_3(feature_names, acc_validation, acc_test, train_epochs)


In [None]:
acc_test

In [None]:
acc_validation

# Experiment 5

In [None]:
from keras import applications
# build the VGG16 network
model = applications.VGG16(include_top=False, weights='imagenet')

In [None]:
print(model.summary())

Beginning of unsuccessful attempt- keeping this section for documentation of my mistakes.

Basically, I didn't understand the correct flow of this specific approach to
transfer learning, so I couldn't figure out how to use it properly.

What I understand now (and this is the correct flow) is that:
1. you load a pretrained model with its predefined weights.
2. you pass ALL future samples (train,validation AND TEST) to the pretrained model.
3. the output from the pretrained model are FEATURES that are actually the:
4. input for the "top model" which outputs the class prediction.

* my mistake was at stage 3. i couldn't understand how to "predict" correctly...:)

In [None]:
# Configuration for the bottleneck-features attempt.

# dimensions of our images (square)
img_width = img_height = 150

# where the top model's weights are stored, and where the MelSpec images are read from
top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'data/train/train_MelSpec'
validation_data_dir = 'data/validation/validation_MelSpec'

# two classes each
nb_train_samples = 2 * 301       # 301 per class
nb_validation_samples = 2 * 66   # 66 per class

epochs, batch_size = 50, 2

In [None]:
datagen = ImageDataGenerator(rescale=1. / 255)
# build the VGG16 network
model = applications.VGG16(include_top=False, weights='imagenet')


def _bottleneck_features(image_dir, sample_count):
    """Run the images under ``image_dir`` through ``model``; return (generator, predictions).

    Images are streamed unshuffled and without labels, in batches of ``batch_size``.
    """
    image_flow = datagen.flow_from_directory(
        image_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)
    return image_flow, model.predict_generator(image_flow, sample_count // batch_size)


generator, bottleneck_features_train = _bottleneck_features(
    train_data_dir, nb_train_samples)
generator, bottleneck_features_validation = _bottleneck_features(
    validation_data_dir, nb_validation_samples)


In [None]:
# Labels are built by position: the first half of each set is class 0, the second half class 1.
train_data = bottleneck_features_train
train_labels = np.repeat([0, 1], nb_train_samples // 2)

validation_data = bottleneck_features_validation
validation_labels = np.repeat([0, 1], nb_validation_samples // 2)



In [None]:
train_data.shape

In [None]:
validation_data.shape

In [None]:
print(train_labels.shape, validation_labels.shape)

In [None]:
# Small fully-connected "top model" trained on the bottleneck features.
model = Sequential([
    Flatten(input_shape=train_data.shape[1:]),  # drop the leading sample dimension
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(train_data, train_labels,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(validation_data, validation_labels))
model.save_weights(top_model_weights_path)


In [None]:
model.input_shape

End of unsuccessful attempt

For some reason this wasn't successful: the validation accuracy came out as 0 and 1, and I can't predict on my own data, so it isn't worth much. Trying a different tutorial to learn from:
https://towardsdatascience.com/a-comprehensive-hands-on-guide-to-transfer-learning-with-real-world-applications-in-deep-learning-212bf3b2f27a

In [None]:
# load my mel spec model
input_shape_image= (150,150,3)
model_melSpec= experiment_1_get_model(input_shape_image)
model_melSpec.load_weights('weights\exp_3\weights_MelSpec.h5')

In [None]:
#from keras.applications import vgg16
#import keras
from keras.models import Model
import pandas as pd

# VGG16 convolutional base with ImageNet weights and without its classifier top.
# NOTE(review): `applications` is not imported in this cell; presumably
# `from keras import applications` was executed in an earlier cell -- confirm.
vgg = applications.VGG16(weights='imagenet', include_top=False,
                         input_shape=input_shape_image)

# Flatten the output of the last VGG layer so the extractor returns one
# feature vector per image.
output = Flatten()(vgg.layers[-1].output)
vgg_model = Model(vgg.input, output)

# Freeze the extractor: the model-level flag and every individual layer.
vgg_model.trainable = False
for layer in vgg_model.layers:
    layer.trainable = False

# Do not truncate the layer reprs in the table below.
# NOTE: a negative max_colwidth is deprecated since pandas 1.0, where None
# means "no limit"; kept as-is for compatibility with older pandas.
pd.set_option('max_colwidth', -1)

# One row per layer: the layer object, its name and whether it is trainable.
layers = [(vgg_layer, vgg_layer.name, vgg_layer.trainable)
          for vgg_layer in vgg_model.layers]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

In [None]:
# NOTE: this rebinds the name `glob` to the module, while the notebook's first
# import cell bound it to the function (`from glob import glob`). The module
# binding is relied on by the `glob.glob(...)` calls here and in the test-data
# cell; cells that call `glob(...)` directly will fail if re-run after this one.
import glob
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img

IMG_DIM = (150, 150)  # every spectrogram image is resized to this on load

# Training images: all Far_miss files first, then all scream files.
# The label cell relies on this class order.
train_files = glob.glob('data/train/train_MelSpec/Far_miss/*',recursive=True)
train_files.extend(glob.glob('data/train/train_MelSpec/scream/*',recursive=True))
print(len(train_files))
train_imgs = [img_to_array(load_img(img, target_size=IMG_DIM)) for img in train_files]
train_imgs = np.array(train_imgs)

# The former `print(train_labels)` was removed: at this point `train_labels`
# could only come from the abandoned experiment above (this section defines its
# own labels in a later cell), so it printed unrelated data and raised a
# NameError when those cells were not executed.

# Validation images, same class order as the training set.
validation_files = glob.glob('data/validation/validation_MelSpec/Far_miss/*',recursive=True)
validation_files.extend(glob.glob('data/validation/validation_MelSpec/scream/*',recursive=True))
validation_imgs = [img_to_array(load_img(img, target_size=IMG_DIM)) for img in validation_files]
validation_imgs = np.array(validation_imgs)


print('Train dataset shape:', train_imgs.shape, 
      '\tValidation dataset shape:', validation_imgs.shape)

In [None]:
# float32 copies divided by 255; the unscaled arrays are left untouched
# because the augmentation generators rescale them on their own.
train_imgs_scaled = train_imgs.astype('float32') / 255
validation_imgs_scaled = validation_imgs.astype('float32') / 255

# Sanity check: shape of one sample and a preview of the first training image.
print(train_imgs[0].shape)
array_to_img(train_imgs[0])

In [None]:
# Derive the sample counts and the labels from the files that were actually
# loaded instead of hard-coding them. The previous version assumed exactly
# 602 training / 132 validation images split evenly between the two classes
# and would silently mislabel samples if the folders were not balanced.
nb_train_samples = len(train_files)            # was hard-coded: 602 (301 per class)
nb_validation_samples = len(validation_files)  # was hard-coded: 132 (66 per class)

# Label encoding is unchanged: 0 = Far_miss, 1 = scream. The class is read from
# each file's parent directory, so labels[i] always belongs to the image that
# was loaded from files[i].
train_labels = np.array(
    [int(Path(path).parent.name == 'scream') for path in train_files])

validation_labels = np.array(
    [int(Path(path).parent.name == 'scream') for path in validation_files])


In [None]:
# Push one scaled training image through the headless VGG16 base to look at
# what a bottleneck feature map looks like.
bottleneck_feature_example = vgg.predict(train_imgs_scaled[:1])
print(bottleneck_feature_example.shape)
# Plot the slice at index 0 of the last axis for that single image.
plt.imshow(bottleneck_feature_example[0, :, :, 0])

In [None]:
def get_bottleneck_features(model, input_imgs):
    """Run ``input_imgs`` through ``model`` and return its predictions.

    model: object exposing a Keras-style ``predict(x, verbose=...)`` method.
    input_imgs: batch handed to ``predict`` unchanged.
    Returns whatever ``model.predict`` returns; progress output is disabled
    by passing ``verbose=0``.
    """
    return model.predict(input_imgs, verbose=0)
    
# Precompute the flattened VGG16 features once for both splits; the dense
# classifier below is trained on these arrays instead of on raw images.
train_features_vgg = get_bottleneck_features(vgg_model, train_imgs_scaled)
validation_features_vgg = get_bottleneck_features(vgg_model, validation_imgs_scaled)

# Sanity check: one feature row per image in each split.
print('Train Bottleneck Features:', train_features_vgg.shape, 
      '\tValidation Bottleneck Features:', validation_features_vgg.shape)

In [None]:
# Length of the flattened feature vector produced by vgg_model for one image.
vgg_model.output_shape[1]

In [None]:
from keras import optimizers
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, InputLayer

# The classifier receives the output of the pretrained model without the
# top layer (without the prediction), i.e. one flat feature vector per image.
input_shape = vgg_model.output_shape[1]

# Two dense layers of 512 units with dropout, then a single sigmoid unit for
# the binary decision.
model = Sequential([
    InputLayer(input_shape=(input_shape,)),
    Dense(512, activation='relu', input_dim=input_shape),
    Dropout(0.3),
    Dense(512, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer=optimizers.RMSprop(lr=1e-4),
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()

In [None]:
# Training hyper-parameters for the dense classifier.
batch_size= 2
epochs= 50

# The labels are already 0/1 integers, so no encoding step is applied;
# the *_enc names are plain aliases used by the cells below.
train_labels_enc = train_labels
validation_labels_enc = validation_labels

In [None]:
# Train the dense classifier on the precomputed VGG16 features and evaluate it
# on the validation features after every epoch.
history = model.fit(x=train_features_vgg, y=train_labels_enc,
                    validation_data=(validation_features_vgg, validation_labels_enc),
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1)

In [None]:
# Save the trained dense classifier (this model does not contain the VGG16 base).
model.save('Far_Miss_scream_tlearn_basic_cnn.h5')

Try the model on the test data

In [None]:
IMG_DIM = (150, 150)

# Collect the test spectrograms: all Far_miss files first, then all scream files.
test_files = (glob.glob('data/test/test_MelSpec/Far_miss/*',recursive=True)
              + glob.glob('data/test/test_MelSpec/scream/*',recursive=True))
print(len(test_files))

# Load every image at IMG_DIM, stack into one array, and keep a float32 copy
# divided by 255 for prediction.
test_imgs = np.array(
    [img_to_array(load_img(path, target_size=IMG_DIM)) for path in test_files])
test_imgs_scaled = test_imgs.astype('float32') / 255

# Sanity check: sample shape and a preview of the first test image.
print(test_imgs_scaled[0].shape)
array_to_img(test_imgs_scaled[0])

In [None]:
# Derive the test-set size and labels from the files that were actually loaded.
# The previous version hard-coded 50 samples split evenly between the classes,
# which mislabels samples (or makes accuracy_score fail on a length mismatch)
# as soon as the test folders change.
test_amount = len(test_files)  # was hard-coded: 50
# Same encoding as for training: 0 = Far_miss, 1 = scream, read from each
# file's parent directory so test_labels[i] matches test_imgs[i].
test_labels = np.array(
    [int(Path(path).parent.name == 'scream') for path in test_files])
test_labels.shape

In [None]:
    # Extract the flattened VGG16 features for the scaled test images and
    # display the resulting shape.
    test_features_vgg = get_bottleneck_features(vgg_model, test_imgs_scaled)
    test_features_vgg.shape

In [None]:
    # `Sequential.predict_classes` was removed in TensorFlow/Keras 2.6. For a
    # single sigmoid output it was defined as thresholding the predicted
    # probability at 0.5, which is done explicitly here and works on old and
    # new Keras alike. The result keeps the (n_samples, 1) int32 shape.
    predictions = (model.predict(test_features_vgg, verbose=0) > 0.5).astype('int32')
    # evaluate accuracy
    acc = accuracy_score(test_labels, predictions)
    print('Test Accuracy for basic VGG16 transfer learning:', acc)

Pre-trained CNN model as a Feature Extractor with Image Augmentation

In [None]:
# Training generator: pixel values are multiplied by 1/255 and the images are
# randomly zoomed; horizontal flipping is explicitly switched off.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.3,
    horizontal_flip=False,
    fill_mode='nearest',
)
# Validation generator: rescaling only, no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255)

# Both generators are fed the unscaled image arrays, two samples per batch.
train_generator = train_datagen.flow(
    train_imgs, train_labels_enc, batch_size=2)
val_generator = val_datagen.flow(
    validation_imgs, validation_labels_enc, batch_size=2)

We won’t extract the bottleneck features like last time since we will be training on data generators; hence, we will be passing the vgg_model object as an input to our own model.

In [None]:
from keras import optimizers
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, InputLayer

nb_train_samples = 602   # 301 per class
nb_validation_samples = 132  # 66 per class
batch_size = 2

# The VGG-16 model's layers are still frozen, so it is used here as a basic
# feature extractor only; the dense layers behind it are the part that learns.
# vgg_model.output_shape[1] = 8192
model_2 = Sequential([
    vgg_model,
    Dense(512, activation='relu', input_dim=input_shape),
    Dropout(0.3),
    Dense(512, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])

# We bring the learning rate slightly down since
# we will be training for 100 epochs and don't want to
# make any sudden abrupt weight adjustments to our model layers.
model_2.compile(optimizer=optimizers.RMSprop(lr=2e-5),
                loss='binary_crossentropy',
                metrics=['accuracy'])

# One epoch draws nb_train_samples // batch_size augmented batches and then
# evaluates on nb_validation_samples // batch_size validation batches.
history = model_2.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    validation_data=val_generator,
    validation_steps=nb_validation_samples // batch_size,
    epochs=100,
    verbose=1)


In [None]:
# Save the augmentation-trained model (VGG16 extractor plus dense classifier).
model_2.save('Far_Miss_scream_tlearn_img_aug_cnn.h5') 

In [None]:
# `Sequential.predict_classes` was removed in TensorFlow/Keras 2.6. For a
# single sigmoid output it was defined as thresholding the predicted
# probability at 0.5, which is done explicitly here and works on old and new
# Keras alike. The result keeps the (n_samples, 1) int32 shape.
predictions_2 = (model_2.predict(test_imgs_scaled, verbose=0) > 0.5).astype('int32')
# evaluate accuracy
acc = accuracy_score(test_labels, predictions_2)
print('Test Accuracy for VGG16 transfer learning with image Augmentation:', acc)

What if the model were the same one I used previously, but now with the pretrained model in front of it?
The answer is that you can't put a Conv2D layer after a Dense layer on the same branch, so it didn't work.

In [None]:
# model_2_2 = Sequential()
# model_2_2.add(vgg_model)
# model_2_2.add(Dense(512, activation='relu', input_dim=input_shape)) 

# model_2_2.add(Conv2D(32, (3, 3)))
# model_2_2.add(Activation('relu'))
# model_2_2.add(MaxPooling2D(pool_size=(2, 2)))

# model_2_2.add(Conv2D(32, (3, 3)))
# model_2_2.add(Activation('relu'))
# model_2_2.add(MaxPooling2D(pool_size=(2, 2)))

# model_2_2.add(Conv2D(64, (3, 3)))
# model_2_2.add(Activation('relu'))
# model_2_2.add(MaxPooling2D(pool_size=(2, 2)))

# model_2_2.add(Flatten())
# model_2_2.add(Dense(64))
# model_2_2.add(Activation('relu'))
# model_2_2.add(Dropout(0.5))
# model_2_2.add(Dense(1))
# model_2_2.add(Activation('sigmoid'))

# model_2_2.compile(loss='binary_crossentropy',
#           optimizer=optimizers.RMSprop(lr=2e-5),
#           metrics=['accuracy'])

# history = model_2_2.fit_generator(train_generator, steps_per_epoch= nb_train_samples//batch_size ,
#                               epochs=50,
#                               validation_data=val_generator, 
#                               validation_steps=nb_validation_samples//batch_size , 
#                               verbose=1)   

# model_2_2.save('Far_Miss_scream_tlearn_img_aug_cnn_MyModel.h5') 

# predictions_2_2 = model_2_2.predict_classes(test_imgs_scaled, verbose=0)
# # evaluate accuracy
# acc = accuracy_score(test_labels, predictions_2_2)
# print('Test Accuracy for VGG16 transfer learning with image Augmentation My Model:', acc)

Pre-trained CNN model with Fine-tuning and Image Augmentation

In [None]:
vgg_model.trainable = True

# Walk the layers in order. Everything before the first of 'block4_conv1' /
# 'block5_conv1' stays frozen; that layer and every layer after it becomes
# trainable.
set_trainable = False
for layer in vgg_model.layers:
    set_trainable = set_trainable or layer.name in ('block4_conv1', 'block5_conv1')
    layer.trainable = set_trainable

# One row per layer: the layer object, its name and whether it is trainable.
layers = [(vgg_layer, vgg_layer.name, vgg_layer.trainable)
          for vgg_layer in vgg_model.layers]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

In [None]:
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, InputLayer
from keras.models import Sequential
from keras import optimizers

# Same generators as in the previous experiment: augmented (zoomed) training
# images, rescale-only validation images, two samples per batch.
train_datagen = ImageDataGenerator(rescale=1./255, zoom_range=0.3,
                                   horizontal_flip=False, fill_mode='nearest')
val_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow(train_imgs, train_labels_enc, batch_size=2)
val_generator = val_datagen.flow(validation_imgs, validation_labels_enc, batch_size=2)

# Same classifier as model_2, but vgg_model now has trainable layers
# (set in the previous cell), so those layers are fine-tuned during training.
model_3 = Sequential()
model_3.add(vgg_model)
model_3.add(Dense(512, activation='relu', input_dim=input_shape))
model_3.add(Dropout(0.3))
model_3.add(Dense(512, activation='relu'))
model_3.add(Dropout(0.3))
model_3.add(Dense(1, activation='sigmoid'))

model_3.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-5),
              metrics=['accuracy'])

history = model_3.fit_generator(train_generator, steps_per_epoch= nb_train_samples//batch_size,
                              epochs=50,
                              validation_data=val_generator,
                              validation_steps=nb_validation_samples//batch_size ,
                              verbose=1)
# NOTE(review): the file name looks like a leftover from the tutorial (the
# other models here are saved as 'Far_Miss_scream_...'); left unchanged in case
# another cell loads it by this name -- confirm and rename.
model_3.save('cats_dogs_tlearn_finetune_img_aug_cnn.h5')
# `Sequential.predict_classes` was removed in TensorFlow/Keras 2.6. For a
# single sigmoid output it was defined as thresholding the predicted
# probability at 0.5, which is done explicitly here and works on old and new
# Keras alike. The result keeps the (n_samples, 1) int32 shape.
predictions_3 = (model_3.predict(test_imgs_scaled, verbose=0) > 0.5).astype('int32')
# evaluate accuracy
acc = accuracy_score(test_labels, predictions_3)
print('Test Accuracy for VGG16 transfer learning with image Augmentation and fine tuning:', acc)