## Package Imports

In [None]:
import pandas as pd
import os
import librosa

import numpy as np
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics

from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime

## Loading of Dataset & Metadata

In [None]:
# Locate the audio dataset and its metadata.
# The data directory can be overridden with the ACS_DATA_DIR environment variable so the
# notebook is not tied to a single machine; when it is unset the original location is used.
data_root = os.environ.get(
    'ACS_DATA_DIR',
    'C:/Users/Jerome/Desktop/Jerome/Programmierung Projekte/Thesis-Projekt/ACS-master-final/backend/data',
).rstrip('/\\')
audio_dataset_path = data_root + '/dataset_train_clean/'
metadata = pd.read_csv(data_root + '/metadata_train_clean.csv')

## Feature Extraction

In [None]:
# Extract a fixed-length MFCC feature vector from one audio file
def features_extractor(file):
    """Return the time-averaged MFCCs of an audio file.

    Parameters
    ----------
    file
        Audio source handed straight to ``librosa.load`` (the training loop passes a path).

    Returns
    -------
    numpy.ndarray
        The MFCC matrix (40 coefficients requested) averaged along its frame axis.
    """
    # Load from the `file` argument rather than a notebook global, so every caller gets
    # features for the file it actually passed in.
    audio, sample_rate = librosa.load(file, res_type = 'kaiser_fast')
    mfccs_features = librosa.feature.mfcc(y = audio, sr = sample_rate, n_mfcc = 40)
    # Transpose so frames are on axis 0, then average them away
    mfccs_scaled_features = np.mean(mfccs_features.T, axis = 0)

    return mfccs_scaled_features

In [None]:
# Go through every audio file listed in the metadata and extract its MFCC features
extracted_features = []
dataset_dir = os.path.abspath(audio_dataset_path)  # loop-invariant, so resolve it once
# iterrows() is a generator with no length; pass the row count so tqdm can show progress
for index_num, row in tqdm(metadata.iterrows(), total = len(metadata)):
    file_name = os.path.join(dataset_dir, str(row["fileName"]))
    final_class_labels = row["className"]
    data = features_extractor(file_name)
    extracted_features.append([data, final_class_labels])

# Collect all (feature vector, class label) pairs in a pandas DataFrame
extracted_features_df = pd.DataFrame(extracted_features, columns = ['feature','class'])

In [None]:
# Hold-out split helper
from sklearn.model_selection import train_test_split

# Independent variable: the extracted feature vectors, one per row
X = np.array(extracted_features_df['feature'].tolist())

# Dependent variable: the class labels, turned into indicator (dummy) columns
y = np.array(pd.get_dummies(np.array(extracted_features_df['class'].tolist())))

# Keep 20% of the samples for testing; the seed is fixed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 0,
)

## CNN-RF Model

In [None]:
# Number of classes = number of columns in the encoded label matrix
num_labels = y.shape[1]

# NOTE: despite the variable name, every trainable layer below is a Dense layer.
cnnrf = Sequential([
    # First layer: expects a 40-value input vector
    Dense(100, input_shape = (40,)),
    Activation('relu'),
    Dropout(0.5),

    # Second layer
    Dense(200),
    Activation('relu'),
    Dropout(0.5),

    # Third layer
    Dense(100),
    Activation('relu'),
    Dropout(0.5),

    # Fourth layer
    Dense(200),
    Activation('relu'),
    Dropout(0.5),

    # Output layer: one softmax unit per class
    Dense(num_labels),
    Activation('softmax'),
])

cnnrf.compile(
    optimizer = 'adam',
    loss = 'categorical_crossentropy',
    metrics = ['accuracy'],
)

## CNN-RF Training

In [None]:
# Train the model
num_epochs = 110
num_batch_size = 64

# Make sure the checkpoint folder exists before the callback tries to write into it
os.makedirs('model_checkpoints', exist_ok = True)

# save_best_only = True: the checkpoint file is only overwritten when the monitored
# quantity improves (the monitor is left at the Keras default)
checkpointer = ModelCheckpoint(filepath = 'model_checkpoints/cnnrf-v10.hdf5',
                               verbose = 1, save_best_only = True)
start = datetime.now()

# The held-out split is used as validation data during training
cnnrf.fit(X_train, y_train, batch_size = num_batch_size, epochs = num_epochs, validation_data = (X_test, y_test), callbacks = [checkpointer], verbose = 1)

# Wall-clock time spent in fit()
duration = datetime.now() - start
print("Training duration ", duration)

In [None]:
# Report accuracy on the training split first, then on the held-out split.
# evaluate() returns [loss, accuracy] because the model was compiled with metrics = ['accuracy'].
for banner, inputs, targets in (
    ("Primary Training Accuracy: ", X_train, y_train),
    ("Primary Testing Accuracy: ", X_test, y_test),
):
    test_accuracy = cnnrf.evaluate(inputs, targets, verbose = 0)
    print(banner, test_accuracy[1] * 100, "%")

## Anvil Server Connection

In [None]:
import anvil.server

#Anvil Direct Link - https://p47ett2nfjlaqqih.anvil.app/3ZRA6CFT35X2P4QOFJI3DGAV
# SECURITY: the uplink key is a credential and should not be stored in the notebook.
# The ANVIL_UPLINK_KEY environment variable is preferred; the literal below is kept only
# as a fallback so existing runs keep working.
# TODO: rotate this key and remove the hard-coded fallback.
anvil_uplink_key = os.environ.get("ANVIL_UPLINK_KEY", "JOTCSN4YS2NNQR3ZBWJT2GBU-P47ETT2NFJLAQQIH")
anvil.server.connect(anvil_uplink_key)

In [None]:
import anvil.media
import anvil.mpl_util
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns

from glob import glob

import librosa
import librosa.display
import IPython.display as ipd

from itertools import cycle

@anvil.server.callable
def classify_audio(file):
    """Classify one audio clip with the trained ``cnnrf`` model.

    Parameters
    ----------
    file
        Either something ``features_extractor`` can load directly (e.g. a file path) or
        an Anvil Media object sent by the app.

    Returns
    -------
    list of list of float
        ``cnnrf.predict`` output for the single clip, converted to plain Python lists
        because NumPy arrays cannot be serialised as an Anvil server-function result.
    """
    if isinstance(file, anvil.Media):
        # The audio loader needs a file on disk, so write the Media object to a
        # temporary file for the duration of the feature extraction.
        with anvil.media.TempFile(file) as temp_path:
            prediction_feature = features_extractor(temp_path)
    else:
        prediction_feature = features_extractor(file)

    # The model expects a batch dimension: (1, n_features)
    prediction_feature = prediction_feature.reshape(1, -1)
    return cnnrf.predict(prediction_feature).tolist()