In [None]:
# Mount Google Drive (Colab only) so that files under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Import necessary packages
import argparse

# Import necessary components to build AlexNet
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.layers import BatchNormalization
from keras.regularizers import l2

import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import skimage.transform

In [None]:
def alexnet_model(img_shape=(224, 224, 3), n_classes=10, l2_reg=0.,
	weights=None):
	"""
	Build the AlexNet-style Sequential network used throughout this notebook.
	Parameters:
		img_shape: shape of one input image, passed as input_shape to the
			first convolution
		n_classes: number of units in the final softmax layer
		l2_reg: L2 penalty factor applied to the kernel of every Conv2D and
			Dense layer (0. adds no penalty)
		weights: optional path handed to load_weights() once all layers
			have been added
	Returns:
		the Sequential model (not compiled)
	"""

	# Initialize model
	alexnet = Sequential()

	def add_conv_stage(kernel_size, zero_pad=False, pool=True, **conv_kwargs):
		# One convolutional stage: [ZeroPadding] -> Conv -> BatchNorm -> ReLU -> [MaxPool]
		if zero_pad:
			alexnet.add(ZeroPadding2D((1, 1)))
		# A fresh regularizer per layer so that l2_reg reaches every kernel,
		# not only the first convolution.
		alexnet.add(Conv2D(30, kernel_size, padding='same',
			kernel_regularizer=l2(l2_reg), **conv_kwargs))
		alexnet.add(BatchNormalization())
		alexnet.add(Activation('relu'))
		if pool:
			alexnet.add(MaxPooling2D(pool_size=(2, 2)))

	def add_dense_stage(units):
		# One fully connected stage: Dense -> BatchNorm -> ReLU -> Dropout
		alexnet.add(Dense(units, kernel_regularizer=l2(l2_reg)))
		alexnet.add(BatchNormalization())
		alexnet.add(Activation('relu'))
		alexnet.add(Dropout(0.5))

	# Layer 1
	add_conv_stage((11, 11), input_shape=img_shape)

	# Layer 2
	add_conv_stage((5, 5))

	# Layer 3
	add_conv_stage((3, 3), zero_pad=True)

	# Layer 4 (no pooling after this stage)
	add_conv_stage((3, 3), zero_pad=True, pool=False)

	# Layer 5
	add_conv_stage((3, 3), zero_pad=True)

	# Layer 6
	alexnet.add(Flatten())
	add_dense_stage(30)

	# Layer 7
	add_dense_stage(30)

	# Layer 8
	# The layer sequence (including the BatchNormalization ahead of the
	# softmax) is kept exactly as before so previously saved weights still load.
	alexnet.add(Dense(n_classes, kernel_regularizer=l2(l2_reg)))
	alexnet.add(BatchNormalization())
	alexnet.add(Activation('softmax'))

	if weights is not None:
		alexnet.load_weights(weights)

	return alexnet

def parse_args(args=None):
	"""
	Parse command line arguments.
	Parameters:
		args: optional list of argument strings to parse; when None,
			argparse falls back to sys.argv[1:]. Passing an explicit list
			(e.g. []) lets the function be used inside a notebook, where
			sys.argv holds the kernel's own arguments.
	Returns:
		argparse.Namespace with the boolean attribute print_model
	"""
	parser = argparse.ArgumentParser(description='AlexNet model')
	parser.add_argument('--print_model',
		dest='print_model',
		help='Print AlexNet model',
		action='store_true')
	return parser.parse_args(args)

In [None]:
def load_preprocess_training_batch(X_train):
    """Resize every image in ``X_train`` to 224x224 and return them as a list.

    Each item is passed to ``skimage.transform.resize`` with
    ``mode='constant'``. Note that ``resize`` returns floating-point images
    and, with its default ``preserve_range=False``, rescales integer inputs
    such as uint8 to the [0, 1] range.

    Parameters:
        X_train: iterable of images (any split, not only training data).
    Returns:
        list with one resized array per input image, in the same order.
    """
    return [
        skimage.transform.resize(image, (224, 224), mode='constant')
        for image in X_train
    ]

# CIFAR 10 DATASET

In [None]:
# Command line parameters
# (left disabled: nothing in this cell reads command-line flags)
# args = parse_args()

# Create AlexNet model with the default arguments of alexnet_model()
model = alexnet_model()

# Print
# if args.print_model:
# 	model.summary()

In [None]:
# Load CIFAR-10 and keep only the first 500 training / 200 test samples.
(X_train, y_train), (X_test, y_test) = keras.datasets.cifar10.load_data()

X_train, y_train = X_train[:500], y_train[:500]
X_test, y_test = X_test[:200], y_test[:200]

In [None]:
# Resize both splits to the 224x224 input size of the network.
X_train_resized, X_test_resized = (
    load_preprocess_training_batch(images) for images in (X_train, X_test)
)

In [None]:
# Stack the per-image arrays into one array per split.
X_train_resized, X_test_resized = (
    np.array(X_train_resized),
    np.array(X_test_resized),
)

In [None]:
# skimage.transform.resize (used by load_preprocess_training_batch) already
# converts the uint8 CIFAR-10 images to floats in [0, 1]. Dividing by 255 a
# second time would squash every pixel into [0, 1/255], so only cast here.
X_train_resized = X_train_resized.astype(np.float32)
X_test_resized = X_test_resized.astype(np.float32)

In [None]:
# Configure training: SGD optimizer, integer-label cross-entropy, accuracy metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='SGD',
    metrics=['accuracy'],
)

# Train for 5 epochs on the resized CIFAR-10 subset.
history = model.fit(x=X_train_resized, y=y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Evaluate on the held-out subset; the displayed list is [loss, accuracy].
model.evaluate(x=X_test_resized, y=y_test)



[2.3610661029815674, 0.07500000298023224]

# MNIST Dataset

In [None]:
# Load MNIST and keep only the first 2000 samples of each split.
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()

X_train, y_train = X_train[:2000], y_train[:2000]
X_test, y_test = X_test[:2000], y_test[:2000]

In [None]:
# Resize the 28x28 digits to the 224x224 input size of the network.
X_train_resized = load_preprocess_training_batch(X_train)
X_test_resized = load_preprocess_training_batch(X_test)

X_train_resized = np.array(X_train_resized)
X_test_resized = np.array(X_test_resized)

# No further division by 255: skimage.transform.resize already returns the
# uint8 MNIST images as floats in [0, 1]. Scaling again would shrink every
# pixel into [0, 1/255].

In [None]:
import cv2

# Replicate the single grayscale channel three times so each image has the
# three channels expected by alexnet_model(), then store as float32.
X_train_new = np.repeat(
    X_train_resized[..., np.newaxis], 3, axis=-1
).astype(np.float32)

X_test_new = np.repeat(
    X_test_resized[..., np.newaxis], 3, axis=-1
).astype(np.float32)

In [None]:
# Fresh network for the MNIST experiment (default 10 output classes).
model = alexnet_model()

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='SGD',
    metrics=['accuracy'],
)

# Train for 5 epochs on the 3-channel MNIST subset.
history = model.fit(x=X_train_new, y=y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Evaluate on the MNIST test subset; the displayed list is [loss, accuracy].
model.evaluate(x=X_test_new, y=y_test)



[2.3417277336120605, 0.11699999868869781]

# SAVEE Dataset

In [None]:
!unzip "/content/drive/MyDrive/SaveeDataset.zip"

In [None]:
import librosa
import numpy as np

# Number of samples every clip is cropped or padded to (16000 * 5 = 80000);
# load_audio_file below loads audio with sr=16000.
input_length = 16000*5

# NOTE(review): batch_size is not referenced anywhere in the visible notebook.
batch_size = 32

# Number of mel bands requested from librosa.feature.melspectrogram.
n_mels = 320

def preprocess_audio_mel_T(audio, sample_rate=16000, window_size=20, #log_specgram
                 step_size=10, eps=1e-10):
    """Convert a waveform into a transposed, rescaled mel spectrogram.

    The mel power spectrogram (``n_mels`` bands, module-level setting) is
    converted to decibels relative to its maximum, shifted by 40 and divided
    by 40, then transposed.

    Parameters:
        audio: 1-D waveform passed to librosa as ``y``.
        sample_rate: sampling rate passed to librosa as ``sr``.
        window_size, step_size, eps: accepted but not used by this function.
    Returns:
        the transposed, rescaled decibel spectrogram.
    """
    spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=n_mels)
    decibels = librosa.power_to_db(spectrogram, ref=np.max)
    return ((decibels + 40) / 40).T


def load_audio_file(file_path, input_length=input_length):
  """Load a clip at 16 kHz, force it to ``input_length`` samples, return its mel features.

  Clips longer than ``input_length`` are cropped at a random offset; shorter
  clips are zero-padded with a random amount of leading padding. The result
  is passed through ``preprocess_audio_mel_T``.

  Parameters:
    file_path: path of the audio file to read with librosa.
    input_length: target number of samples.
  Returns:
    the output of ``preprocess_audio_mel_T`` for the fixed-length clip.
  """
  samples, _ = librosa.core.load(file_path, sr=16000)
  n_samples = len(samples)

  if n_samples > input_length:
    # Random crop of exactly input_length samples.
    start = np.random.randint(n_samples - input_length)
    samples = samples[start:start + input_length]
  else:
    # Random split of the missing samples between leading and trailing zeros.
    missing = input_length - n_samples
    left = np.random.randint(missing) if missing > 0 else 0
    samples = np.pad(samples, (left, missing - left), "constant")

  return preprocess_audio_mel_T(samples)

In [None]:
# Preprocessing the dataset
import os
from scipy.io import wavfile
import librosa
import matplotlib.pyplot as plt
import numpy as np
import cv2

rootDirectory = "/content/AudioData/"
personNames = ["DC","JE","JK","KL"]

# Emotion codes that prefix each file name; the position in this list is the label.
classes = ["a" , "d" , "f", "h", "n", "sa" , "su" ]

X = list()
y = list()

for person in personNames:
  directory = os.path.join(rootDirectory,person)
  # Sorted so the sample order (and therefore the seeded train/test split)
  # does not depend on the arbitrary order returned by os.listdir.
  for filename in sorted(os.listdir(directory)):
    # Try the one-character code first, then the two-character code.
    label = next(
        (prefix for prefix in (filename[0:1], filename[0:2]) if prefix in classes),
        None,
    )
    if label is None:
      # Skip unrelated files before paying for audio decoding.
      continue
    a = load_audio_file(file_path=os.path.join(directory, filename))
    # Replicate the spectrogram into three channels for the image network.
    X.append(cv2.merge([a,a,a]))
    y.append(classes.index(label))

In [None]:
# Features are float32; labels are class indices and are therefore kept as
# integers, which is what the sparse categorical cross-entropy loss expects.
X = np.asarray(X, dtype=np.float32)
y = np.asarray(y, dtype=np.int32)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# dataset preparation

from tensorflow.keras import datasets,layers,models
from sklearn.model_selection import train_test_split
# 50/50 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    train_size=0.5,
    random_state=10,
)

In [None]:
# Resize every 3-channel spectrogram to 224x224 and stack each split into one array.
X_train_resized = np.array(load_preprocess_training_batch(X_train))
X_test_resized = np.array(load_preprocess_training_batch(X_test))

In [None]:
# Size the softmax layer to the number of emotion labels in `classes` rather
# than the 10-class default, which would leave output units that no label uses.
model = alexnet_model(n_classes=len(classes))

model.compile(optimizer='SGD',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train for 10 epochs on the resized spectrograms.
history = model.fit(X_train_resized, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Evaluate on the held-out half; the displayed list is [loss, accuracy].
model.evaluate(x=X_test_resized, y=y_test)



[2.275780200958252, 0.23749999701976776]

# EmoDB Database

In [None]:
!unzip "/content/drive/MyDrive/EmoDB.zip"

In [None]:
import librosa
import numpy as np

# Number of samples every clip is cropped or padded to (16000 * 5 = 80000);
# load_audio_file below loads audio with sr=16000.
input_length = 16000*5

# NOTE(review): batch_size is not referenced anywhere in the visible notebook.
batch_size = 32

# Number of mel bands requested from librosa.feature.melspectrogram.
n_mels = 320

def preprocess_audio_mel_T(audio, sample_rate=16000, window_size=20, #log_specgram
                 step_size=10, eps=1e-10):
    """Convert a waveform into a transposed, rescaled mel spectrogram.

    The mel power spectrogram (``n_mels`` bands, module-level setting) is
    converted to decibels relative to its maximum, shifted by 40 and divided
    by 40, then transposed.

    Parameters:
        audio: 1-D waveform passed to librosa as ``y``.
        sample_rate: sampling rate passed to librosa as ``sr``.
        window_size, step_size, eps: accepted but not used by this function.
    Returns:
        the transposed, rescaled decibel spectrogram.
    """
    spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=n_mels)
    decibels = librosa.power_to_db(spectrogram, ref=np.max)
    return ((decibels + 40) / 40).T


def load_audio_file(file_path, input_length=input_length):
  """Load a clip at 16 kHz, force it to ``input_length`` samples, return its mel features.

  Clips longer than ``input_length`` are cropped at a random offset; shorter
  clips are zero-padded with a random amount of leading padding. The result
  is passed through ``preprocess_audio_mel_T``.

  Parameters:
    file_path: path of the audio file to read with librosa.
    input_length: target number of samples.
  Returns:
    the output of ``preprocess_audio_mel_T`` for the fixed-length clip.
  """
  samples, _ = librosa.core.load(file_path, sr=16000)
  n_samples = len(samples)

  if n_samples > input_length:
    # Random crop of exactly input_length samples.
    start = np.random.randint(n_samples - input_length)
    samples = samples[start:start + input_length]
  else:
    # Random split of the missing samples between leading and trailing zeros.
    missing = input_length - n_samples
    left = np.random.randint(missing) if missing > 0 else 0
    samples = np.pad(samples, (left, missing - left), "constant")

  return preprocess_audio_mel_T(samples)

In [None]:
# Preprocessing the dataset
import os
from scipy.io import wavfile
import librosa
import matplotlib.pyplot as plt
import numpy as np
import cv2

directory = "/content/wav/"

# Emotion codes found at index 5 of each file name; the position in this list is the label.
classes = ["W" ,"L" ,"E" ,"A" , "F" ,"T" ,"N" ]

X = list()
y = list()

# Sorted so the sample order (and therefore the seeded train/test split)
# does not depend on the arbitrary order returned by os.listdir.
for filename in sorted(os.listdir(directory)):
  emotion = filename[5:6]
  if emotion not in classes:
    # Skip unrelated files before paying for audio decoding.
    continue
  a = load_audio_file(file_path=os.path.join(directory, filename))
  # Replicate the spectrogram into three channels for the image network.
  X.append(cv2.merge([a,a,a]))
  y.append(classes.index(emotion))

In [None]:
# Features are float32; labels are class indices and are therefore kept as
# integers, which is what the sparse categorical cross-entropy loss expects.
X = np.asarray(X, dtype=np.float32)
y = np.asarray(y, dtype=np.int32)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# dataset preparation

from tensorflow.keras import datasets,layers,models
from sklearn.model_selection import train_test_split
# 60/40 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    train_size=0.6,
    random_state=10,
)

In [None]:
# Resize every 3-channel spectrogram to 224x224 and stack each split into one array.
X_train_resized = np.array(load_preprocess_training_batch(X_train))
X_test_resized = np.array(load_preprocess_training_batch(X_test))

In [None]:
# Size the softmax layer to the number of emotion labels in `classes` rather
# than the 10-class default, which would leave output units that no label uses.
model = alexnet_model(n_classes=len(classes))

model.compile(optimizer='SGD',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train for 10 epochs on the resized spectrograms.
history = model.fit(X_train_resized, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Evaluate on the held-out 40%; the displayed list is [loss, accuracy].
model.evaluate(x=X_test_resized, y=y_test)



[2.1748006343841553, 0.23364485800266266]