In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pickle

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models

In [2]:
# Read the pickled spectrogram table into a DataFrame and display it.
# NOTE(review): this is an absolute, user-specific path, so the notebook only
# runs on the original machine — consider a path relative to a configurable
# data directory. pickle.load can execute arbitrary code; only open trusted files.
pkl_path = '/Users/maryam/code/MaryamS-61/phoneme_classification/raw_data/Crema_spec_cnn_input.pkl'
with open(pkl_path, 'rb') as pkl_file:
    loaded_df = pickle.load(pkl_file)
loaded_df

Unnamed: 0,Label,FileNames,SpecInputCNN,EncodedLabels
0,SAD,1081_ITH_SAD_XX.wav,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]"
1,ANG,1081_IEO_ANG_LO.wav,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
2,NEU,1079_TSI_NEU_XX.wav,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0]"
3,HAP,1080_IEO_HAP_LO.wav,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[0.0, 0.0, 0.0, 1.0, 0.0, 0.0]"
4,SAD,1079_IEO_SAD_HI.wav,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]"
...,...,...,...,...
7437,HAP,1007_IWL_HAP_XX.wav,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[0.0, 0.0, 0.0, 1.0, 0.0, 0.0]"
7438,DIS,1008_IEO_DIS_LO.wav,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0]"
7439,SAD,1006_TSI_SAD_XX.wav,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 1.0]"
7440,ANG,1006_TAI_ANG_XX.wav,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0]"


In [3]:
# Stack the per-row spectrogram matrices along a new leading (sample) axis.
X = np.stack(loaded_df["SpecInputCNN"].tolist())

In [4]:
# Stack the per-row one-hot label vectors into a 2-D array (one row per sample).
y = np.stack(loaded_df["EncodedLabels"].tolist())

In [5]:
# Confirm the stacked array's dimensions (samples are on the first axis).
X.shape

(7442, 1031, 85)

In [6]:
# VGG16 is built below with a 3-channel input shape, so replicate each
# single-channel spectrogram across a new trailing channel axis.
# NOTE(review): this materialises an array three times the size of X —
# if X is float64, casting to float32 first would halve the footprint.
three_channel_image = np.repeat(X[..., np.newaxis], 3, axis=-1)

In [7]:
# Confirm that a trailing channel axis of size 3 was added.
three_channel_image.shape

(7442, 1031, 85, 3)

In [8]:
# Hold out 20% of the samples for testing.
# - random_state pins the shuffle so the split is identical after a kernel restart.
# - stratify uses the class index (argmax of each one-hot row) so that every
#   class keeps the same proportion in the train and test partitions.
x_train, x_test, y_train, y_test = train_test_split(
    three_channel_image,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y.argmax(axis=1),
)

In [None]:
# Keep only 1/50th of each partition for quick experiments
reduction_factor = 50

# Seeded generator so the same subset is drawn on every run
subset_rng = np.random.default_rng(42)

# Choosing the random (non-repeating) indices of the small train set and small test set.
# These are positions *within* x_train / x_test, not within the full dataset.
idx_train = subset_rng.choice(len(x_train), size=round(len(x_train) / reduction_factor), replace=False)
idx_test = subset_rng.choice(len(x_test), size=round(len(x_test) / reduction_factor), replace=False)


In [None]:
# Pull the sampled rows (first axis) out of each partition.
x_train_small = np.take(x_train, idx_train, axis=0)
x_test_small = np.take(x_test, idx_test, axis=0)

In [None]:
# idx_train / idx_test are positions inside the shuffled x_train / x_test, so
# the matching labels must come from y_train / y_test. Indexing the unsplit
# `y` would pair each sample with an unrelated label.
labels_train_small = y_train[idx_train]
labels_test_small = y_test[idx_test]

In [None]:

# Print the array shapes before and after subsampling, one section per stage.
shape_report = [
    ("------------------ Before -----------------", (x_train, x_test)),
    ("--- After applying the reduction factor ---", (x_train_small, x_test_small)),
]

for header, (train_arr, test_arr) in shape_report:
    print(header)
    print(train_arr.shape, test_arr.shape)
    print("")

print("-" * 43)

# To inspect the class balance of the subset, np.unique(..., return_counts=True)
# can be applied to the class indices of labels_train_small.

In [9]:
from tensorflow.keras.applications import VGG16

In [10]:
def initialize_model(input_shape=(1031, 85, 3), n_classes=6, freeze_base=True):
    """Build a VGG16-based classifier with a small dense head.

    The convolutional base is VGG16 with ImageNet weights and no top layers.
    Its output is flattened and passed through two Dense+Dropout stages
    (128 and 64 units, dropout 0.3) before a softmax output layer.

    Args:
        input_shape: shape of a single input sample, channels last. Must have
            3 channels because the ImageNet weights are loaded.
        n_classes: number of units in the softmax output layer.
        freeze_base: when True (default) the pretrained VGG16 weights are not
            updated during training, so only the dense head learns. Training
            the whole network from the start lets the large gradients of the
            randomly initialised head overwrite the pretrained features.
            Pass False to fine-tune the base as well.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    base_model = VGG16(weights="imagenet", include_top=False, input_shape=input_shape)
    # Must be set before the model is compiled for it to take effect.
    base_model.trainable = not freeze_base

    model = models.Sequential([
        base_model,
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(n_classes, activation='softmax'),
    ])
    print("Model initialized")
    return model

In [11]:
def compile_model(model):
    """Compile ``model`` in place for one-hot multi-class classification.

    Uses the Adam optimizer, categorical cross-entropy loss and the accuracy
    metric.

    Args:
        model: an object exposing a Keras-style ``compile`` method.

    Returns:
        The same ``model`` instance, now compiled. (``compile`` itself
        returns None, so returning its result would hand callers nothing.)
    """
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [12]:
# Build the classifier, then compile it in place.
# The trailing semicolon keeps the cell from echoing any return value.
model = initialize_model()
compile_model(model);

Model initialized


In [13]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 32, 2, 512)        14714688  
                                                                 
 flatten (Flatten)           (None, 32768)             0         
                                                                 
 dense (Dense)               (None, 128)               4194432   
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 6)                 3

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once validation loss has failed to improve for 5 consecutive epochs and
# roll the model back to the best weights seen. This avoids spending the full
# epoch budget (and keeping last-epoch weights) after overfitting sets in.
es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    x_train,
    y_train,
    validation_split=0.3,  # last 30% of the training arrays are held out for validation
    epochs=30,             # upper bound; early stopping may end training sooner
    batch_size=32,
    verbose=1,
    callbacks=[es],
)

In [None]:
def plot_history(history, title='', axs=None, exp_name="", loss_ylim=(0., 2.2), acc_ylim=(0., 1.)):
    """Plot training and validation loss/accuracy curves side by side.

    Args:
        history: object with a ``history`` dict containing the keys
            'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
        title: optional figure-level title; nothing is set when empty.
        axs: optional ``(ax1, ax2)`` pair to draw on, which allows several
            runs to be overlaid. A new 1x2 figure is created when None.
        exp_name: optional experiment tag appended to every legend label;
            a leading '_' is added if it is missing.
        loss_ylim: ``(low, high)`` y-limits for the loss panel.
        acc_ylim: ``(low, high)`` y-limits for the accuracy panel. Starts at 0
            so that chance-level accuracy (1/6 for six classes) stays visible.

    Returns:
        The ``(ax1, ax2)`` tuple that was drawn on.
    """
    if axs is not None:
        ax1, ax2 = axs
    else:
        _, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

    if len(exp_name) > 0 and exp_name[0] != '_':
        exp_name = '_' + exp_name

    curves = history.history

    ax1.plot(curves['loss'], label='train' + exp_name)
    ax1.plot(curves['val_loss'], label='val' + exp_name)
    ax1.set_ylim(*loss_ylim)
    ax1.set_title('loss')
    ax1.set_xlabel('epoch')
    ax1.legend()

    ax2.plot(curves['accuracy'], label='train accuracy' + exp_name)
    ax2.plot(curves['val_accuracy'], label='val accuracy' + exp_name)
    ax2.set_ylim(*acc_ylim)
    ax2.set_title('Accuracy')
    ax2.set_xlabel('epoch')
    ax2.legend()

    # The title parameter applies to the whole figure, not a single panel.
    if title:
        ax1.figure.suptitle(title)

    return (ax1, ax2)

In [None]:
# Draw the learning curves; the trailing semicolon stops the cell from
# echoing the returned (ax1, ax2) tuple underneath the figure.
plot_history(history);

In [None]:
#model.save('first_cnn_model.h5')

In [None]:
#model.save_weights('first_cnn_model_weights_25epochs.h5')

In [None]:
#loaded_model = tf.keras.models.load_model('first_cnn_model.h5')