In [2]:
%cd ./drive/My Drive/audio_compression

/content/drive/My Drive/audio_compression


In [3]:
import wave, struct
import keras
from keras.models import Model
from keras.layers import Dense, Input, LSTM, Bidirectional, Conv1D, AveragePooling1D, MaxPool1D, UpSampling1D, Flatten, Reshape
import numpy as np
import os
from scipy.io import wavfile

In [5]:
# Network input: one frame of 416 samples with a single channel.
in_layer = Input(shape=(416, 1))

# Encoder: five Conv1D -> AveragePooling1D stages given as (filters, kernel_size).
# Each default pooling step halves the time axis (416 -> 208 -> 104 -> 52 -> ...).
encode = in_layer
for n_filters, kernel in [(16, 5), (32, 5), (32, 3), (32, 3), (64, 3)]:
    encode = Conv1D(filters=n_filters, kernel_size=kernel, padding='same', activation='relu')(encode)
    encode = AveragePooling1D()(encode)
# Bottleneck: the flattened feature map is squeezed into 13 values per frame.
encode = Dense(13, activation='relu')(Flatten()(encode))

# Decoder: expand the code back to a (13, 64) feature map, then mirror the
# encoder with Conv1D -> UpSampling1D stages that double the time axis each time.
decode = Dense(13*64, activation='relu')(encode)
decode = Reshape((13, 64))(decode)
for n_filters, kernel in [(64, 3), (32, 3), (32, 3), (32, 5), (16, 5)]:
    decode = Conv1D(filters=n_filters, kernel_size=kernel, padding='same', activation='relu')(decode)
    decode = UpSampling1D()(decode)
# Final convolution collapses the feature maps back to a single output channel.
decode = Conv1D(filters=1, kernel_size=5, padding='same', activation='relu')(decode)

# The autoencoder is the full input -> reconstruction graph.
autoencoder = Model(in_layer, decode)
autoencoder.summary()

# Compile the model.
# NOTE(review): 'accuracy' is a classification metric and carries little meaning
# for a continuous reconstruction target; a regression metric such as 'mae'
# would be more informative. Left unchanged to keep the training logs comparable.
autoencoder.compile(
    'Adamax',
    loss='mean_squared_logarithmic_error',
    metrics=['accuracy'],
)

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 416, 1)]          0         
_________________________________________________________________
conv1d_11 (Conv1D)           (None, 416, 16)           96        
_________________________________________________________________
average_pooling1d_5 (Average (None, 208, 16)           0         
_________________________________________________________________
conv1d_12 (Conv1D)           (None, 208, 32)           2592      
_________________________________________________________________
average_pooling1d_6 (Average (None, 104, 32)           0         
_________________________________________________________________
conv1d_13 (Conv1D)           (None, 104, 32)           3104      
_________________________________________________________________
average_pooling1d_7 (Average (None, 52, 32)           

In [6]:
def load_data(DATA_FILES_WAV, frame_size=416):
    """Load every ``.wav`` file in a directory into one flat training vector.

    Each file is read with ``scipy.io.wavfile.read``, flattened to 1-D
    (multi-channel audio stays interleaved sample by sample), rescaled from
    the signed 16-bit range to [0, 1), zero-padded at the end up to the next
    multiple of ``frame_size`` and appended to the result. The name of every
    directory entry is printed as it is visited.

    Parameters
    ----------
    DATA_FILES_WAV : str
        Path of the directory to scan. Entries that do not end in ``.wav``
        are skipped.
    frame_size : int, optional
        Number of samples per training frame; each file is padded so that its
        length is a multiple of this value. Defaults to 416.

    Returns
    -------
    numpy.ndarray
        1-D float64 array whose length is a multiple of ``frame_size``. It is
        empty when the directory contains no ``.wav`` file.
    """
    chunks = []
    for filename in os.listdir(DATA_FILES_WAV):
        print(filename)
        if not filename.endswith(".wav"):
            continue
        _, samples = wavfile.read(os.path.join(DATA_FILES_WAV, filename))
        # Flatten without assuming a channel axis: works for mono (1-D) input
        # and keeps stereo frames interleaved as L0, R0, L1, R1, ...
        samples = np.asarray(samples).reshape(-1)
        # Map [-32768, 32767] onto [0, 1).
        # NOTE(review): assumes 16-bit PCM input; other sample formats would
        # need a different scale factor -- confirm against the dataset.
        samples = (samples.astype(float) / float(pow(2, 15)) + 1.0) / 2.0
        # Pad only as far as the next frame boundary; a file that already
        # fills whole frames gets no extra all-zero frame.
        # NOTE(review): the pad value 0.0 corresponds to full negative
        # amplitude after rescaling (silence is 0.5); kept as-is so data stays
        # compatible with previously trained weights.
        pad = -len(samples) % frame_size
        chunks.append(np.pad(samples, (0, pad), 'constant'))
    if not chunks:
        return np.array([])
    # Single concatenation instead of re-copying the array for every file.
    return np.concatenate(chunks)

In [7]:
# Checkpoint callback for training: writes only the weights (not the full
# model) to "cp.ckpt" and logs each save (verbose=1).
checkpoint_options = dict(filepath="cp.ckpt", save_weights_only=True, verbose=1)
cp_callback = keras.callbacks.ModelCheckpoint(**checkpoint_options)

In [8]:
# Read every .wav clip of the training directory into one flat sample array.
training_dir = 'audio_wav_training'
train_data = load_data(training_dir)

0.wav
1.wav
2.wav
3.wav
4.wav
5.wav
6.wav
7.wav
8.wav
9.wav
10.wav
11.wav
12.wav
13.wav
14.wav
15.wav
16.wav
17.wav
18.wav
19.wav
20.wav
21.wav
22.wav
23.wav
24.wav
25.wav
26.wav
27.wav
28.wav
29.wav
30.wav
31.wav
32.wav
33.wav
34.wav
35.wav
36.wav
37.wav
38.wav
39.wav
40.wav
41.wav
42.wav
43.wav
44.wav
45.wav
46.wav
47.wav
48.wav
49.wav


In [9]:
# Resume from the weights written by the checkpoint callback when they exist.
# On a fresh run (no checkpoint yet) keep the initial weights instead of
# failing on the missing file, so Restart & Run All works end to end.
# NOTE(review): assumes the TensorFlow checkpoint format, which stores
# "cp.ckpt" as "cp.ckpt.index" plus data shards -- confirm for other formats.
if os.path.exists('cp.ckpt.index'):
    autoencoder.load_weights('cp.ckpt')
else:
    print('No checkpoint found at cp.ckpt; training from scratch.')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f1e5acedc88>

In [10]:
# Cut the flat sample vector into frames of shape (n_frames, 416, 1).
# reshape(-1, ...) gives the same result as len(train_data)//416 on the flat
# array but is also a no-op when this cell is re-run on already-framed data,
# where the explicit frame count would be wrong and the reshape would fail.
train_data = train_data.reshape(-1, 416, 1)
# Autoencoder training: the input frames are also the reconstruction targets.
autoencoder.fit(train_data, train_data, epochs=2, shuffle=True, callbacks=[cp_callback])
# Persist the full trained model next to the per-epoch weight checkpoints.
autoencoder.save("autoencoder_1Channel.model")

Epoch 1/2
Epoch 00001: saving model to cp.ckpt
Epoch 2/2
Epoch 00002: saving model to cp.ckpt
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: autoencoder_1Channel.model/assets
