In [1]:
# create wav files sine wave
!pip install wavio
import wavio
import numpy as np

# metadata
NUM_FILES = 10
SAMPLING_RATE = 44100
SAMPLE_WIDTH = 3
TONE_DUR = 30 # duration of tone in seconds
FREQ_BASE = 400
FREQ_INC = 100

FILE_PATH = '../data/'
FILE_NAME = 'sine_'

# define time
time = np.array(range(SAMPLING_RATE*TONE_DUR)) / SAMPLING_RATE
print(len(time))

!mkdir '../data/'
!mkdir '../models'
!mkdir '../music/'

# create tones and write to files
for i in range(NUM_FILES):
    freq = FREQ_BASE + FREQ_INC*i
    tone = np.sin(2*np.pi*freq*time)
    file = FILE_PATH + FILE_NAME + str(freq) + '.wav'
    wavio.write(file, tone, SAMPLING_RATE, sampwidth=SAMPLE_WIDTH)
print('done')

1323000
mkdir: cannot create directory ‘../data/’: File exists
mkdir: cannot create directory ‘../models’: File exists
mkdir: cannot create directory ‘../music/’: File exists
done


In [2]:
# import libraries
import tensorflow as tf
import numpy as np
import time
import os
import wavio

In [3]:
# define hyperparameters
# file I/O parameters
INPUT_DATA_PATH = '../data/'
OUTPUT_MODEL_PATH = '../models/'
OUTPUT_MUSIC_PATH = '../music/'
OUTPUT_MUSIC_FORMAT = '.wav'

# data parameters
SCALE = 1
INP_LEN = 10

# output file parameters
SAMPLING_RATE = 44100
SAMPLE_WIDTH = 3

# model compiling paramenters
OPTIMIZER = 'adam'
LOSS = tf.keras.losses.Huber()
METRICS = ['mae']
#lr_schedule = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-8 * 10**(epoch / 20))

In [4]:
# load data #TODO: change data reading from text to wav file i.e. read data from wav file into a numpy array
filenames = os.listdir(INPUT_DATA_PATH)
filenames=filenames[:10]
music = []
for file in filenames:
    m = wavio.read(INPUT_DATA_PATH+file)
    music = np.append(music, m.data)
SCALE = float(max(music))
music = music / SCALE
print(len(music))

13230000


In [5]:
# create daata from music array
data = []
for i in range(len(music)-INP_LEN):
    data.append(music[i: i+INP_LEN])
data = np.array(data)

In [6]:
# define input and output arrays
x = data[:, :-1]
y = data[:, -1]*100

print(x.shape)
print(y.shape)
print(x[0])
print(y[0])

(13229990, 9)
(13229990,)
[-1.19209304e-07  1.13734736e-01  2.25993541e-01  3.35319440e-01
  4.40293722e-01  5.39554183e-01  6.31812529e-01  7.15871419e-01
  7.90640091e-01]
85.51481789527152


In [8]:
# define model
# ## model with LSTM
# model = tf.keras.Sequential([
#     tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True), input_shape=(INP_LEN-1, 1)),
#     tf.keras.layers.Dropout(0.2),
#     tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16)),
#     tf.keras.layers.Dropout(0.2),
#     tf.keras.layers.Dense(8, activation='relu'),
#     tf.keras.layers.Dense(1)
# ])

## model with just dense layers
model = tf.keras.Sequential([
    tf.keras.Input(shape=(INP_LEN-1,)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1),
    tf.keras.layers.Lambda(lambda x: x*100)
])

### model with Convulation
# model = tf.keras.Sequential([
#     tf.keras.layers.Conv1D(64, 5, activation='relu'),
#     tf.keras.layers.Conv1D(32, 5, activation='relu'),
#     tf.keras.layers.Dropout(0.2),
#     tf.keras.layers.Dense(16, activation='relu'),
#     tf.keras.layers.Dense(1)
# ])

### model with Convulation and LSTM
# model = tf.keras.Sequential([
#     tf.keras.layers.Conv1D(64, 5, activation='relu'),
#     tf.keras.layers.LSTM(32, return_sequences=True),
#     tf.keras.layers.Dropout(0.2),
#     tf.keras.layers.LSTM(32, return_sequences=True),
#     tf.keras.layers.Dropout(0.2),
#     tf.keras.layers.Dense(32, activation='relu'),
#     tf.keras.layers.Dense(16, activation='relu'),
#     tf.keras.layers.Dense(1)
# ])

In [9]:
# compile model
model.compile(optimizer=OPTIMIZER, loss=LOSS, metrics=METRICS)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 256)               2560      
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 128)               32896     
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 8)                 1032      
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 9         
_________________________________________________________________
lambda_1 (Lambda)            (None, 1)                

In [10]:
# define callbacks
class myCallback(tf.keras.callbacks.Callback):
  def on_epoch_end(self, epoch, logs={}):
    if(logs.get('accuracy')>=0.8):
      print("Accuracy reached 80%. Stopping learning!")
      self.model.stop_training=True
callback = myCallback()

In [11]:
# fit model
history = model.fit(x, y, epochs=10, verbose=1, validation_split=0.1, batch_size=1024*64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
# save model 
output_file_name = time.strftime("%Y%m%d_%H%M%S") # TODO: change it to parameters of model instead of timestamp
model.save(OUTPUT_MODEL_PATH + output_file_name + '.h5')

In [13]:
# plot loss
#plt.semilogx(history.history["lr"], history.history["loss"])

In [14]:
# create new music and save to file
MUSIC_LENGTH = 100000                        # ~20 seconds at 44100 samples/sec
seed = np.random.choice(1, INP_LEN-1)
print(seed.shape)
print(SCALE)
music_array = np.random.choice(100, MUSIC_LENGTH)/100.0
print(music_array[:10])
for i in range(INP_LEN, MUSIC_LENGTH):
    pred = model.predict(np.array([music_array[i-INP_LEN+1:i]]))/100.0
    music_array[i] = pred
    #print(music_array[i], end=' ')


(9,)
8388607.0
[0.49 0.81 0.44 0.85 0.54 0.28 0.73 0.33 0.16 0.03]
0.0755053460597992 -0.36617764830589294 -0.5236705541610718 -0.7842646241188049 -0.9778013825416565 -0.9367656707763672 -0.9918698072433472 -0.9404826164245605 -0.8168613314628601 -0.6512342691421509 -0.49171504378318787 -0.32293400168418884 -0.10465420037508011 0.10311044752597809 0.35810908675193787 0.5518209338188171 0.7561279535293579 0.9087909460067749 1.0305849313735962 1.1059300899505615 1.122678279876709 1.0821548700332642 0.9640404582023621 0.8300961256027222 0.6307011842727661 0.4247918426990509 0.19866836071014404 -0.043774593621492386 -0.2977011799812317 -0.5235483050346375 -0.6919105052947998 -0.7886942028999329 -0.8203557729721069 -0.7985495924949646 -0.7177545428276062 -0.6178074479103088 -0.497700959444046 -0.3587885797023773 -0.2190900295972824 -0.03546959161758423 0.11892583221197128 0.2876233458518982 0.4327877461910248 0.5767009854316711 0.7051334977149963 0.8101842999458313 0.8924957513809204 0.9442

KeyboardInterrupt: ignored

In [17]:
# write music to output file TODO: write output to a wav file
output_music_file = OUTPUT_MUSIC_PATH + time.strftime("%Y%m%d_%H%M%S") + OUTPUT_MUSIC_FORMAT
wavio.write(output_music_file, SCALE*music_array, SAMPLING_RATE, sampwidth=SAMPLE_WIDTH)