## MUSIC Generation woohooo

In [2]:
import numpy as np
import tensorflow as tf

import wave, math, struct

from keras.models import Sequential
from keras.layers import Dense, LSTM, Activation

In [5]:
# prepare a dummy data for simulating notes
notes_freqs = {
    'A':440.0, 'B':493.88, 'C':261.33, 'D':293.66, 'E':329.63, 'F':349.23, 'G':392.00
}

In [7]:
notes = list(notes_freqs.keys())
notes

['A', 'B', 'C', 'D', 'E', 'F', 'G']

In [11]:
note_to_int = {note:i for i, note in enumerate(notes)}
int_to_note = {i:note for i, note in enumerate(notes)}

In [None]:
raw_music_data = [notes[np.random.randint(0,7)] for i in range(1000)] # random music data

Data preparation

In [14]:
sequence_length = 3
network_input = []
network_output = []

for i in range(len(raw_music_data)-sequence_length):
    seq_in = raw_music_data[i: i+sequence_length]
    seq_out = raw_music_data[i+sequence_length]
    network_input.append([note_to_int[char] for char in seq_in])
    network_output.append(note_to_int[seq_out])

    print(seq_in, '--->', seq_out)

['F', 'G', 'G'] ---> B
['G', 'G', 'B'] ---> E
['G', 'B', 'E'] ---> D
['B', 'E', 'D'] ---> C
['E', 'D', 'C'] ---> C
['D', 'C', 'C'] ---> G
['C', 'C', 'G'] ---> A
['C', 'G', 'A'] ---> E
['G', 'A', 'E'] ---> A
['A', 'E', 'A'] ---> B
['E', 'A', 'B'] ---> E
['A', 'B', 'E'] ---> D
['B', 'E', 'D'] ---> C
['E', 'D', 'C'] ---> D
['D', 'C', 'D'] ---> A
['C', 'D', 'A'] ---> E
['D', 'A', 'E'] ---> F
['A', 'E', 'F'] ---> A
['E', 'F', 'A'] ---> A
['F', 'A', 'A'] ---> A
['A', 'A', 'A'] ---> E
['A', 'A', 'E'] ---> B
['A', 'E', 'B'] ---> D
['E', 'B', 'D'] ---> G
['B', 'D', 'G'] ---> G
['D', 'G', 'G'] ---> C
['G', 'G', 'C'] ---> C
['G', 'C', 'C'] ---> B
['C', 'C', 'B'] ---> D
['C', 'B', 'D'] ---> E
['B', 'D', 'E'] ---> E
['D', 'E', 'E'] ---> G
['E', 'E', 'G'] ---> D
['E', 'G', 'D'] ---> C
['G', 'D', 'C'] ---> G
['D', 'C', 'G'] ---> D
['C', 'G', 'D'] ---> A
['G', 'D', 'A'] ---> G
['D', 'A', 'G'] ---> G
['A', 'G', 'G'] ---> B
['G', 'G', 'B'] ---> G
['G', 'B', 'G'] ---> G
['B', 'G', 'G'] ---> E
['G', 'G', 

In [21]:
n_patterns = len(network_input)

In [None]:
x = np.reshape(network_input, (n_patterns, sequence_length, 1))
x

array([[[5],
        [6],
        [6]],

       [[6],
        [6],
        [1]],

       [[6],
        [1],
        [4]],

       ...,

       [[6],
        [4],
        [4]],

       [[4],
        [4],
        [4]],

       [[4],
        [4],
        [2]]], shape=(997, 3, 1))

In [20]:
from keras.utils import to_categorical

y = to_categorical(network_output)
y.shape

(997, 7)

Build the model

In [32]:
model = Sequential()
model.add(LSTM(256, input_shape=(x.shape[1], x.shape[2])))
model.add(Dense(1000, activation='relu'))
model.add(Dense(7, activation='softmax'))

  super().__init__(**kwargs)


In [33]:
model.summary()

In [34]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [35]:
model.fit(x,y, epochs=100)

Epoch 1/100
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.1324 - loss: 1.9629
Epoch 2/100
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.1545 - loss: 1.9497
Epoch 3/100
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1515 - loss: 1.9485
Epoch 4/100
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1555 - loss: 1.9442
Epoch 5/100
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1615 - loss: 1.9438
Epoch 6/100
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1605 - loss: 1.9436
Epoch 7/100
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1675 - loss: 1.9408
Epoch 8/100
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1675 - loss: 1.9393
Epoch 9/100
[1m32/32[0m [32m━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x17b4d5895e0>

### Generating new melody sequence

In [92]:
start_index = np.random.randint(0, len(network_input))
len(network_input)
start_index

347

In [93]:
pattern = network_input[start_index]
pattern

[3, 1, 2]

In [None]:
generated_melody = []
for i in range(32):
    x_input = np.reshape(pattern, (1, len(pattern), 1))
    prediction = model.predict(x_input, verbose=0)
    index = np.argmax(prediction)
    result = int_to_note[index]
    generated_melody.append(result)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]

Saving as audio file

In [96]:
with wave.open('my_music.wav', 'w') as wav_file:
    # channels (mono = 1), byte size, sample rate, 0, 'None', (compressed or not)
    wav_file.setparams((1, 2, 44100, 0, 'NONE', 'not compressed'))
    for note in generated_melody:
        freq = notes_freqs[note]
        num_samples = int(0.5*44100) # duration * sample rate
        for i in range(num_samples):
            # sample rate
            t = float(i) / 44100
            value = int(32767 * 0.5 * math.sin(2*math.pi*freq*t))
            # store in binary
            data = struct.pack('<h', value) # h for header
            wav_file.writeframes(data) 