# Run all cells in order and read comments to understand what happens exactly.

In [None]:
# Dem Libraries!
%matplotlib inline
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm_notebook as tqdm
from keras.models import Model
from keras.layers import Input, Reshape
from keras.layers.core import Dense, Activation, Dropout, Flatten
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import UpSampling1D, Conv1D
from keras.layers.advanced_activations import LeakyReLU
from keras.optimizers import Adam, SGD
from keras.callbacks import TensorBoard
import pretty_midi

In [None]:
# To get the sine waves data
# n_samples : Number of sine waves you want generated
def sample_data(n_samples=10000, x_vals=np.arange(0, 5, .1), max_offset=100, mul_range=[1, 2]):
    vectors = []
    for i in range(n_samples):
        offset = np.random.random() * max_offset
#         print("Offset",offset)
        mul = mul_range[0] + np.random.random() * (mul_range[1] - mul_range[0])
#         print("Mul",mul)
        vectors.append(
            np.sin(offset + x_vals * mul) / 2 + .5
        )
    return np.array(vectors)

#Plot 5 sine waves as a test to visually check if it works
ax = pd.DataFrame(np.transpose(sample_data(5))).plot()

In [None]:
#Generator Model :
# Needs to be able to take in random noise and generate realistic looking output data

def get_generative(G_in, dense_dim=200, out_dim=50, lr=1e-3):
    x = Dense(dense_dim)(G_in)
    x = Activation('tanh')(x)
    G_out = Dense(out_dim, activation='tanh')(x)
    G = Model(G_in, G_out)
    opt = SGD(lr=lr)
    G.compile(loss='binary_crossentropy', optimizer=opt)
    return G, G_out

G_in = Input(shape=[10])
G, G_out = get_generative(G_in)
G.summary()

In [None]:
#Discriminator Model :
# Needs to be able to take in a generated signal and distinguish between real and fake(generated)

def get_discriminative(D_in, lr=1e-3, drate=.25, n_channels=50, conv_sz=5, leak=.2):
    x = Reshape((-1, 1))(D_in)
    
    x = Conv1D(n_channels, conv_sz, activation='relu')(x)
    x = Dropout(drate)(x)
    x = Flatten()(x)
    x = Dense(n_channels)(x)
    D_out = Dense(2, activation='sigmoid')(x)
    D = Model(D_in, D_out)
    dopt = Adam(lr=lr)
    D.compile(loss='binary_crossentropy', optimizer=dopt)
    return D, D_out

D_in = Input(shape=[50])
D, D_out = get_discriminative(D_in)
D.summary()

In [None]:
# Function to make sure the Discriminators weights are frozen, while training the Generator
def set_trainability(model, trainable=False):
    model.trainable = trainable
    for layer in model.layers:
        layer.trainable = trainable

        
def make_gan(GAN_in, G, D):
    set_trainability(D, False)
    x = G(GAN_in)
    GAN_out = D(x)
    GAN = Model(GAN_in, GAN_out)
    GAN.compile(loss='binary_crossentropy', optimizer=G.optimizer)
    return GAN, GAN_out

GAN_in = Input([10])
GAN, GAN_out = make_gan(GAN_in, G, D)
GAN.summary()

In [None]:
# Function to get a concatenated input of real_data and fake_data with appropriate concatenated labels
def sample_data_and_gen(G, noise_dim=10, n_samples=10000):
    # Generated Real Samples 
    XT = sample_data(n_samples=n_samples)
    # Get the random noise that'll be used to generate the fake data
    XN_noise = np.random.uniform(0, 1, size=[n_samples, noise_dim])
    # Generate the Fake Samples
    XN = G.predict(XN_noise)
    # Concatenat the Real and Fake Data
    X = np.concatenate((XT, XN))
    # Concatenate the labels and set the appropriate values
    y = np.zeros((2*n_samples, 2))
    y[:n_samples, 1] = 1
    y[n_samples:, 0] = 1
    return X, y

def pretrain(G, D, noise_dim=10, n_samples=10000, batch_size=32):
    # Get the real samples for the pretraining of the discriminator
    X, y = sample_data_and_gen(G, n_samples=n_samples, noise_dim=noise_dim)
    # Allow the Discriminator to be trained
    set_trainability(D, True)
    # Fit the Discriminator with 1 epoch
    D.fit(X, y, epochs=1, batch_size=batch_size)


In [None]:
# Perform One Train run for the Determiner
pretrain(G, D)

In [None]:
# Get sample noise for the GAN training cycle
def sample_noise(G, noise_dim=10, n_samples=10000):
    X = np.random.uniform(0, 1, size=[n_samples, noise_dim])
    y = np.zeros((n_samples, 2))
    y[:, 1] = 1
    return X, y

# Training the GAN by backpropagating the losses of the discriminator on the generated samples, 
# whilst freezing discriminator weights

def train(GAN, G, D, epochs=50, n_samples=10000, noise_dim=10, batch_size=32, verbose=False, v_freq=50):
    d_loss = []
    g_loss = []
    e_range = range(epochs)
    if verbose:
        e_range = tqdm(e_range)
    #For each EPOCH
    for epoch in e_range:
        # Get True and Generated Samples with Labels
        X, y = sample_data_and_gen(G, n_samples=n_samples, noise_dim=noise_dim)
        # Allow Discriminator to be Trained
        set_trainability(D, True)
        # Evaluate the Discriminator loss on this set(only to record loss)
        d_loss.append(D.train_on_batch(X, y))
        
        # Now just get the generated samples
        X, y = sample_noise(G, n_samples=n_samples, noise_dim=noise_dim)
        # Freeze the Discriminator Weights
        set_trainability(D, False)
        # Evaluate the Generator loss on this set
        g_loss.append(GAN.train_on_batch(X, y))
        if verbose and (epoch + 1) % v_freq == 0:
            print("Epoch #{}: Generative Loss: {}, Discriminative Loss: {}".format(epoch + 1, g_loss[-1], d_loss[-1]))
    return d_loss, g_loss


In [None]:
d_loss, g_loss = train(GAN, G, D, verbose=True)

In [None]:
N_VIEWED_SAMPLES = 2
data_and_gen, _ = sample_data_and_gen(G, n_samples=N_VIEWED_SAMPLES)

#To View the Generated Output
pd.DataFrame(np.transpose(data_and_gen[N_VIEWED_SAMPLES:])).plot()
#To View a smoothened version of the Generated Output(using a rollling mean)
#pd.DataFrame(np.transpose(data_and_gen[N_VIEWED_SAMPLES:])).rolling(5).mean()[5:].plot()


In [None]:
ax = pd.DataFrame(
    {
        'Generative Loss': g_loss,
        'Discriminative Loss': d_loss,
    }
).plot(title='Training loss', logy=True)
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")

# Code following this is for playing around to understand how the MidiGan dataset is made

In [None]:
instru = open('./trial/instruments.json')

In [None]:
arr = np.load('./trial/arrays.npz',mmap_mode='r')
on_roll = np.load('./trial/onset_rolls.npz',mmap_mode='r')
pi_roll = np.load('./trial/piano_rolls.npz',mmap_mode='r')

In [None]:
# To access the name of the Piano Roll files
pi_roll.files

# Once you get the name of the argument you wish to see, 
pi_roll['name of the argurment']

In [None]:
arr.files

In [None]:
arr['downbeat_times']

In [None]:
import json
with open('./trial/instruments.json') as f:
    datastore = json.load(f)

In [None]:
datastore

In [None]:
pi_roll.files

In [None]:
len(pi_roll['0_csc_indptr'])

In [None]:
pi_roll['0_csc_data']

In [None]:
len(pi_roll['0_csc_indices'])

In [None]:
pi_roll['2_csc_indptr']

In [None]:
pi_roll['0_csc_shape']

In [None]:
from collections import Counter

In [None]:
Counter(pi_roll['11_csc_indptr'])

In [None]:
Counter(pi_roll['10_csc_indices'])

In [None]:
pi_roll['11_csc_indptr']

In [None]:
pi_roll['6_csc_indptr']

In [None]:
pi_roll['5_csc_indptr']

In [None]:
pi_roll['5_csc_data']

In [None]:
pi_roll['6_csc_data']

In [None]:
pi_roll['5_csc_indices']

In [None]:
pi_roll['6_csc_indices']

In [None]:
arr['downbeat_times']

In [None]:
pi_roll['0_csc_shape']

In [None]:
piano_notes = np.zeros((11520,128))

# Using Ben's function

In [None]:
midi_data= pretty_midi.PrettyMIDI("./Sweet Child O' Mine.mid")
piano=midi_data.instruments[0]
piano_roll=piano.get_piano_roll(16)
np.shape(piano_roll)

In [None]:
def pianoroll_to_midi(piano_roll, fs=16, program=0):
    '''Converts a Piano Roll array to a PrettyMidi object
     with a single instrument.
    
    Input:
    piano_roll : np.ndarray, shape=(128,frames), dtype=int
        Piano roll of one instrument
    fs : int
        Sampling frequency of the columns, i.e. each column is spaced apart
        by ``1./fs`` seconds.
    program : int
        The program number of the instrument.
    
    Returns:
    midi_object : pretty_midi.PrettyMIDI
        A pretty_midi.PrettyMIDI class instance describing
        the piano roll.
    '''
    period=1./fs
    
    notes, frames = piano_roll.shape #get number of frames in our piano roll
    pm = pretty_midi.PrettyMIDI() #create a Pretty Midi object
    instrument = pretty_midi.Instrument(program=0) #specify our instrument

    #record previous pitch/velocity so we can concatenate notes together
    prev_pitch=0
    prev_velocity=0
    notelength=0
    starttime=0
    endtime=period
    
    i=0
    while i<frames: #range over the frames of the piano roll

        #need to specify velocity (100 for note, 0 for rest),  start time, end time, and pitch
        
        
        #for ith column of piano roll get the (possible) non-zero index which
        #corresponds to the pitch
        col=piano_roll[:,i] 
        colnext=piano_roll[:,i+1]
        tmp=np.nonzero(col)
        tmpnext=np.nonzero(colnext)
        
        if tmp[0].size==0: #current is rest note, don't need to worry about length
            
            velocity=0
            starttime=i*period
            endtime=period+i*period
            current_pitch=0
           
        
        else:
            #get current pitch and set the start time
            velocity=100
            current_pitch=tmp[0][0]
            starttime=i*period
            
            
            #loop over future notes to find when pitch changes
            pitchchange=False
            while pitchchange==False:
                
                #if end of song quit
                if i==frames-1:
                    endtime=period+i*period
                    break
                    
                #get next note
                colnext=piano_roll[:,i+1]
                tmpnext=np.nonzero(colnext)
                
                #if next note is a rest
                if tmpnext[0].size==0: 
                    endtime=period+i*period
                    break
                    
                #if next frame has different pitch
                elif tmpnext[0][0]!=current_pitch: 
                    endtime=period+i*period
                    pitchchange=True
            
                else:
                    #increment to next frame
                    i=i+1
                    
            
        pm_note=pretty_midi.Note(velocity=100, pitch=current_pitch, start=starttime, end=endtime)
        i=i+1
        #we have appended note, now move to next note   
        instrument.notes.append(pm_note)
        #print starttime
        #print endtime
        
    pm.instruments.append(instrument)

    return pm

In [None]:
mypm=pianoroll_to_midi(piano_roll, fs=16, program=0)

In [None]:
mypm.write('testmid.mid')

In [None]:
piano_roll.shape

In [None]:
np.transpose(piano_roll)[8]

In [None]:
X = sample_data()

In [None]:
pt = np.transpose(np.transpose(piano_roll)[:16])

In [None]:
pt.shape

In [None]:
mypm=pianoroll_to_midi(pt, fs=16, program=0)

In [None]:
mypm.write('testmid16.mid')

In [None]:
piano_roll_T = np.transpose(piano_roll)

In [None]:
BarData = []
for i in range(piano_roll_T.shape[0]):
    BarData.append(np.transpose(piano_roll_T[i:i+16]))

In [None]:
len(BarData)

In [None]:
x = np.matrix(np.random.randint(5, size=(16,16)))

In [None]:
data_x = np.transpose(x,(0,2,3,1))

In [None]:
x

# Using Shrey's Curated Training Data

In [1]:
import os
import pretty_midi
import numpy as np

train_data_path = './Training Stage 1/'

In [12]:
def get_midi_content(training_path):
    filenames = []
    lens = []
    fin_midi_stream = []
    for filename in os.listdir(training_path):
        filenames.append(os.path.join(training_path,filename))
    fin_midi_streama = []
    fin_midi_files = []
    min_melody_len = 999999 #a high number
    for file in filenames:
        temp = pretty_midi.PrettyMIDI(file)
        melody = temp.instruments[0]
    #     chord = temp.instruments[1]
        melody_math = melody.get_piano_roll(16)
        curr_stream = np.argmax(melody_math, axis=0)
        fin_midi_streama.extend(curr_stream)
    chop_size = 200
    num_of_chops = len(fin_midi_stream)//chop_size
    for i in range(num_of_chops):
        fin_midi_files.append(fin_midi_stream[(i)*num_of_chops:(i+1)*num_of_chops])
    return fin_midi_files

In [13]:
MF = get_midi_content(train_data_path)

In [14]:
for i in range(len(MF)):
    print(len(MF[i]))

In [15]:
len(MF)

0

In [5]:
from collections import Counter

In [6]:
gg = Counter(L)

In [None]:
temp = midi_files[0]

In [None]:
melody = temp.instruments[0]
chord = temp.instruments[1]

In [None]:
melody_math = melody.get_piano_roll(16)

In [None]:
int(melody_math.shape[1])

In [None]:
melody_math.shape

In [None]:
idx = np.argmax(melody_math, axis=0)

In [None]:
for i in idx:
    print(i)

In [None]:
f = np.matrix([])

In [None]:
min_len = 99999
for i in range(len(L)):
    if (L[i])<min_len:
        min_len = (L[i])
print(min_len)

In [17]:

training_path = './Training Stage 1/'
filenames = []
lens = []
fin_midi_stream = []
for filename in os.listdir(training_path):
    filenames.append(os.path.join(training_path,filename))
fin_midi_streama = []
fin_midi_files = []
min_melody_len = 999999 #a high number
for file in filenames:
    temp = pretty_midi.PrettyMIDI(file)
    melody = temp.instruments[0]
#     chord = temp.instruments[1]
    melody_math = melody.get_piano_roll(16)
    curr_stream = np.argmax(melody_math, axis=0)
    fin_midi_streama.extend(curr_stream)
chop_size = 200
num_of_chops = len(fin_midi_streama)//chop_size
for i in range(num_of_chops):
    fin_midi_files.append(fin_midi_streama[(i)*chop_size:(i+1)*chop_size])

In [54]:
len(fin_midi_files)

23887

gg = Counter(fin_midi_streama)

In [53]:
gg.most_common(40)

[(0, 1342557),
 (73, 264882),
 (75, 250538),
 (77, 240745),
 (82, 232834),
 (80, 229811),
 (72, 228652),
 (84, 221067),
 (79, 211520),
 (76, 210166),
 (74, 206346),
 (78, 199635),
 (85, 196568),
 (87, 195867),
 (81, 189981),
 (86, 178767),
 (83, 177546)]

In [57]:
X = np.random.uniform(73, 87, size=[23877, 200])

In [58]:
X[0]

array([ 80.55391626,  84.45646418,  74.53829942,  76.29345248,
        85.55848922,  74.43020755,  81.3142567 ,  84.73011338,
        82.15470647,  74.84810167,  73.74753144,  86.86872991,
        82.43358581,  85.96641822,  81.5648937 ,  77.01376868,
        83.31646173,  78.55451131,  76.31906938,  75.83942452,
        79.3824229 ,  75.0621685 ,  77.28196406,  86.05608212,
        80.63318035,  77.96501085,  73.11899177,  80.41722584,
        74.19939917,  73.0877949 ,  81.00298984,  83.08442972,
        77.46214885,  74.11236943,  84.21421854,  84.32910176,
        84.02816834,  77.67424822,  83.4960424 ,  74.0684403 ,
        80.52911314,  86.73199597,  82.72331938,  83.69152889,
        81.70543384,  86.42980989,  86.69569393,  80.72004166,
        86.25726497,  86.82374801,  76.20483978,  79.87642865,
        83.92497186,  76.05081444,  80.87743338,  77.75659638,
        77.62639084,  86.36502477,  76.69274584,  81.4583118 ,
        77.33507179,  80.06533707,  73.90895729,  76.55