# Sort data into training and test dataframes

In [1]:
# Functions from other notebook
from ipynb.fs.full.YCAC_metric_type_functions import *
from ipynb.fs.full.YCAC_metric_type_functions import extractPieceSlicesAndDFTs

In [2]:
# This is where you put the csv file
file = "/Users/matthewchiu/Documents/YCAC/YCAC-data-1/BachSlices.csv"
bach = pd.read_csv(file)

## Set weighting to subdivisional grids and take DFT of randomized windows

#### Functions used:<br> <br> gridifyM21 — makes music into a grid format: parameters(YCAC_file, name of pieces, subdivision grid) <br> <br> extractPieceSlicesAndDFTs — randomly selects slices from grids: parameters(gridified music, #ofSlices, length of slice) <br> <br> dftSlices — returns DFT of slices as [magnitude, squared then normalized mags, phase, complex]

In [3]:
allemandeGrids = pieceTypes.gridifyM21Chords(bach, "allemande", .125)
allemandeSlices = dftSlices(extractPieceSlicesAndDFTs(allemandeGrids, 1000, 96))[1]

Assign duration weight2
Assign window for register check3
Assign weight for lowest pitch in window6
Excel parsed and pieces extracted
Chords ready to be formatted
weighting process starting
starting grids
okay


In [4]:
couranteGrids = pieceTypes.gridifyM21Chords(bach, "courante", .125)
couranteSlices = dftSlices(extractPieceSlicesAndDFTs(couranteGrids, 1000, 96))[1]

Assign duration weight2
Assign window for register check3
Assign weight for lowest pitch in window6
Excel parsed and pieces extracted
Chords ready to be formatted
weighting process starting
starting grids


In [5]:
sarabandeGrids = pieceTypes.gridifyM21Chords(bach, "sarabande", .125)
sarabandeSlices = dftSlices(extractPieceSlicesAndDFTs(sarabandeGrids, 1000, 96))[1]

Assign duration weight2
Assign window for register check3
Assign weight for lowest pitch in window6
Excel parsed and pieces extracted
Chords ready to be formatted
weighting process starting
starting grids


In [6]:
gavotteGrids = pieceTypes.gridifyM21Chords(bach, "gavotte", .125)
gavotteSlices = dftSlices(extractPieceSlicesAndDFTs(gavotteGrids, 1000, 96))[1]

Assign duration weight2
Assign window for register check3
Assign weight for lowest pitch in window6
Excel parsed and pieces extracted
Chords ready to be formatted
weighting process starting
starting grids


In [7]:
allemandeDataFrame = dataFrameWithTargets(allemandeSlices, "allemande")
couranteDataFrame = dataFrameWithTargets(couranteSlices, "courante")
sarabandeDataFrame = dataFrameWithTargets(sarabandeSlices, "sarabande")
gavotteDataFrame = dataFrameWithTargets(gavotteSlices, "gavotte")

In [8]:
allWindows = allemandeDataFrame.append(couranteDataFrame)
allWindows = allWindows.append(sarabandeDataFrame)
allWindows = allWindows.append(gavotteDataFrame)

# Training the model

In [9]:
import tensorflow as tf
from keras import backend as K

In [10]:
# Training data extracted from allWindows

trainingData = allWindows[0:950]
trainingData2 = allWindows[1000:1950]
trainingData3 = allWindows[2000:2950]
trainingData4 = allWindows[3000:3950]
trainingFrames = [trainingData, trainingData2, trainingData3, trainingData4]

testData = allWindows[950:1000]
testData2 = allWindows[1950:2000]
testData3 = allWindows[2950:3000]
testData4 = allWindows[3950:4000]
testFrames = [testData, testData2, testData3, testData4]

trainingData = pd.concat(trainingFrames)
testData = pd.concat(testFrames)

trainingData['target'] = pd.Categorical(trainingData['target'])
trainingData['target'] = trainingData.target.cat.codes
target = trainingData.pop('target')
target = oneHotCategories(target)
# Set components below—I just included first half (without f0,f1)
trainingData = trainingData.iloc[:, 2:98]

testData['target'] = pd.Categorical(testData['target'])
testData['target'] = testData.target.cat.codes
testTarget = testData.pop('target')
testTarget = oneHotCategories(testTarget)
# Set components below—I just included first half (without f0,f1)
testData = testData.iloc[:, 2:98]

## Training dataset and fit

In [11]:
dataset = tf.data.Dataset.from_tensor_slices((trainingData.values, target[0]))
train_dataset = dataset.shuffle(len(trainingData)).batch(1)

#### Model parameters are currently set to 3 levels after being fed in from components: 30->10->4. <br> Where the output (4) is the probability distribution (made by softmax function).

In [12]:
def get_compiled_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(30, activation='selu'),
        tf.keras.layers.Dense(10, activation='selu'),
        tf.keras.layers.Dense(4, activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss = tf.keras.losses.CategoricalCrossentropy(),
                  metrics=['accuracy'])
    return model

In [13]:
model = get_compiled_model()
model.fit(train_dataset, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f9045baad30>

## Evaluating the model

In [14]:
# Now we evaluate the model with the separated testing data
test_dataset = tf.data.Dataset.from_tensor_slices((testData.values, testTarget[0]))
test_dataset = test_dataset.batch(1)

# Since the dataset already takes care of batching,
# we don't pass a `batch_size` argument.
model.fit(train_dataset, epochs=3)

# You can also evaluate or predict on a dataset.
print("Evaluate")
result = model.evaluate(test_dataset)
dict(zip(model.metrics_names, result))

Epoch 1/3
Epoch 2/3
Epoch 3/3
Evaluate


{'loss': 0.6635404825210571, 'accuracy': 0.7099999785423279}

## Testing independent cases (to show outputs)

In [16]:
def prepareForIndependentTests(testData):
    testData = testData
    target = testData.pop("target")
    target = oneHotCategories(target)
    target = np.array(target[0])
    testData = testData.iloc[0,2:98]
    testData = np.array([testData])
    
    return testData, target

In [20]:
# Reinitiate the data since "pop" function reshapes arrays
trainingData = allWindows[0:950]
trainingData2 = allWindows[1000:1950]
trainingData3 = allWindows[2000:2950]
trainingData4 = allWindows[3000:3950]
trainingFrames = [trainingData, trainingData2, trainingData3, trainingData4]

testData = allWindows[950:1000]
testData2 = allWindows[1950:2000]
testData3 = allWindows[2950:3000]
testData4 = allWindows[3950:4000]
testFrames = [testData, testData2, testData3, testData4]

trainingData = pd.concat(trainingFrames)
testData = pd.concat(testFrames)

trainingData['target'] = pd.Categorical(trainingData['target'])
trainingData['target'] = trainingData.target.cat.codes
testData['target'] = pd.Categorical(testData['target'])
testData['target'] = testData.target.cat.codes

In [21]:
allemandeTest = prepareForIndependentTests(testData[0:1])
couranteTest = prepareForIndependentTests(testData[50:51])
sarabandeTest = prepareForIndependentTests(testData[100:101])
gavotteTest = prepareForIndependentTests(testData[150:151])

In [22]:
print("[allemande, courante, gavotte, sarabande]")
print("")
print("allemande prediction")
print(model.predict(allemandeTest[0]))

print("")
print("courante prediction")
print(model.predict(couranteTest[0]))

print("")
print("sarabande prediction")
print(model.predict(sarabandeTest[0]))

print("")
print("gavotte prediction")
print(model.predict(gavotteTest[0]))

[allemande, courante, gavotte, sarabande]

allemande prediction
[[9.96744037e-01 2.51641101e-03 1.14682625e-04 6.24904234e-04]]

courante prediction
[[0.5687523  0.18129535 0.00154676 0.24840559]]

sarabande prediction
[[0.0139293  0.14060032 0.00175025 0.84372014]]

gavotte prediction
[[2.3599740e-03 7.5848965e-07 9.9763906e-01 2.8779084e-07]]


Prediction is respectively: allemande, allemande, sarabande, gavotte (only messing up courante)

In [26]:
# evaluate shows accuracy, whereas predict shows output
model.evaluate(allemandeTest[0],allemandeTest[1])



[0.003261249279603362, 1.0]