In [284]:
import enum
import os
import random

import music21
import music21.duration
import music21.note
import music21.pitch
import music21.scale
import numpy
import pandas
import pyknon.genmidi
import pyknon.music
import sklearn.preprocessing

In [109]:
# Two numeric properties per amino acid, keyed by one-letter code.
# The first value feeds the "masses" column and the second the "scales" column.
# NOTE(review): the values look like molecular masses and a hydropathy scale --
# confirm the source of the table.
AMINO_ACIDS = {
    "A": ( 89.094,  1.8),
    "C": (121.154,  2.5),
    "D": (133.104, -3.5),
    "E": (147.131, -3.5),
    "F": (165.192,  2.8),
    "G": ( 75.067, -0.4),
    "H": (155.156, -3.2),
    "I": (131.175,  4.5),
    "K": (146.189, -3.9),
    "L": (131.175,  3.8),
    "M": (149.208,  1.9),
    "N": (132.119, -3.5),
    "P": (115.132, -1.6),
    "Q": (146.146, -3.5),
    "R": (174.203, -4.5),
    "S": (105.093, -0.8),
    "T": (119.119, -0.7),
    "V": (117.148,  4.2),
    "W": (204.228, -0.9),
    "Y": (181.191, -1.3)
}

# Pull each property out as an (n, 1) column so it can be normalised along axis 0.
masses = numpy.array([first for first, _ in AMINO_ACIDS.values()]).reshape(-1, 1)
scales = numpy.array([second for _, second in AMINO_ACIDS.values()]).reshape(-1, 1)

# Normalise each column, then flatten back to 1-D.
# NOTE(review): the min-max rescaling further down is invariant under a positive
# rescale of the column, so this step should not affect the final values beyond
# rounding; it is kept to reproduce the original computation exactly.
masses = sklearn.preprocessing.normalize(masses, axis=0).ravel()
scales = sklearn.preprocessing.normalize(scales, axis=0).ravel()

columns = ["letter", "masses", "scales"]

# One row per amino acid, in the dict's insertion order.
features = pandas.DataFrame(
    {
        "letter": list(AMINO_ACIDS),
        "masses": masses,
        "scales": scales,
    },
    columns=columns,
)

# Rescale both numeric columns onto [0.0, 1.0].
for column in ("masses", "scales"):
    features[column] = sklearn.preprocessing.minmax_scale(
        features[column], feature_range=(0.0, 1.0)
    )

In [251]:
# One integer pitch value per row of `features`, in row order; these values are
# later passed to `music21.note.Note`.
notes = [
    53, 58, 60, 62, 63, 64, 65, 67, 69, 70,
    72, 74, 75, 76, 77, 79, 81, 82, 84, 86,
]

# Attach the pitch table as a new column (the list length must match the row count).
features["notes"] = notes

In [252]:
# Show the assembled table: letter, rescaled "masses" and "scales", and the assigned pitch.
features

Unnamed: 0,letter,masses,scales,notes
0,A,0.108601,0.7,53
1,C,0.356818,0.777778,58
2,D,0.449338,0.111111,60
3,E,0.557939,0.111111,62
4,F,0.697773,0.811111,63
5,G,0.0,0.455556,64
6,H,0.620071,0.144444,65
7,I,0.434404,1.0,67
8,K,0.550646,0.066667,69
9,L,0.434404,0.922222,70


In [257]:
# Load the FASTA file and extract the residue string of its last record.
with open("./data/rcsb_pdb_6VSB.fasta") as fp:
    fasta_lines = [line.strip() for line in fp]

# Walk backwards from the end of the file to the nearest ">" header, collecting
# every non-empty line on the way. For a record stored on a single line this is
# exactly the file's last line; it additionally copes with sequences wrapped
# over several lines and with trailing blank lines.
record_lines = []
for line in reversed(fasta_lines):
    if line.startswith(">"):
        break
    if line:
        record_lines.append(line)

sequence = "".join(reversed(record_lines))

# Fail fast instead of silently producing an empty piece of music downstream.
if not sequence:
    raise ValueError("no sequence found in ./data/rcsb_pdb_6VSB.fasta")

In [None]:
# Map every residue of `sequence` to the label of its row in `features`.
# The lookup is built here so this cell does not rely on a variable that is only
# assigned by a later cell (which breaks a fresh top-to-bottom run).
# Letters that have no row in `features` are skipped.
letter_to_row = dict(zip(features["letter"], features.index))
indicies = [letter_to_row[letter] for letter in sequence if letter in letter_to_row]

# One music21 duration per residue, using the rescaled "masses" value as its length.
# NOTE(review): the smallest mass rescales to 0.0, so that residue gets a
# zero-length duration -- confirm this is intended.
durations = [
    music21.duration.Duration(duration)
    for duration in features["masses"][indicies]
]

In [301]:
# Map every residue of `sequence` to the label of its row in `features` with a
# single dictionary lookup per residue (letters without a row are skipped).
letter_to_row = dict(zip(features["letter"], features.index))
indicies = [letter_to_row[letter] for letter in sequence if letter in letter_to_row]

# Pitch value for every residue, in sequence order.
notes = list(features["notes"][indicies])

stream = music21.stream.Stream()

# Pair each residue's pitch with that same residue's duration. Zipping against
# `durations[1:]` would give every note the following residue's duration and
# drop the final residue altogether.
for duration, note in zip(durations, notes):
    x = music21.note.Note(note)
    x.duration = duration
    stream.append(x)

In [306]:
# Quantize with the (4, 6) divisors and play the result as MIDI.
# NOTE(review): this assumes `quantize` returns the quantized stream rather than
# modifying `stream` in place -- confirm against the installed music21 version.
quantized = stream.quantize((4, 6))
quantized.show("midi")

In [307]:
# Export `stream` as a MIDI file; the call's return value is shown as the cell output.
stream.write(fmt="midi", fp="data/6vsb.midi")

'data/6vsb.midi'