In [None]:
# @title Installieren von Bibliotheken
!pip install mido
!pip install audonnx
!pip install audinterface

import mido
from mido import MidiFile, MidiTrack, Message, MetaMessage
import pandas as pd
import os
import audeer
import librosa
import numpy as np
import audonnx
import audinterface



Collecting mido
  Downloading mido-1.3.0-py3-none-any.whl (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.3/50.3 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mido
Successfully installed mido-1.3.0
Collecting audonnx
  Downloading audonnx-0.6.5-py3-none-any.whl (295 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.0/296.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting audobject>=0.7.2 (from audonnx)
  Downloading audobject-0.7.11-py3-none-any.whl (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting onnx (from audonnx)
  Downloading onnx-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.7/15.7 MB[0m [31m62.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting onnxruntime>=1.8.0 (from audonnx)
  Downloading onnxru

In [None]:
# @title Modell laden und Daten vorbereiten

# Load the model following the GitHub tutorial: the extracted model lives in
# 'model', downloaded archives are kept in 'cache'.
cache_root = 'cache'
model_root = 'model'


def cache_path(file):
    """Return the path of ``file`` inside the cache directory ``cache_root``."""
    return os.path.join(cache_root, file)


# Create the cache directory through audeer.
audeer.mkdir(cache_root)


# Source archive of the model and the location it is downloaded to.
url = 'https://zenodo.org/record/6221127/files/w2v2-L-robust-12.6bc4a7fd-1.1.0.zip'
dst_path = cache_path('model.zip')

# Download the archive only when it is not in the cache yet.
if not os.path.exists(dst_path):
    audeer.download_url(url, dst_path, verbose=True)

# Unpack the archive only when the model directory does not exist yet.
if not os.path.exists(model_root):
    audeer.extract_archive(dst_path, model_root, verbose=True)

import audonnx

# Load the model from the extracted directory.
model = audonnx.load(model_root)
model


import numpy as np

# Fixed seed so the random test signal is the same on every run.
np.random.seed(0)

# One second of Gaussian noise (sampling_rate samples) as float32 test input.
sampling_rate = 16000
signal = np.random.normal(size=sampling_rate).astype(np.float32)

# Call the model once on the test signal; the result is not stored.
model(signal, sampling_rate)


import audinterface

# Wrap the model in a feature interface that requests its 'logits' output;
# the column names come from the model's labels for that output.
interface = audinterface.Feature(
    model.labels('logits'),
    process_func=model,
    process_func_args={'outputs': 'logits'},
    sampling_rate=sampling_rate,
    resample=True,
    verbose=True,
)

# Last expression of the cell: the result for the test signal is displayed.
interface.process_signal(signal, sampling_rate)



Unnamed: 0_level_0,Unnamed: 1_level_0,arousal,dominance,valence
start,end,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0 days,0 days 00:00:01,0.656339,0.630771,0.476233


In [None]:
# @title Verarbeitung von Audiosignalen und Erkennung von Emotionen

# Model directory; the model is loaded from it through 'audonnx'.
model_root = 'model'
model = audonnx.load(model_root)

# Path of the audio file; the signal is loaded through 'librosa' as mono
# audio at 16 kHz.
input_file = '/content/Good Will Hunting.wav'
interval_length = 5  # in seconds
signal, sampling_rate = librosa.load(input_file, sr=16000, mono=True)

# Number of intervals needed to cover the signal (the last one may be shorter).
total_intervals = int(
    np.ceil(signal.shape[0] / (sampling_rate * interval_length))
)


# Build the interface and pass the keyword argument 'logits' (VAD values).
interface = audinterface.Feature(
    model.labels('logits'),
    process_func=model,
    process_func_args={
        'outputs': 'logits',
    },
    sampling_rate=sampling_rate,
    resample=True,
    verbose=True,
)

# Lists that collect one arousal / valence / dominance value per interval.
arousal_list = []
valence_list = []
dominance_list = []

# Walk over the audio signal and run the emotion model on every interval
# of `interval_length` seconds.
for i in range(total_intervals):
    # 1. Sample positions of the interval; the end is capped at the signal
    #    length, so the last interval may be shorter than the others.
    start_pos = int(i * sampling_rate * interval_length)
    end_pos = int(min((i + 1) * sampling_rate * interval_length, signal.shape[0]))

    # 2. Run the interface on the samples of this interval.
    interval_signal = signal[start_pos:end_pos]
    output = interface.process_signal(interval_signal, sampling_rate)

    # 3. Show the VAD values of this interval.
    print(output)

    # 4. Extract the values as plain floats. `output` is a one-row frame
    #    indexed by (start, end), and `end` differs for a shorter last
    #    interval. Storing the indexed Series made a later
    #    `pd.DataFrame(list)` spread the values over several columns, so the
    #    last interval was lost when only the first column was read.
    arousal = float(output['arousal'].iloc[0])
    valence = float(output['valence'].iloc[0])
    dominance = float(output['dominance'].iloc[0])

    # 5. Store the values of this interval.
    arousal_list.append(arousal)
    valence_list.append(valence)
    dominance_list.append(dominance)





                         arousal  dominance   valence
start  end                                           
0 days 0 days 00:00:05  0.509337   0.582969  0.742633
                         arousal  dominance  valence
start  end                                          
0 days 0 days 00:00:05  0.661582   0.669507  0.74715
                                   arousal  dominance   valence
start  end                                                     
0 days 0 days 00:00:01.886062500  0.779296   0.738261  0.885204


In [None]:
# @title Nachbearbeitung der Ergebnisse der Emotionserkennung

# Post-processing of the emotion recognition results: one value per interval,
# missing values replaced by 0, everything limited to the range [0, 1].


def _first_value(item):
    """Return the first element of ``item`` as a float, or NaN if it is empty.

    ``item`` may be a plain number or an array-like such as a one-element
    pandas Series. Only the values are used, never the index, so entries
    whose index labels differ (e.g. a shorter last interval with another
    ``end`` timestamp) cannot end up in different DataFrame columns.
    """
    flat = np.ravel(np.asarray(item, dtype=float))
    return float(flat[0]) if flat.size else np.nan


def _vad_frame(column, items):
    """Build a frame with one row per interval: the value and a running 'notes' index."""
    frame = pd.DataFrame(
        {column: [_first_value(item) for item in items]},
        dtype=float,
    )
    frame['notes'] = np.arange(len(frame))
    return frame


# VAD values as DataFrames (first column: value, second column: 'notes').
df = _vad_frame('arousal', arousal_list)
dff = _vad_frame('valence', valence_list)
dfff = _vad_frame('dominance', dominance_list)

# Show the arousal frame.
print(df)

# Replace missing values by 0 (NaN would survive np.clip) and limit every
# value to [0, 1]; np.clip returns new arrays, the frames stay untouched.
arousal_values = np.clip(df.iloc[:, 0].fillna(0).to_numpy(), 0, 1)
valence_values = np.clip(dff.iloc[:, 0].fillna(0).to_numpy(), 0, 1)
dominance_values = np.clip(dfff.iloc[:, 0].fillna(0).to_numpy(), 0, 1)

# Show the adjusted VAD values.
print(arousal_values)
print(valence_values)
print(dominance_values)

start   0 days 00:00:00                           notes
end     0 days 00:00:05 0 days 00:00:01.886062500   NaT
arousal        0.509337                       NaN     0
arousal        0.661582                       NaN     1
arousal             NaN                  0.779296     2
[0.5093366 0.6615819 0.       ]
[0.7426325  0.74715006 0.        ]
[0.5829686 0.6695073 0.       ]


In [None]:
# @title Generierung von Intervallen

# Path of the MIDI file that is written at the end of this cell.
output_file = "/content/GoodWillHunting.mid"
channel = 0

# Root note of every generated interval: middle C.
root_note = 60

# Length of one generated interval in MIDI ticks.
# NOTE(review): with mido's default resolution and the default MIDI tempo this
# is presumably about 2 s, not the 5 s analysis interval -- TODO confirm intent.
note_ticks = 2000

# Ordered mapping of arousal/valence ranges to the upper note of the interval.
# Rules are tried top to bottom and the first match wins, which matters
# because neighbouring ranges share their boundaries and some ranges overlap.
# Some combinations match no rule at all (e.g. arousal 0.9 with valence 0.6).
# Columns: arousal min, arousal max, valence min, valence max, MIDI note.
interval_rules = (
    (0.4, 0.6, 0.25, 0.5, 72),   # C  - octave
    (0.6, 1.0, 0.25, 0.5, 61),   # C# - minor second
    (0.4, 0.6, 0.5, 0.75, 62),   # D  - major second
    (0.0, 0.4, 0.0, 0.25, 63),   # D# - minor third
    (0.6, 1.0, 0.75, 1.0, 64),   # E  - major third
    (0.4, 0.6, 0.75, 1.0, 65),   # F  - perfect fourth
    (0.6, 1.0, 0.0, 0.25, 66),   # F# - tritone
    (0.0, 0.4, 0.5, 0.75, 67),   # G  - perfect fifth
    (0.0, 0.4, 0.25, 0.5, 68),   # G# - minor sixth
    (0.5, 0.75, 0.6, 1.0, 69),   # A  - major sixth
    (0.4, 0.6, 0.0, 0.25, 70),   # A# - minor seventh
    (0.0, 0.4, 0.75, 1.0, 71),   # B  - major seventh
)


def _chord_note_for(a, v):
    """Return the MIDI note mapped to arousal ``a`` and valence ``v``, or None."""
    for a_min, a_max, v_min, v_max, note in interval_rules:
        if a_min <= a <= a_max and v_min <= v <= v_max:
            return note
    return None


# Create the MIDI track and select program 0 at its start.
track = MidiTrack()
track.append(Message('program_change', program=0, time=0))

# Ticks of skipped intervals that still have to pass before the next note.
pending_rest = 0

# One musical interval (root note + mapped note) per analysed audio interval.
for a, v, d in zip(arousal_values, valence_values, dominance_values):
    chord_note = _chord_note_for(a, v)

    if chord_note is None:
        print("No MIDI note found for the given arousal and valence values")
        # Keep the timeline aligned: an interval without a note becomes a
        # rest instead of being dropped, so later notes are not shifted.
        pending_rest += note_ticks
        continue

    # Key velocity derived from dominance, kept inside the MIDI range 0..127.
    velocity = max(0, min(127, int(d * 127)))

    # Both notes start together after any pending rest ...
    track.append(Message('note_on', note=chord_note, velocity=velocity, time=pending_rest, channel=channel))
    track.append(Message('note_on', note=root_note, velocity=velocity, time=0, channel=channel))

    # ... and end together after `note_ticks` ticks.
    track.append(Message('note_off', note=chord_note, velocity=0, time=note_ticks, channel=channel))
    track.append(Message('note_off', note=root_note, velocity=0, time=0, channel=channel))
    pending_rest = 0


# Add the track to a new MIDI file and write it to disk.
mid = MidiFile()
mid.tracks.append(track)
mid.save(output_file)

print(f"MIDI file saved: {output_file}")

MIDI file saved: /content/GoodWillHunting.mid
