In [None]:
# Upgrade environment to support TF 2.10 in Colab
!pip uninstall tensorflow --yes
!pip uninstall tensorflow-io --yes
# !pip install tensorflow-gpu
# !pip install --no-deps tensorflow-io
!pip install tensorflow==2.8.0
!pip install tensorflow-io==0.25.0
!pip install pydub

Found existing installation: tensorflow 2.8.0
Uninstalling tensorflow-2.8.0:
  Successfully uninstalled tensorflow-2.8.0
Found existing installation: tensorflow-io 0.25.0
Uninstalling tensorflow-io-0.25.0:
  Successfully uninstalled tensorflow-io-0.25.0
Collecting tensorflow==2.8.0
  Using cached tensorflow-2.8.0-cp310-cp310-manylinux2010_x86_64.whl (497.6 MB)
Installing collected packages: tensorflow
Successfully installed tensorflow-2.8.0


Collecting tensorflow-io==0.25.0
  Using cached tensorflow_io-0.25.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (23.4 MB)
Installing collected packages: tensorflow-io
Successfully installed tensorflow-io-0.25.0




In [None]:
import numpy as np
import tensorflow as tf
import librosa
import tensorflow_io as tfio

In [None]:
from google.colab import drive

# Where the saved classifier (.h5 file) lives inside the mounted Drive.
model_path = r"/content/drive/MyDrive/model_ResNet_100.h5"

# The Drive has to be mounted before anything under /content/drive is readable.
drive.mount('/content/drive')

# Restore the Keras model that `make_prediction` is later called with.
model = tf.keras.models.load_model(model_path)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def decode_audio(audio_binary, target_rate=16000):
  """Decode WAV bytes into a mono float32 waveform resampled to `target_rate`.

  Args:
    audio_binary: Contents of a WAV file as a scalar string tensor (for
      example the result of `tf.io.read_file`).
    target_rate: Sample rate, in Hz, of the returned waveform. Defaults to
      16000, the value that was previously hard-coded.

  Returns:
    A 1-D float32 tensor of samples at `target_rate`. Only the waveform is
    returned; the file's original sample rate is consumed by the resampling
    step and not passed back to the caller.
  """
  # `desired_channels=1` requests a single channel, so `audio` comes back with
  # a trailing channel axis of size 1, normalized to the [-1.0, 1.0] range.
  audio, sample_rate = tf.audio.decode_wav(contents=audio_binary, desired_channels=1)
  # Drop the channel axis to get a flat vector of samples.
  audio = tf.squeeze(audio, axis=-1)
  # Cast the decoded rate to int64 before handing it to the resampler
  # (kept from the original implementation).
  sample_rate = tf.cast(sample_rate, dtype=tf.int64)
  return tfio.audio.resample(audio, rate_in=sample_rate, rate_out=target_rate)

In [None]:
def get_spectrogram(waveform, input_len=16000, frame_length=320, frame_step=32):
  """Convert a 1-D waveform into a fixed-size magnitude spectrogram.

  The waveform is truncated or zero-padded to exactly `input_len` samples so
  that every clip yields a spectrogram of the same shape.

  Args:
    waveform: 1-D tensor of audio samples.
    input_len: Number of samples every clip is forced to. Defaults to 16000.
    frame_length: STFT window length in samples. Defaults to 320.
    frame_step: STFT hop size in samples. Defaults to 32.

  Returns:
    A float32 tensor of shape (frames, frequency_bins, 1); with the default
    arguments this is (491, 257, 1).
  """
  # Keep at most `input_len` samples and make sure they are float32.
  waveform = tf.cast(waveform[:input_len], dtype=tf.float32)
  # Pad with zeros up to `input_len`. The padding length is derived from the
  # same `input_len` used for truncation, so the two can no longer disagree.
  zero_padding = tf.zeros([input_len] - tf.shape(waveform), dtype=tf.float32)
  # Concatenate the waveform with `zero_padding`, which ensures all audio
  # clips are of the same length.
  equal_length = tf.concat([waveform, zero_padding], 0)
  # Convert the waveform to a spectrogram via a STFT.
  spectrogram = tf.signal.stft(
      equal_length, frame_length=frame_length, frame_step=frame_step)
  # Obtain the magnitude of the STFT.
  spectrogram = tf.abs(spectrogram)
  # Add a `channels` dimension, so that the spectrogram can be used
  # as image-like input data with convolution layers (which expect
  # shape (`batch_size`, `height`, `width`, `channels`).
  spectrogram = tf.expand_dims(spectrogram, axis=2)
  return spectrogram

In [None]:
# Class labels indexed by the position of the classifier's output unit:
# entry i names the chord for element i of the prediction vector.
# Stored as an immutable tuple under its own name so that a later cell
# rebinding `chords` cannot break `make_prediction`.
CHORD_LABELS = ('Bb', 'Am', 'G', 'Dm', 'Bdim', 'C', 'F', 'Em')

# Same labels under the original name, for any code that still reads `chords`.
chords = list(CHORD_LABELS)


def preprocess_input_wav(file_path):
    """Read a WAV file and turn it into the spectrogram tensor fed to the model.

    Args:
        file_path: Path of the WAV file to load.

    Returns:
        The spectrogram produced by `get_spectrogram`, with one extra trailing
        axis of size 1 appended.
    """
    waveform = decode_audio(tf.io.read_file(file_path))
    spectrogram = get_spectrogram(waveform)
    # NOTE(review): `get_spectrogram` already appends a channel axis, so this
    # adds a second trailing size-1 axis. Kept unchanged because the recorded
    # run works with this shape — confirm against the model's input spec
    # before removing it.
    spectrogram = tf.expand_dims(spectrogram, -1)
    return spectrogram


def make_prediction(model, input_wav_path):
    """Classify the chord played in a WAV file.

    Args:
        model: Object with a Keras-style `predict` method returning one row of
            class scores per input example.
        input_wav_path: Path of the WAV file to classify.

    Returns:
        The chord name from `CHORD_LABELS` with the highest score.
    """
    input_data = preprocess_input_wav(input_wav_path)
    input_data = tf.expand_dims(input_data, 0)  # Add batch dimension
    predictions = model.predict(input_data)
    # Show the raw class scores alongside the final answer.
    print(predictions)
    # Exactly one example is submitted, so only the first row is relevant.
    predicted_label_id = int(np.argmax(predictions[0]))
    return CHORD_LABELS[predicted_label_id]


In [None]:
# NOTE(review): this cell redefines `get_spectrogram`, which an earlier cell
# already defines; the later definition silently wins. Keep a single copy.
def get_spectrogram(waveform, input_len=16000, frame_length=320, frame_step=32):
  """Convert a 1-D waveform into a fixed-size magnitude spectrogram.

  The waveform is truncated or zero-padded to exactly `input_len` samples so
  that every clip yields a spectrogram of the same shape.

  Args:
    waveform: 1-D tensor of audio samples.
    input_len: Number of samples every clip is forced to. Defaults to 16000.
    frame_length: STFT window length in samples. Defaults to 320.
    frame_step: STFT hop size in samples. Defaults to 32.

  Returns:
    A float32 tensor of shape (frames, frequency_bins, 1); with the default
    arguments this is (491, 257, 1).
  """
  # Keep at most `input_len` samples and make sure they are float32.
  waveform = tf.cast(waveform[:input_len], dtype=tf.float32)
  # Pad with zeros up to `input_len`. The padding length is derived from the
  # same `input_len` used for truncation, so the two can no longer disagree.
  zero_padding = tf.zeros([input_len] - tf.shape(waveform), dtype=tf.float32)
  # Concatenate the waveform with `zero_padding`, which ensures all audio
  # clips are of the same length.
  equal_length = tf.concat([waveform, zero_padding], 0)
  # Convert the waveform to a spectrogram via a STFT.
  spectrogram = tf.signal.stft(
      equal_length, frame_length=frame_length, frame_step=frame_step)
  # Obtain the magnitude of the STFT.
  spectrogram = tf.abs(spectrogram)
  # Add a `channels` dimension, so that the spectrogram can be used
  # as image-like input data with convolution layers (which expect
  # shape (`batch_size`, `height`, `width`, `channels`).
  spectrogram = tf.expand_dims(spectrogram, axis=2)
  return spectrogram

In [None]:
# Classify one sample recording and report the label the model assigns to it.
input_wav_path = '/content/Dm_AcousticGuitar_RodrigoMercador_1.wav'
prediction = make_prediction(model=model, input_wav_path=input_wav_path)
print(f'The predicted chord is: {prediction}')

[[4.8056356e-16 1.1735539e-10 1.1707156e-16 1.0000000e+00 1.6335668e-20
  2.0950579e-13 3.6212893e-09 9.2454472e-10]]
The predicted chord is: Dm


In [None]:
# Maps a chord name produced by the classifier to the Roman-numeral symbol
# used as a key of `sample_paths` further down.
# 'Em' -> 'iii' was missing even though the classifier can output 'Em' and a
# 'iii' sample exists, so looking up an Em prediction raised KeyError.
# NOTE: this rebinds `chords`, which an earlier cell defines as the list of
# classifier labels; code that indexes `chords` by integer will fail if it is
# run after this cell.
# NOTE(review): 'Bdim' -> 'VIIb' and 'Bb' -> 'vii' look swapped relative to
# usual Roman-numeral naming, but they agree with the files `sample_paths`
# assigns to those symbols, so they are left as they were — confirm against
# the vocabulary the progression model was trained on.
chords = {
    'C': 'I',
    'G': 'V',
    'Am': 'vi',
    'F': 'IV',
    'Dm': 'ii',
    'Em': 'iii',
    'Bdim': 'VIIb',
    'Bb': 'vii',
}

In [None]:
from transformers import AutoModelWithLMHead, AutoTokenizer

# Restore the fine-tuned progression model and its tokenizer; both are read
# from the same Drive folder.
fine_tuned_model = AutoModelWithLMHead.from_pretrained(r'/content/drive/MyDrive/fine_tuned_model_directory')
tokenizer = AutoTokenizer.from_pretrained(r'/content/drive/MyDrive/fine_tuned_model_directory')

# The progression is seeded with a fixed numeral; the classifier's result is
# not used here. To seed from the classifier instead, look `prediction` up in
# the chord-name -> numeral table defined in the previous cell.
seed_chord = 'vi'
input_ids = tokenizer.encode(seed_chord, return_tensors="pt")

# NOTE(review): `do_sample=True` is not passed, so `temperature` presumably
# has no effect on the result — confirm against the model's generation config.
generation_kwargs = dict(
    max_length=20,
    temperature=0.1,
    pad_token_id=tokenizer.eos_token_id,
    num_return_sequences=1,
)
output = fine_tuned_model.generate(input_ids, **generation_kwargs)

# Turn the single returned sequence back into text and show it token by token.
generated_chords = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_chords.split(' '))

['vi', 'I', 'V', 'IV']


In [None]:
from pydub import AudioSegment
from pydub.playback import play

def generate_guitar_chord(file_path, duration=1000):
    """Load an audio sample and keep at most its first `duration` milliseconds.

    Args:
        file_path: Path of the audio file to load.
        duration: Maximum length of the returned clip, in milliseconds.

    Returns:
        The leading slice of the loaded `AudioSegment`. The sample is only
        truncated, never padded, so a file shorter than `duration` comes back
        at its own length.
    """
    return AudioSegment.from_file(file_path)[:duration]

# Roman-numeral symbols produced by the progression model, in playing order.
# `split()` without an argument ignores repeated or trailing whitespace, so no
# empty symbols are produced.
chord_progression = generated_chords.split()

# Guitar sample to play for each Roman-numeral symbol
# (replace with your actual file paths).
sample_paths = {
    'I': r'/content/drive/MyDrive/Audio_Files/C_AcousticGuitar_RodrigoMercador_1 (1).wav',
    'V': r'/content/drive/MyDrive/Audio_Files/G_AcousticGuitar_RodrigoMercador_1.wav',
    'vi': r'/content/drive/MyDrive/Audio_Files/Am_AcousticGuitar_RodrigoMercador_1.wav',
    'IV': r'/content/drive/MyDrive/Audio_Files/F_AcousticGuitar_RodrigoMercador_1.wav',
    'VI#': r'/content/drive/MyDrive/Audio_Files/A#m_Bbm9StrumGuitar_01_526.wav',
    'v': r'/content/drive/MyDrive/Audio_Files/guitar-chord-g-minor_G_minor.wav',
    'iii': r'/content/drive/MyDrive/Audio_Files/Em_AcousticGuitar_RodrigoMercador_1.wav',
    'ii': r'/content/drive/MyDrive/Audio_Files/Dm_AcousticGuitar_RodrigoMercador_1.wav',
    'I#': r'/content/drive/MyDrive/Audio_Files/C#_C#add9SlapGuitar_01_526.wav',
    'III': r'/content/drive/MyDrive/Audio_Files/E_StrumAcGtr_01_714.wav',
    'IV#': r'/content/drive/MyDrive/Audio_Files/F#_F#StrumGuitar_01_526.wav',
    'II#': r'/content/drive/MyDrive/Audio_Files/D#_StrongChordAcGuitar_01_577.wav',
    'II': r'/content/drive/MyDrive/Audio_Files/D_SoftAcGuitar_01_577.wav',
    'VIIb': r'/content/drive/MyDrive/Audio_Files/Bdim_AcousticGuitar_RodrigoMercador_1.wav',
    'i': r'/content/drive/MyDrive/Audio_Files/Cm_Gm7_BossaNylonGtr_01_714.wav',
    'V#': r'/content/drive/MyDrive/Audio_Files/G#_F9StrumGuitar_01_526.wav',
    'vii': r'/content/drive/MyDrive/Audio_Files/Bb_AcousticGuitar_RodrigoMercador_1.wav',
    'VI': r'/content/drive/MyDrive/Audio_Files/A_StrumAcGtr_01_714.wav',
}

# Append the clip for every known symbol to an initially empty track;
# symbols without a sample are reported and skipped.
full_music = AudioSegment.silent(duration=0)
for chord_symbol in chord_progression:
    sample_path = sample_paths.get(chord_symbol)
    if sample_path is None:
        print(f"Chord '{chord_symbol}' not recognized.")
        continue
    full_music += generate_guitar_chord(sample_path)

# Write the track to disk. `export` hands back the open output file, so it is
# closed explicitly instead of being left open and echoed as the cell's result.
full_music.export("guitar_music_output.wav", format="wav").close()

<_io.BufferedRandom name='guitar_music_output.wav'>

In [None]:
from IPython.display import Audio

# Show an inline audio player for the WAV file written by the previous cell.
Audio("guitar_music_output.wav")