# A Quick Overview of Music Cognition

The purpose of this notebook is to explore some basic aspects of music cognition. This notebook covers two topics:

1. Explore basic psychoacoustic concepts
2. Pitch perception



In [None]:
# Let's start by importing stuff
# The `KMP_DUPLICATE_LIB_OK` setting below is a workaround for a kernel that keeps crashing
# See https://stackoverflow.com/a/53014308
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

import IPython.display as ipd
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt

from typing import Union, Callable, Dict, Any, Optional

import partitura as pt
from partitura.utils.synth import (
    additive_synthesis, 
    DistributedHarmonics,
    ShepardTones,
    midi_pitch_to_frequency,
    midi_pitch_to_natural_frequency,
)

import warnings
warnings.filterwarnings("ignore")

# Sample rate in Hz
SAMPLE_RATE = 44100
# standard tuning
A4 = 440

## Psychoacoustics

Let's start by exploring the basic relationships between properties of a physical wave and their perceptual counterparts.

We are going to generate sinusoidal waves (i.e., simple tones):

$$y(t) = a_0 \sin(2 \pi f t + \phi)$$

where 

* $t$ is the time
* $f$ is the frequency
* $a_0$ is the amplitude
* $\phi$ is the phase

In the following demo, change the values of the parameters with the sliders, listen to what happens, and try to answer these questions:

1. What is the perceptual effect of *changing the amplitude*?
2. What is the perceptual effect of *changing the frequency*?
3. What is the *effect of the phase*?
4. Is loudness just related to *wave amplitude*? What other component contributes to our perception of loudness?

In [None]:
# Generate Sinusoidal Waves
def gen_sine_wave(
    freq: float, 
    amplitude: float = 1.0, 
    duration:float = 0.5,
    phase:float = 0,
) -> (np.ndarray, np.ndarray):
    """
    Generate a sine wave, plot it and play it as audio.

    The generated signal is ``y(t) = amplitude * sin(2 * pi * freq * t + phase)``,
    sampled at ``SAMPLE_RATE`` samples per second.
    
    Parameters
    ----------
    freq: float
        The frequency of the wave (in Hz).
    amplitude: float
        The amplitude (peak value) of the wave.
    duration: float
        The duration of the generated audio (in seconds).
    phase: float
        The phase offset of the wave (in radians).
        
    Returns
    -------
    y : np.ndarray
        The generated sine wave
    t : np.ndarray
        The time axis (in seconds)
    """
    
    # Time axis: consecutive samples are exactly 1 / SAMPLE_RATE seconds
    # apart, so the rendered frequency matches `freq`.
    num_samples = int(SAMPLE_RATE * duration)
    t = np.arange(num_samples) / SAMPLE_RATE

    # Sine wave with the requested amplitude and phase. A unit sine already
    # peaks at 1, so scaling by `amplitude` needs no renormalization (dividing
    # by the sampled maximum would fail for an all-zero signal).
    y = amplitude * np.sin(2 * np.pi * freq * t + phase)
    
    # Plot the wave and display
    plt.plot(t, y, linewidth=3, c="firebrick")
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    # Only show the first 50 ms so that individual cycles are visible
    plt.xlim((0, 0.05))
    plt.show()

    # By default `ipd.Audio` rescales the samples to full scale, which makes
    # every amplitude sound equally loud. Rescaling is therefore switched off
    # whenever the samples already fit in [-1, 1]; it is only kept as a
    # fallback for larger amplitudes, which `ipd.Audio` would otherwise reject.
    needs_rescaling = bool(abs(amplitude) > 1)
    ipd.display(ipd.Audio(data=y, rate=SAMPLE_RATE, normalize=needs_rescaling))
    return y, t

# Make it interactive!
# Each keyword below binds one argument of `gen_sine_wave` to a slider;
# the widget is displayed because this call is the last expression of the cell.
interactive(
    gen_sine_wave,
    # frequency: from 27.5 to 7040 in steps of 10, starting at 440
    freq=widgets.FloatSlider(min=27.5, max=7040, value=440, step=10),
    # amplitude: from 0.1 to 0.8 in steps of 0.1, starting at 0.5
    amplitude=widgets.FloatSlider(min=0.1, max=0.8, value=0.5, step=0.1),
    # duration: from 0.2 to 2 in steps of 0.1, starting at 0.5
    duration=widgets.FloatSlider(min=0.2, max=2, value=0.5, step=0.1),
    # phase: from 0 to 2*pi (one full cycle) in steps of 0.1, starting at 0
    phase=widgets.FloatSlider(min=0, max=2*np.pi, value=0, step=0.1)
)

### Complex tones

**Complex tones** are combinations of many simple periodic waves, each of which has its own frequency, amplitude and phase

$$
y_{complex}(t) = \sum_{n} a_n \sin(2\pi f_n t + \phi_n)
$$

The lowest frequency, $f_0$, is called the **fundamental frequency**.

* **Partials**: Each of the terms $a_n \sin(2\pi f_n t + \phi_n)$, i.e., each of the individual sine waves. 
* **Harmonics** (**harmonic partials**): Partials whose frequencies $f_n$ can be written as $f_n = n f_0$ for $n \in \mathbb{Z}_{>0}$ (positive integers). Partials that cannot be written this way are said to be *inharmonic*!
* **Overtones**: Any partial above $f_0$. Overtones do not necessarily mean *harmonics*!

#### Harmonic Series

The harmonic series is the sequence of harmonic partials of a fundamental frequency $f_0$, i.e., simple tones whose frequency is an integer multiple of $f_0$.

The following figure illustrates the relationship between harmonics and musical intervals

<img src="example_data/harmonic_series.png" alt="harmonic_series" width="600"/>

Let's generate and hear this series of harmonics!

In [None]:
# We start the series from C2 (MIDI pitch 36)
C = midi_pitch_to_frequency(36)

# Each of the 16 harmonics lasts one time unit and uses a linear envelope
duration = np.ones(16)
envelope_fun = 'linear'

# The harmonics are played one after the other: onsets 0, 1, ..., 15
onsets = np.arange(16)

# Total length of the rendered audio (leaves room after the last note)
piece_duration = 17

# Number of frames
num_frames = int(np.round(piece_duration * SAMPLE_RATE))

# Initialize array containing audio
harmonic_series = np.zeros(num_frames, dtype='float')

# Initialize the time axis
x = np.linspace(0, piece_duration, num=num_frames)

# onsets in frames (i.e., indices of the `harmonic_series` array).
# `searchsorted(..., side="left")` maps onset 0 to frame 0, whereas
# `np.digitize` would place it at frame 1.
onsets_in_frames = np.searchsorted(x, onsets, side="left")

# frequency of each note in hertz: the first 16 integer multiples of C
freq_in_hz = np.arange(1 , 17) * C


def harmonic_dist(x):
    """Use only the given frequency `x`, with weight 1 (a simple tone)."""
    return x, 1


for (f, oif, dur) in zip(freq_in_hz, onsets_in_frames, duration):

    freqs, weights = harmonic_dist(f)

    note = additive_synthesis(freqs=freqs,
                              duration=dur,
                              samplerate=SAMPLE_RATE,
                              weights=weights,
                              envelope_fun=envelope_fun)

    # Clip the note to the end of the buffer so that a rounding difference
    # of a few frames cannot cause a shape mismatch in the addition
    end = min(oif + len(note), num_frames)
    harmonic_series[oif:end] += note[:end - oif]

# normalization term (largest absolute sample value)
norm_term = max(harmonic_series.max(), abs(harmonic_series.min()))

# normalize audio to the range [-1, 1]
harmonic_series /= norm_term

ipd.display(ipd.Audio(data=harmonic_series, rate=SAMPLE_RATE))

#### Creating Complex Tones

Let's put everything together into a simple additive synthesizer!


In [None]:
def synthesize(
    note_info: Union[pt.score.ScoreLike, pt.performance.PerformanceLike, np.ndarray],
    samplerate: int = SAMPLE_RATE,
    envelope_fun: str = "linear",
    tuning: Union[str, Callable] = "equal_temperament",
    tuning_kwargs: Dict[str, Any] = {"a4": A4},
    harmonic_dist: Optional[Union[str, int]] = None,
    bpm: Union[float, int] = 60,
) -> np.ndarray:
    """
    Synthesize a partitura object with note information
    using additive synthesis, then plot and play the result.

    Parameters
    ----------
    note_info : ScoreLike, PerformanceLike or np.ndarray
        A partitura object with note information.
    samplerate: int
        The sample rate of the audio file in Hz.
    envelope_fun: {"linear", "exp" }
        The type of envelope to apply to the individual sine waves.
    tuning: {"equal_temperament", "natural"} or callable
        The tuning system used to convert MIDI pitches to frequencies.
        A callable is invoked as ``tuning(pitch, **tuning_kwargs)`` and
        must return the frequencies in Hz.
    tuning_kwargs : dict
        Keyword arguments forwarded to the tuning function
        (by default the reference frequency of A4). The dictionary is
        only read, never modified.
    harmonic_dist : int,  "shepard" or None (optional)
        Distribution of harmonics. If an integer, it is the number
        of harmonics to be considered. If "shepard", it uses Shepard tones.
        A callable mapping a frequency to ``(freqs, weights)`` is used as is.
        Default is None (i.e., only consider the fundamental frequency)
    bpm : int
        The bpm to render the output (if the input is a score-like object)

    Returns
    -------
    audio_signal : np.ndarray
       Audio signal as a 1D array, normalized to the range [-1, 1].

    Raises
    ------
    ValueError
        If `tuning` or `harmonic_dist` is not one of the supported values.
    """
    
    # Get note array from a partitura object
    note_array = pt.utils.music.ensure_notearray(note_info)

    onset_unit, duration_unit = pt.utils.music.get_time_units_from_note_array(note_array)

    # Work on local values: the caller's note array must not be modified,
    # otherwise repeated calls on the same array would keep shifting it.
    onset_values = note_array[onset_unit]
    duration_values = note_array[duration_unit]

    # Negative onsets (e.g., a pickup bar) are shifted so that the earliest
    # note starts at time 0; the minimum is negative, so it is subtracted.
    earliest_onset = np.min(onset_values)
    if earliest_onset < 0:
        onset_values = onset_values - earliest_onset

    # If the input is a score, convert score time to seconds
    if onset_unit != "onset_sec":
        beat2sec = 60 / bpm
        onsets = onset_values * beat2sec
        offsets = (onset_values + duration_values) * beat2sec
        duration = duration_values * beat2sec
    else:
        onsets = onset_values
        offsets = onset_values + duration_values
        duration = duration_values

    pitch = note_array["pitch"]

    # Duration of the piece
    piece_duration = offsets.max()

    # Number of frames
    num_frames = int(np.round(piece_duration * samplerate))

    # Initialize array containing audio
    audio_signal = np.zeros(num_frames, dtype="float")

    # Initialize the time axis
    x = np.linspace(0, piece_duration, num=num_frames)

    # onsets in frames (i.e., indices of the `audio_signal` array)
    onsets_in_frames = np.searchsorted(x, onsets, side="left")

    # frequency of the notes in hertz
    if tuning == "equal_temperament":
        freq_in_hz = midi_pitch_to_frequency(pitch, **tuning_kwargs)
    elif tuning == "natural":
        freq_in_hz = midi_pitch_to_natural_frequency(pitch, **tuning_kwargs)
    elif callable(tuning):
        freq_in_hz = tuning(pitch, **tuning_kwargs)
    else:
        raise ValueError(
            "`tuning` must be 'equal_temperament', 'natural' or a callable"
        )

    # Resolve `harmonic_dist` into a function that maps a fundamental
    # frequency to the partial frequencies and their weights
    if harmonic_dist is None:

        def harmonics_fun(x):
            # Only the fundamental frequency, with weight 1
            return x, 1

    elif isinstance(harmonic_dist, int):
        harmonics_fun = DistributedHarmonics(harmonic_dist)
    elif isinstance(harmonic_dist, str):
        if harmonic_dist in ("shepard",):
            harmonics_fun = ShepardTones()
        else:
            # Fail early with a clear message instead of a later
            # "'str' object is not callable" error
            raise ValueError(
                "`harmonic_dist` must be an integer, 'shepard', None or a "
                f"callable, but is {harmonic_dist!r}"
            )
    elif callable(harmonic_dist):
        harmonics_fun = harmonic_dist
    else:
        raise ValueError(
            "`harmonic_dist` must be an integer, 'shepard', None or a "
            f"callable, but is {harmonic_dist!r}"
        )

    for (f, oif, dur) in zip(freq_in_hz, onsets_in_frames, duration):

        freqs, weights = harmonics_fun(f)

        note = additive_synthesis(
            freqs=freqs,
            duration=dur,
            samplerate=samplerate,
            weights=weights,
            envelope_fun=envelope_fun,
        )
        # Clip the note to the end of the buffer: rounding can make the last
        # notes a few frames longer than the space left, which would
        # otherwise raise a shape mismatch in the addition
        end = min(oif + len(note), num_frames)
        audio_signal[oif:end] += note[: end - oif]

    # normalization term (largest absolute sample value)
    norm_term = np.max(np.abs(audio_signal)) if num_frames > 0 else 0

    # normalize audio (a silent signal is left untouched to avoid 0 / 0)
    if norm_term > 0:
        audio_signal /= norm_term
    
    # Display the audio signal
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(x, audio_signal, linewidth=3, c="firebrick")
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Amplitude")
    plt.show()
    ipd.display(ipd.Audio(data=audio_signal, rate=samplerate))    
    return audio_signal


Using the parameter `harmonic_dist`, we can explore the effect of using multiple harmonics to generate a complex sound.

<img src="example_data/Happy_Birthday_to_You.png" alt="Happy_Birthday_to_You" width="600"/>

* Is there a difference in how the music sounds using just the fundamental frequency or using more harmonics?

In [None]:
# Value passed to `synthesize` as `harmonic_dist`; change it and re-run
# the cell to hear the effect of adding harmonics
num_harmonics = 1

# Read the MusicXML score of the example
score = pt.load_score('example_data/Happy_Birthday_to_You.musicxml')

# Render the score (this also plots the waveform and shows an audio player)
audio_signal = synthesize(score, harmonic_dist=num_harmonics)

#### Shepard Tones

[Shepard tones](https://en.wikipedia.org/wiki/Shepard_tone) are a fun auditory illusion.

In the following code we generate a chromatic scale twice: once using just the fundamental frequency and once using Shepard tones.

* What is the difference between these two signals?

In [None]:
# duration of each note in seconds
duration = 0.3

# Allocate a note array with one entry per piano key; the fields are the
# data types of a standard performance note array in partitura
note_array = np.empty(
    88,
    dtype=[('pitch', 'i4'), ('onset_sec', 'f4'), ('duration_sec', 'f4'), ('id', 'U256')],
)

# all keys in the piano, as consecutive MIDI pitches (21 to 108)
note_array['pitch'] = np.arange(21, 109)

# the notes follow each other without gaps or overlaps
note_array['onset_sec'] = np.arange(88) * duration
note_array['duration_sec'] = duration

# note identifiers n0, n1, ..., n87
note_array['id'] = np.array([f'n{i}' for i in range(88)])

# Render the same scale twice: first with one harmonic, then with Shepard tones
print('Simple tones')
simple = synthesize(note_array, harmonic_dist=1)

print('Shepard tones')
shepard = synthesize(note_array, harmonic_dist='shepard')



## The Well Tempered... Temperament?

A [tuning system](https://en.wikipedia.org/wiki/Musical_tuning#Tuning_systems) is a system used to define the number and spacing of (fundamental) frequency values used in music.

In the following code we explore two tuning systems:

1. 12-tone equal temperament: All notes in an octave are equally spaced (i.e., the relative distance of a minor second is $\sqrt[12]{2}$). For example, if $A_4=440Hz$, $B\flat_{4} = 440 \times \sqrt[12]{2} \approx 466.16 Hz$
2. Natural (just) intonation: Using simple ratios from the harmonic series

The following code defines these two tuning systems.

In [None]:
# Frequency ratios used for natural (just) tuning.
# Keys are interval sizes in semitones (0 = unison, 12 = octave); values are
# the frequency ratio of the upper note to the lower note. The ratios listed
# after "alt." are other candidates for the same interval that are not used.
NATURAL_INTERVAL_RATIOS = {
    0:1, # unison
    1:16/15, # minor second (alt. 15/14, 11/10)
    2:8/7, # major second (alt. 9/8, 10/9, 12/11; "13/14" was also listed, possibly meant as 14/13 -- TODO confirm)
    3:6/5, # minor third (alt. 7/6)
    4:5/4, # major third
    5:4/3, # perfect fourth
    6:7/5, # tritone (alt. 13/9)
    7:3/2, # perfect fifth
    8:8/5, # minor sixth
    9:5/3, # major sixth
    10:7/4, # minor seventh (alt. 13/7)
    11:15/8, # major seventh
    12:2 # octave
    }

def midinote1etfreq(
    midi_pitch: Union[int, np.ndarray], 
    a4:float = A4
) -> Union[float, np.ndarray]:
    """
    Convert MIDI pitch to frequency in 12-tone equal temperament.

    Every semitone multiplies the frequency by ``2 ** (1 / 12)``, and
    MIDI pitch 69 is mapped to `a4`.
    
    Parameters
    ----------
    midi_pitch : int or np.ndarray of ints
        MIDI pitch (or pitches) to be converted to frequency
    a4 : float 
        Tuning of A4 (default 440Hz)
        
    Returns
    -------
    frequency : float or np.ndarray of floats
        The frequency corresponding to the input MIDI pitch
    """
    # MIDI pitch 9 lies five octaves (a factor of 2 ** 5 = 32) below
    # MIDI pitch 69, so its frequency is a4 / 32
    reference_freq = a4 / 32

    # Distance in semitones from that reference pitch
    semitones_from_reference = midi_pitch - 9

    return reference_freq * (2 ** (semitones_from_reference / 12))


def midinote1naturalfreq(
    midi_pitch: Union[int, np.ndarray], 
    a4: float=A4, 
    natural_interval_ratios: Dict[int, float] = NATURAL_INTERVAL_RATIOS
) -> Union[float, np.ndarray]:
    """
    Convert MIDI pitch to frequency using natural (just) tuning.

    Each pitch is tuned relative to the A of its own octave: the frequency
    of that A is obtained from `a4` by octave doubling/halving, and it is
    then multiplied (for pitches above the A) or divided (for pitches below
    the A) by the ratio of the interval between the pitch and the A.
    
    Parameters
    ----------
    midi_pitch : int or np.ndarray of ints
        MIDI pitch (or pitches) to be converted to frequency
    a4 : float 
        Tuning of A4 (default 440Hz)
    natural_interval_ratios: dict
        Dictionary of the ratios defining the intervals. Keys are interval
        sizes in semitones, values are the corresponding frequency ratios.
        
    Returns
    -------
    freqs : float or np.ndarray of floats
        The frequency corresponding to the input MIDI pitch. A scalar
        input gives a float, an array input gives an array of the same shape.

    Raises
    ------
    KeyError
        If an interval is not a key of `natural_interval_ratios`
        (e.g., for non-integer pitches).
    """
    pitches = np.asarray(midi_pitch)
    # Remember whether a single pitch was given (Python or NumPy scalar),
    # so that a single float can be returned for it
    is_scalar = pitches.ndim == 0
    pitches = np.atleast_1d(pitches)

    # Octave number of each pitch (MIDI pitch 60 is in octave 4)
    octave = (pitches // 12) - 1

    # MIDI pitch of the A in the same octave as each pitch
    aref = (69.0 - 12.0 * (4 - octave))

    # Frequency of that A: `a4` moved by whole octaves
    aref_freq = a4 / (2.0 ** ((4 - octave)))

    # Signed distance in semitones from the reference A (between -9 and +2)
    interval = pitches - aref

    # The ratios must be stored as floats: an integer array would silently
    # truncate ratios such as 16/15 to 1 (and their inverses to 0)
    ratios = np.zeros(interval.shape, dtype=float)
    for i, itv in enumerate(interval.flat):
        # Pitches below the reference A use the inverse ratio
        ratios.flat[i] = natural_interval_ratios[abs(itv)] ** (1 if itv >= 0 else -1)

    freqs = aref_freq * ratios

    if is_scalar:
        # Index explicitly: converting a 1-element array with `float()`
        # is deprecated in recent NumPy versions
        return float(freqs[0])
    return freqs



In [None]:
# Path of the MIDI file with the performance used in this example
midi_fn = os.path.join('./example_data','Chopin_op10_no3_p01.mid')

# Load the performance from the MIDI file and get its note array
performance = pt.load_performance_midi(midi_fn)
note_array = performance.note_array()

# Render the same notes twice, changing only the tuning function

# Version using equal temperament
print('Equal temperament')
equal_temperament = synthesize(note_array, tuning=midinote1etfreq)

# Version using natural tuning
print('Natural tuning')
natural_tuning = synthesize(note_array, tuning=midinote1naturalfreq)
