Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,18 @@ In this version, TD-PSOLA only supports the fixed time stretching factor alpha.

You can modify the pitch of the audio sequence in two ways. The first one is beta, which is a fixed pitch shifting factor. The other one is target_f0, which specifies the target pitch sequence you want to convert to. You cannot use both parameters at the same time.

### Using PyTSMod from the command line

Since version 0.3.0, this package includes a command-line tool named `tsmod`, which creates the result file directly from a shell. To generate the WSOLA result of `input.wav` with a stretching factor of 1.3 and save it to `output.wav`, run:

```shell
$ tsmod wsola input.wav output.wav 1.3 # ola, wsola, pv, pv_int are available.
```

Currently, OLA, WSOLA, and Phase Vocoder (PV) are supported. TD-PSOLA is excluded due to the difficulty of passing extracted pitch data to it from the command line. Also, non-linear TSM is not supported from the command line.

For more information, use the `-h` or `--help` option to see the detailed usage of `tsmod`.

## Audio examples

The original audio is from TSM toolbox.
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pytsmod"
version = "0.2.0"
version = "0.3.0"
description = "An open-source Python library for audio time-scale modification."
authors = ["Sangeon Yong <koragon2@kaist.ac.kr>"]

Expand All @@ -18,8 +18,8 @@ librosa = "^0.8"
pytest = "^5.2"
flake8 = "^3.8.3"

# [tool.poetry.scripts]
# tsmod = 'pytsmod.console:run'
[tool.poetry.scripts]
tsmod = 'pytsmod.console:run'

[build-system]
requires = ["poetry>=0.12"]
Expand Down
2 changes: 1 addition & 1 deletion pytsmod/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.2.0'
__version__ = '0.3.0'

from .tdpsolatsm import *
from .wsolatsm import *
Expand Down
2 changes: 2 additions & 0 deletions pytsmod/console/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .console import run
from .descs import *
114 changes: 114 additions & 0 deletions pytsmod/console/console.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import sys
sys.path.append('./')

from pytsmod import ola, wsola
from pytsmod import phase_vocoder as pv
from pytsmod import phase_vocoder_int as pv_int
from pytsmod.console import *
import argparse
import soundfile as sf


def _add_common_arguments(sub, descs, alpha_type, alpha_help,
                          win_type, win_size):
    """Register the arguments every TSM sub-command shares.

    Adds the three positionals (input file, output file, alpha) followed by
    the window options, in that order.

    Parameters
    ----------
    sub : argparse.ArgumentParser
        Sub-command parser to extend.
    descs : module
        Module holding the help strings (``pytsmod.console.descs``).
    alpha_type : callable
        Converter for the stretching factor (``float`` or ``int``).
    alpha_help : str
        Help text for the stretching factor.
    win_type : str
        Default window type of this sub-command.
    win_size : int
        Default window size of this sub-command.
    """
    sub.add_argument('input_file', type=str, help=descs.INPUT_HELP)
    sub.add_argument('output_file', type=str, help=descs.OUTPUT_HELP)
    sub.add_argument('alpha', type=alpha_type, help=alpha_help)
    sub.add_argument('--win_type', '-wt', default=win_type, type=str,
                     help=descs.WT_HELP)
    sub.add_argument('--win_size', '-ws', default=win_size, type=int,
                     help=descs.WS_HELP)
    sub.add_argument('--syn_hop_size', '-sh', default=512, type=int,
                     help=descs.SH_HELP)


def run():
    """Entry point of the ``tsmod`` command-line tool.

    Parses ``sys.argv``, reads the input audio file, applies the selected
    TSM algorithm (``ola``, ``wsola``, ``pv`` or ``pv_int``) and writes the
    result to the output file with the sample rate of the input.

    Exits with the argparse usage error (status 2) when no algorithm is
    given on the command line.
    """
    # Resolved at call time on purpose: when this module is imported through
    # the package (e.g. by the `pytsmod.console:run` entry point), the
    # module-level star import executes while `pytsmod.console` is still
    # being initialised, so the description constants are not bound yet.
    from pytsmod.console import descs

    parser = argparse.ArgumentParser(description=descs.TSMOD_DESC)
    subparsers = parser.add_subparsers(help=descs.SUBPARSER_HELP,
                                       dest='subparser_name')

    # create parser for OLA.
    parser_ola = subparsers.add_parser('ola', help=descs.OLA_HELP,
                                       description=descs.OLA_DESC)
    _add_common_arguments(parser_ola, descs, float, descs.A_HELP,
                          'hann', 1024)

    # create parser for WSOLA.
    parser_wsola = subparsers.add_parser('wsola', help=descs.WSOLA_HELP,
                                         description=descs.WSOLA_DESC)
    _add_common_arguments(parser_wsola, descs, float, descs.A_HELP,
                          'hann', 1024)
    parser_wsola.add_argument('--tolerance', '-t', default=512, type=int,
                              help=descs.TOL_HELP)

    # create parser for phase-vocoder.
    parser_pv = subparsers.add_parser('pv', help=descs.PV_HELP,
                                      description=descs.PV_DESC)
    _add_common_arguments(parser_pv, descs, float, descs.A_HELP,
                          'sin', 2048)
    parser_pv.add_argument('--zero_pad', '-z', default=0, type=int,
                           help=descs.ZP_HELP)
    parser_pv.add_argument('--restore_energy', '-e', action='store_true',
                           help=descs.RE_HELP)
    parser_pv.add_argument('--fft_shift', '-fs', action='store_true',
                           help=descs.FS_HELP)
    parser_pv.add_argument('--phase_lock', '-pl', action='store_true',
                           help=descs.PL_HELP)

    # create parser for phase-vocoder int.
    parser_pvi = subparsers.add_parser('pv_int', help=descs.PVI_HELP,
                                       description=descs.PVI_DESC)
    _add_common_arguments(parser_pvi, descs, int, descs.A_PVI_HELP,
                          'hann', 2048)
    parser_pvi.add_argument('--zero_pad', '-z', default=None, type=int,
                            help=descs.ZP_HELP)
    parser_pvi.add_argument('--restore_energy', '-e', action='store_true',
                            help=descs.RE_HELP)
    parser_pvi.add_argument('--fft_shift', '-fs', action='store_true',
                            help=descs.FS_HELP)

    args = parser.parse_args()

    # argparse treats sub-commands as optional; without this guard a bare
    # `tsmod` call would crash on the missing `input_file` attribute.
    if args.subparser_name is None:
        parser.error('a TSM algorithm is required '
                     '(choose from ola, wsola, pv, pv_int).')

    x, sr = sf.read(args.input_file)
    # soundfile returns multi-channel audio as (frames, channels), while the
    # result is written back as `y.T`, i.e. the TSM output is channel-first.
    # Feed the input in the same channel-first layout. For mono (1-D) input
    # this is a no-op.
    # NOTE(review): assumes the TSM functions expect (channels, samples) —
    # confirm against the library's input validation.
    x = x.T

    common = dict(win_type=args.win_type, win_size=args.win_size,
                  syn_hop_size=args.syn_hop_size)

    if args.subparser_name == 'ola':
        y = ola(x, args.alpha, **common)
    elif args.subparser_name == 'wsola':
        y = wsola(x, args.alpha, tolerance=args.tolerance, **common)
    elif args.subparser_name == 'pv':
        y = pv(x, args.alpha, zero_pad=args.zero_pad,
               restore_energy=args.restore_energy,
               fft_shift=args.fft_shift, phase_lock=args.phase_lock,
               **common)
    else:  # 'pv_int' -- argparse rejects any other sub-command name.
        y = pv_int(x, args.alpha, zero_pad=args.zero_pad,
                   restore_energy=args.restore_energy,
                   fft_shift=args.fft_shift, **common)

    sf.write(args.output_file, y.T, sr)


if __name__ == '__main__':
run()
28 changes: 28 additions & 0 deletions pytsmod/console/descs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Help and description strings used by the command-line argument parser.

# Top-level parser.
TSMOD_DESC = 'Processing time-scale modification for given audio file.'
SUBPARSER_HELP = 'Available TSM algorithms'

# Positional file arguments.
INPUT_HELP = 'Input audio file to modify.'
OUTPUT_HELP = 'Output file path to save.'

# Stretching factor and window options.
A_HELP = 'The time stretching factor alpha.'
WT_HELP = 'Type of the window function. hann and sin are available.'
WS_HELP = 'Size of the window function.'
SH_HELP = 'Hop size of the synthesis window.'

# OLA.
OLA_HELP = 'Using OLA to modify audio file.'
OLA_DESC = 'Using OLA to modify audio file.'

# WSOLA.
WSOLA_HELP = 'Using WSOLA to modify audio file.'
WSOLA_DESC = 'Using WSOLA to modify audio file.'
TOL_HELP = ('Number of samples the window positions in the input signal '
            'may be shifted.')

# Phase vocoder.
PV_HELP = 'Using phase vocoder to modify audio file.'
PV_DESC = 'Using phase vocoder to modify audio file.'
ZP_HELP = 'The size of the zero pad in the window function.'
# Was "reserve potential energy loss", which reads as the opposite of what
# the --restore_energy flag name indicates.
RE_HELP = 'Try to compensate for potential energy loss.'
FS_HELP = 'Apply circular shift to STFT and ISTFT.'
PL_HELP = 'Apply phase locking.'

# Phase vocoder for integer stretching factors.
PVI_HELP = 'Using phase vocoder specialized for integer stretching factor.'
PVI_DESC = 'Using phase vocoder specialized for integer stretching factor.'
A_PVI_HELP = 'The time stretching factor alpha. Only integer value is allowed.'
2 changes: 1 addition & 1 deletion pytsmod/pvtsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def phase_vocoder_int(x, s, win_type='hann', win_size=2048, syn_hop_size=512,

y[c, :] = y_chan

return y
return y.squeeze()


def _find_peaks(spec):
Expand Down
155 changes: 155 additions & 0 deletions tests/test_console.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import pytest
from pytsmod import ola, wsola
from pytsmod import phase_vocoder as pv
from pytsmod import phase_vocoder_int as pv_int
import soundfile as sf
import numpy as np
import os
from subprocess import call


@pytest.mark.parametrize('algorithm', ['ola', 'wsola', 'pv', 'pv_int'])
def test_console_default_params(algorithm):
    """The CLI with default options must match the library called with
    default options, for every supported algorithm."""
    test_file = 'tests/data/castanetsviolin.wav'
    alpha = 2
    x, sr = sf.read(test_file)
    # Look the algorithm up among the names imported at the top of the module.
    y = globals()[algorithm](x, alpha)

    cmd = ['python', 'pytsmod/console/console.py', algorithm,
           test_file, 'temp_cli.wav', str(alpha)]
    if algorithm == 'pv_int':
        # The CLI flag is off unless given.
        # NOTE(review): presumably the library default for
        # phase_vocoder_int is fft_shift=True -- confirm.
        cmd.append('-fs')

    try:
        # Fail here with a clear message when the CLI itself fails, instead
        # of later with a missing-file error.
        assert call(cmd) == 0

        # Round-trip the reference through a file too, so both signals are
        # compared after the same write/read.
        sf.write('temp.wav', y, sr)
        y_, _ = sf.read('temp.wav')
        y_cli, _ = sf.read('temp_cli.wav')
    finally:
        # Never leave temporary files behind, even when a step above raised.
        for path in ('temp.wav', 'temp_cli.wav'):
            if os.path.exists(path):
                os.remove(path)

    assert np.allclose(y_, y_cli)


@pytest.mark.parametrize('alpha', [1.25])
@pytest.mark.parametrize('win_type', ['sin'])
@pytest.mark.parametrize('win_size', [512])
@pytest.mark.parametrize('syn_hop_size', [256])
def test_console_ola(alpha, win_type, win_size, syn_hop_size):
    """The ``ola`` sub-command with explicit options must match the
    library call with the same options."""
    test_file = 'tests/data/castanetsviolin.wav'
    x, sr = sf.read(test_file)
    y = ola(x, alpha, win_type=win_type, win_size=win_size,
            syn_hop_size=syn_hop_size)

    cmd = ['python', 'pytsmod/console/console.py', 'ola',
           test_file, 'temp_cli.wav', str(alpha),
           '-wt', win_type, '-ws', str(win_size),
           '-sh', str(syn_hop_size)]

    try:
        # Fail here with a clear message when the CLI itself fails, instead
        # of later with a missing-file error.
        assert call(cmd) == 0

        # Round-trip the reference through a file too, so both signals are
        # compared after the same write/read.
        sf.write('temp.wav', y, sr)
        y_, _ = sf.read('temp.wav')
        y_cli, _ = sf.read('temp_cli.wav')
    finally:
        # Never leave temporary files behind, even when a step above raised.
        for path in ('temp.wav', 'temp_cli.wav'):
            if os.path.exists(path):
                os.remove(path)

    assert np.allclose(y_, y_cli)


@pytest.mark.parametrize('alpha', [1.25])
@pytest.mark.parametrize('win_type', ['sin'])
@pytest.mark.parametrize('win_size', [512])
@pytest.mark.parametrize('syn_hop_size', [256])
@pytest.mark.parametrize('tolerance', [256])
def test_console_wsola(alpha, win_type, win_size, syn_hop_size, tolerance):
    """The ``wsola`` sub-command with explicit options must match the
    library call with the same options."""
    test_file = 'tests/data/castanetsviolin.wav'
    x, sr = sf.read(test_file)
    y = wsola(x, alpha, win_type=win_type, win_size=win_size,
              syn_hop_size=syn_hop_size, tolerance=tolerance)

    cmd = ['python', 'pytsmod/console/console.py', 'wsola',
           test_file, 'temp_cli.wav', str(alpha),
           '-wt', win_type, '-ws', str(win_size),
           '-sh', str(syn_hop_size), '-t', str(tolerance)]

    try:
        # Fail here with a clear message when the CLI itself fails, instead
        # of later with a missing-file error.
        assert call(cmd) == 0

        # Round-trip the reference through a file too, so both signals are
        # compared after the same write/read.
        sf.write('temp.wav', y, sr)
        y_, _ = sf.read('temp.wav')
        y_cli, _ = sf.read('temp_cli.wav')
    finally:
        # Never leave temporary files behind, even when a step above raised.
        for path in ('temp.wav', 'temp_cli.wav'):
            if os.path.exists(path):
                os.remove(path)

    assert np.allclose(y_, y_cli)


@pytest.mark.parametrize('alpha', [1.25])
@pytest.mark.parametrize('win_type', ['hann'])
@pytest.mark.parametrize('win_size', [1024])
@pytest.mark.parametrize('syn_hop_size', [256])
@pytest.mark.parametrize('zero_pad', [256])
@pytest.mark.parametrize('restore_energy', [True])
@pytest.mark.parametrize('fft_shift', [True])
@pytest.mark.parametrize('phase_lock', [True])
def test_console_pv(alpha, win_type, win_size, syn_hop_size, zero_pad,
                    restore_energy, fft_shift, phase_lock):
    """The ``pv`` sub-command with explicit options must match the
    library call with the same options."""
    test_file = 'tests/data/castanetsviolin.wav'
    x, sr = sf.read(test_file)
    y = pv(x, alpha, win_type=win_type, win_size=win_size,
           syn_hop_size=syn_hop_size, zero_pad=zero_pad,
           restore_energy=restore_energy, fft_shift=fft_shift,
           phase_lock=phase_lock)

    # '-e', '-fs' and '-pl' mirror the three boolean parameters, which are
    # all parametrized to True above.
    cmd = ['python', 'pytsmod/console/console.py', 'pv',
           test_file, 'temp_cli.wav', str(alpha),
           '-wt', win_type, '-ws', str(win_size),
           '-sh', str(syn_hop_size), '-z', str(zero_pad),
           '-e', '-fs', '-pl']

    try:
        # Fail here with a clear message when the CLI itself fails, instead
        # of later with a missing-file error.
        assert call(cmd) == 0

        # Round-trip the reference through a file too, so both signals are
        # compared after the same write/read.
        sf.write('temp.wav', y, sr)
        y_, _ = sf.read('temp.wav')
        y_cli, _ = sf.read('temp_cli.wav')
    finally:
        # Never leave temporary files behind, even when a step above raised.
        for path in ('temp.wav', 'temp_cli.wav'):
            if os.path.exists(path):
                os.remove(path)

    assert np.allclose(y_, y_cli)


@pytest.mark.parametrize('alpha', [2])
@pytest.mark.parametrize('win_type', ['sin'])
@pytest.mark.parametrize('win_size', [1024])
@pytest.mark.parametrize('syn_hop_size', [256])
@pytest.mark.parametrize('zero_pad', [256])
@pytest.mark.parametrize('restore_energy', [True])
@pytest.mark.parametrize('fft_shift', [False])
def test_console_pv_int(alpha, win_type, win_size, syn_hop_size, zero_pad,
                        restore_energy, fft_shift):
    """The ``pv_int`` sub-command with explicit options must match the
    library call with the same options."""
    test_file = 'tests/data/castanetsviolin.wav'
    x, sr = sf.read(test_file)
    # Exercise the integer phase vocoder on both sides; the test previously
    # called the plain phase vocoder and so never covered `pv_int`.
    y = pv_int(x, alpha, win_type=win_type, win_size=win_size,
               syn_hop_size=syn_hop_size, zero_pad=zero_pad,
               restore_energy=restore_energy, fft_shift=fft_shift)

    # '-e' mirrors restore_energy=True; '-fs' is omitted on purpose because
    # fft_shift is parametrized to False.
    cmd = ['python', 'pytsmod/console/console.py', 'pv_int',
           test_file, 'temp_cli.wav', str(alpha),
           '-wt', win_type, '-ws', str(win_size),
           '-sh', str(syn_hop_size), '-z', str(zero_pad),
           '-e']

    try:
        # Fail here with a clear message when the CLI itself fails, instead
        # of later with a missing-file error.
        assert call(cmd) == 0

        # Round-trip the reference through a file too, so both signals are
        # compared after the same write/read.
        sf.write('temp.wav', y, sr)
        y_, _ = sf.read('temp.wav')
        y_cli, _ = sf.read('temp_cli.wav')
    finally:
        # Never leave temporary files behind, even when a step above raised.
        for path in ('temp.wav', 'temp_cli.wav'):
            if os.path.exists(path):
                os.remove(path)

    assert np.allclose(y_, y_cli)
2 changes: 1 addition & 1 deletion tests/test_pytsmod.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@


def test_version():
    """The package must report the version of the current release."""
    assert __version__ == '0.3.0'