Skip to content

Commit

Permalink
using external dockerfile for extra requirements
Browse files Browse the repository at this point in the history
  • Loading branch information
felipevieira committed Mar 22, 2019
1 parent a5b5f61 commit 93889db
Show file tree
Hide file tree
Showing 15 changed files with 189 additions and 1,128 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,11 @@ Custom .gitignore file

notebooks/.ipynb_checkpoints/
data/
Experiment Data/
Full Dataset/
scripts/
venv/
.vscode/

annotation_mapper.txt
*.pyc
8 changes: 8 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Extends the MIR toolbox image with the project's extra Python requirements.
# NOTE(review): base image is untagged (implicit :latest) — pin a specific tag
# or digest for reproducible builds once a versioned tag is published.
FROM mtgupf/mir-toolbox

# Install pip for Python 3. --no-install-recommends keeps the layer minimal,
# and the apt lists are removed in the SAME layer so they never bloat the image.
RUN set -xe \
    && apt-get update \
    && apt-get install -y --no-install-recommends python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Copy only the requirements manifest first so the dependency layer stays
# cached until requirements.txt itself changes; skip pip's download cache.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version: "2"
services:
mir-tool:
image: mtgupf/mir-toolbox
build: .
ports:
- "8888:8888"
volumes:
Expand Down
151 changes: 91 additions & 60 deletions notebooks/BaselineRhythmicAssessmentSystem.ipynb

Large diffs are not rendered by default.

45 changes: 21 additions & 24 deletions notebooks/DownloadDataFromMASTDataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
"source": [
"# Downloading audio files from MAST Rhythmic subset\n",
"\n",
"### This is an auxiliary notebook which targets at the download of the rhythmic references and performances properly annotated\n",
"This is an auxiliary notebook that targets the download of audio and grade annotations for a subset of references and performances from the original MAST Rhythmic dataset.\n",
"\n",
"\n",
"### All the downloaded data will be available in the \"data\" folder in the root of this project. The _Only Performances_ directory contains all the performances along with a _listperformances_ file which contains the list of all file names. The _Only References_ directory contains all the references along with a _listreferences_ file which contains the list of all file names. Please check that the i-eth file in the _listreferences_ file is the reference for the i-eth file in the _listperformances_ file\n",
"\n"
"All the downloaded data will be stored in the _data/_ folder in the root of this project. The _Only Performances_ directory will contain the audio for all performances along with a _listperformances_ file which lists all file names. The _Only References_ directory will contain the audio for all references along with a _listreferences_ file which lists all file names. Please note that the i-th file in the _listreferences_ file is the reference for the i-th file in the _listperformances_ file"
]
},
{
Expand All @@ -22,16 +20,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading file ../data/MAST subset [Performances].tar.xz\n",
"Finished downloading data\n",
"\n",
"Downloading file ../data/MAST subset [References].tar.xz\n",
"Finished downloading data\n",
"\n",
"Downloading file ../data/Performances Annotations.tar.gz\n",
"Finished downloading data\n",
"Downloading file MAST subset [References].tar.xz\n",
"Downloading file MAST subset [Performances].tar.xz\n",
"Downloading file Performances Annotations.tar.gz\n",
"\n",
"All data have been downloaded!\n"
"All folders and files were downloaded and stored in ../data/\n"
]
}
],
Expand All @@ -41,11 +34,10 @@
"import tarfile\n",
"\n",
"\n",
"# one must request the files urls\n",
"FILES = {\n",
" '<url>' : 'MAST subset [References].tar.xz',\n",
" '<url>' : 'MAST subset [Performances].tar.xz',\n",
" '<url>' : 'Performances Annotations.tar.gz'\n",
" 'https://www.dropbox.com/s/54w5ohm9rh9q9cb/MAST%20subset%20%5BReferences%5D.tar.xz?dl=1' : 'MAST subset [References].tar.xz',\n",
" 'https://www.dropbox.com/s/r3td6p1ncpvmrzy/MAST%20subset%20%5BPerformances%5D.tar.xz?dl=1' : 'MAST subset [Performances].tar.xz',\n",
" 'https://www.dropbox.com/s/yzvicftsjt6rr8w/Performances%20Annotations.tar.xz?dl=1' : 'Performances Annotations.tar.gz'\n",
"}\n",
"\n",
"DESTINATION_FOLDER = \"../data/\"\n",
Expand All @@ -56,23 +48,28 @@
"for file_url in FILES.keys():\n",
" file_name = FILES[file_url]\n",
" \n",
" print(\"Downloading file %s\" % os.path.join(DESTINATION_FOLDER, file_name))\n",
" print(\"Downloading file %s\" % file_name)\n",
" \n",
" # downloading file\n",
" urllib.request.urlretrieve(file_url, os.path.join(DESTINATION_FOLDER, file_name))\n",
" \n",
" # extracting file\n",
" tar = tarfile.open(os.path.join(DESTINATION_FOLDER, file_name))\n",
" tar.extractall(DESTINATION_FOLDER)\n",
" tar.close()\n",
"\n",
" # remove auxiliar file\n",
" os.remove(os.path.join(DESTINATION_FOLDER, file_name))\n",
"\n",
" print(\"Finished downloading data\\n\")\n",
" \n",
"print(\"All data have been downloaded!\")\n",
" \n",
"print()\n",
"print(\"All folders and files were downloaded and stored in %s\" % DESTINATION_FOLDER)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
86 changes: 60 additions & 26 deletions notebooks/PreProcessDataFromMASTDataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@
"source": [
"## Pre-processing MAST rhythmic data\n",
"\n",
"### This is an auxiliary notebook which targets at the preparation of data towards the automatic assessment analysis. It requires the data downloaded using the _DownloadDataFromMASTDataset_ notebook.\n",
"This is an auxiliary notebook which targets the preparation of data for the automatic assessment analysis. It requires the data downloaded using the _DownloadDataFromMASTDataset_ notebook.\n",
"\n",
"### Along this notebook we will extract onset times information from both references and performances, scale performances according to their respective references and convert the onset times to a binary representation which might be more suitable to the application of distance measures"
"This material will guide us on the tasks of extracting onset times information from both references and performances, scaling performances according to their respective references and converting the onset times to a binary representation which might be more suitable to the application of distance measures"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Loading Essentia's necessary tools"
"First, let's import all Essentia's required modules, set a few audio processing parameters and create a few functions for audio loading and onset extraction"
]
},
{
Expand All @@ -24,25 +24,24 @@
"metadata": {},
"outputs": [],
"source": [
"from essentia.standard import *\n",
"from essentia import Pool, array\n",
"\n",
"import os\n",
"import numpy as np\n",
"import math\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from essentia.standard import *\n",
"from essentia import Pool, array\n",
"\n",
"DATA_FOLDER = \"../data/\"\n",
"\n",
"SAMPLE_RATE = 44100\n",
"WINDOW_SIZE = 1024\n",
"HOP_SIZE = 512\n",
"WINDOWING_METHOD = 'hann'\n",
"ONSET_DETECTION_METHOD = 'hfc'\n",
"\n",
"# used for unquantizing purposes\n",
"ONSET_N_OF_BINS = 60\n",
"\n",
"WINDOWING_METHOD = 'hann'\n",
"ONSET_DETECTION_METHOD = 'hfc'\n",
"\n",
"def _load_file_as_monophonic_waveform(file_path):\n",
" fs = SAMPLE_RATE\n",
"\n",
Expand All @@ -69,14 +68,31 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Extracting onset times and writing output to an auxiliar file. After executing the following one can check the onset times in a file inside the data folder"
"Next, we extract onset times for all references and performances, writing the outputs to an auxiliary file for future use. After executing the following, one can check the onset times in a file inside each data folder"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.336689 0.615329 1.10295 1.40481 1.78794 2.14785 2.77478 3.01859 3.2624 3.52943 4.02866\n",
"0.02322 0.2322 0.812698 1.11456 1.40481 2.02014 2.64707 3.27401 3.90095 4.20281 4.50467 5.10839\n",
"0.0696599 0.557279 1.01007 1.24227 1.47447 1.93887 2.14785 2.38005 2.83283 3.05342 3.27401 3.4946 3.70358 3.92417 4.16798\n",
"0.0348299 0.278639 0.963628 1.01007 1.33515 1.79955 2.2059 3.16952 3.44816 3.69197 3.93578 4.4234\n",
"0.313469 0.592109 1.42803 1.69506 1.95048 2.54259 2.93732 3.28562 4.35374\n",
"0.20898 0.441179 0.650159 0.870748 1.10295 1.52091 1.96209 2.39166 2.798 3.00698 3.23918 3.44816 3.65714 4.08671\n",
"0.16254 0.684989 1.21905 2.05497 2.33361 2.60063 2.85605 3.09986 3.65714 3.93578 4.21442 5.02712\n",
"0.0464399 0.336689 0.893968 1.17261 1.42803 1.69506 2.6819 3.07664 4.02866\n",
"0.2322 0.359909 0.975238 1.2771 1.59057 2.2059 2.83283 3.11147 3.42494 4.05188 4.66721\n",
"0.2322 0.359909 0.975238 1.2771 1.59057 2.2059 2.83283 3.11147 3.42494 4.05188 4.66721\n"
]
}
],
"source": [
"def extract_onsets(base_dir, list_files, output_file):\n",
" with open(list_files, 'r') as listfiles:\n",
Expand All @@ -90,8 +106,6 @@
" \n",
" output.write('%s\\n' % \" \".join(list(str(x) for x in onsets)))\n",
"\n",
"DATA_FOLDER = \"../data/\"\n",
"\n",
"# extracting onset times for performances\n",
"base_dir_performances = os.path.join(DATA_FOLDER, 'Only Performances/') \n",
"list_files_performances = os.path.join(DATA_FOLDER, 'Only Performances/listperformances')\n",
Expand All @@ -104,21 +118,44 @@
"list_files_references = os.path.join(DATA_FOLDER, 'Only References/listreferences')\n",
"output_file_references = os.path.join(DATA_FOLDER, 'Only References/MAST Onsets [References]')\n",
"\n",
"extract_onsets(base_dir_references, list_files_references, output_file_references)"
"extract_onsets(base_dir_references, list_files_references, output_file_references)\n",
"\n",
"with open(os.path.join(DATA_FOLDER, 'Only References/MAST Onsets [References]')) as onsets_file:\n",
" for i in range(10):\n",
" print(onsets_file.readline().strip())\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Rescaling performances onsets times according to their reference lengths and converting them into a binary representation (a 1 in the i-eth position means the presence of a beat in the i-eth time window). After executing the following one can check the unquantized representation of the onset times in a file inside the data folders"
"We then scale performances' onset times according to their reference lengths. This is a necessary step since students were allowed to play in a different tempo from the masters. Tempo-independence is achieved in our sample by scaling respecting the reference length/tempo. \n",
"\n",
"Afterwards, we convert this numeric/unquantized representation into a binary/unquantized representation. In this new representation of data, a 1 in the i-th position translates into the presence of a beat in the i-th time window. After executing the following one can check the unquantized representations of the onset times in a file inside the data folders. For the references file, this unquantized data is also scaled."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000010000001000010000001000010000000000100010001000100000001\n",
"1010000001000100100000001000000100000010000000100100010000001\n",
"1000000100000010010001000001001000100000100010010010010010001\n",
"1001000000000100001000001000001000000000000100010010010000001\n",
"1000100000000000010001001000000001000001000010000000000000001\n",
"100010010010001000001000000100000010000010010001001001000000\n",
"1000001000000100000000010001001001001000000100010010000000001\n",
"1000100000000100010001000100000000000000100000100000000000001\n",
"1010000000100010001000000001000000010001000100000000100000001\n",
"1010000000100010001000000001000000010001000100000000100000001\n"
]
}
],
"source": [
"def rescale_and_make_bins(performances_file, references_file):\n",
" with open(performances_file, 'r') as perf_onsets_file:\n",
Expand Down Expand Up @@ -194,14 +231,18 @@
"\n",
"# running functions over data\n",
"rescale_and_make_bins(os.path.join(DATA_FOLDER, 'Only Performances/MAST Onsets [Performances]'),\n",
" os.path.join(DATA_FOLDER, 'Only References/MAST Onsets [References]'))\n"
" os.path.join(DATA_FOLDER, 'Only References/MAST Onsets [References]'))\n",
"\n",
"with open(os.path.join(DATA_FOLDER, 'Only References/MAST Onsets [References] [BINS]')) as onsets_file:\n",
" for i in range(10):\n",
" print(onsets_file.readline().strip().replace(\" \",\"\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting waveforms, Essentia onsets and binary onsets for validation purposes"
"Finally, we plot a few audio samples with their unquantized and quantized onsets plotted in the form of stems"
]
},
{
Expand Down Expand Up @@ -338,13 +379,6 @@
"f.tight_layout()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
356 changes: 0 additions & 356 deletions notebooks/[OLD] MASTRhythmVisualAnalysis.ipynb

This file was deleted.

40 changes: 1 addition & 39 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,39 +1 @@
asn1crypto==0.24.0
backports.ssl-match-hostname==3.5.0.1
cached-property==1.3.1
certifi==2018.1.18
chardet==3.0.4
cryptography==2.1.4
docker==2.5.1
docker-compose==1.17.1
docker-pycreds==0.2.1
dockerpty==0.4.1
docopt==0.6.2
enum34==1.1.6
funcsigs==1.0.2
functools32==3.2.3.post2
idna==2.6
ipaddress==1.0.17
jsonschema==2.6.0
keyring==10.6.0
keyrings.alt==3.0
mock==2.0.0
olefile==0.45.1
pbr==3.1.1
Pillow==5.1.0
psutil==5.4.2
pycairo==1.16.2
pycrypto==2.6.1
pydub==0.23.0
pygobject==3.26.1
pyOpenSSL==17.5.0
python-apt==1.6.3
pyxdg==0.25
PyYAML==3.12
reportlab==3.4.0
requests==2.18.4
SecretStorage==2.3.1
six==1.11.0
texttable==0.9.1
urllib3==1.22
websocket-client==0.44.0
textdistance
Loading

0 comments on commit 93889db

Please sign in to comment.