Skip to content

Commit

Permalink
using external dockerfile for extra requirements
Browse files Browse the repository at this point in the history
  • Loading branch information
felipevieira committed Mar 22, 2019
1 parent a5b5f61 commit 93889db
Show file tree
Hide file tree
Showing 15 changed files with 189 additions and 1,128 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,11 @@ Custom .gitignore file

notebooks/.ipynb_checkpoints/
data/
Experiment Data/
Full Dataset/
scripts/
venv/
.vscode/

annotation_mapper.txt
*.pyc
8 changes: 8 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Extends the MIR toolbox image with the project's extra Python requirements.
# NOTE(review): base image is untagged (implicit :latest) — pin a specific tag
# or digest for reproducible builds once a versioned tag is published.
FROM mtgupf/mir-toolbox

# Install pip for Python 3. --no-install-recommends keeps the layer minimal,
# and the apt lists are removed in the SAME layer so they never bloat the image.
RUN set -xe \
    && apt-get update \
    && apt-get install -y --no-install-recommends python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Copy only the requirements manifest first so the dependency layer stays
# cached until requirements.txt itself changes; skip pip's download cache.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version: "2"
services:
mir-tool:
image: mtgupf/mir-toolbox
build: .
ports:
- "8888:8888"
volumes:
Expand Down
151 changes: 91 additions & 60 deletions notebooks/BaselineRhythmicAssessmentSystem.ipynb

Large diffs are not rendered by default.

45 changes: 21 additions & 24 deletions notebooks/DownloadDataFromMASTDataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
"source": [
"# Downloading audio files from MAST Rhythmic subset\n",
"\n",
"### This is an auxiliary notebook which targets at the download of the rhythmic references and performances properly annotated\n",
"This is an auxiliary notebook that targets the download of audio and grade annotations for a subset of references and performances from the original MAST Rhythmic dataset.\n",
"\n",
"\n",
"### All the downloaded data will be available in the \"data\" folder in the root of this project. The _Only Performances_ directory contains all the performances along with a _listperformances_ file which contains the list of all file names. The _Only References_ directory contains all the references along with a _listreferences_ file which contains the list of all file names. Please check that the i-eth file in the _listreferences_ file is the reference for the i-eth file in the _listperformances_ file\n",
"\n"
"All the downloaded data will be stored in the _data/_ folder in the root of this project. The _Only Performances_ directory will contain the audio for all performances along with a _listperformances_ file which lists all file names. The _Only References_ directory will contain the audio for all references along with a _listreferences_ file which lists all file names. Please note that the i-th file in the _listreferences_ file is the reference for the i-th file in the _listperformances_ file"
]
},
{
Expand All @@ -22,16 +20,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading file ../data/MAST subset [Performances].tar.xz\n",
"Finished downloading data\n",
"\n",
"Downloading file ../data/MAST subset [References].tar.xz\n",
"Finished downloading data\n",
"\n",
"Downloading file ../data/Performances Annotations.tar.gz\n",
"Finished downloading data\n",
"Downloading file MAST subset [References].tar.xz\n",
"Downloading file MAST subset [Performances].tar.xz\n",
"Downloading file Performances Annotations.tar.gz\n",
"\n",
"All data have been downloaded!\n"
"All folders and files were downloaded and stored in ../data/\n"
]
}
],
Expand All @@ -41,11 +34,10 @@
"import tarfile\n",
"\n",
"\n",
"# one must request the files urls\n",
"FILES = {\n",
" '<url>' : 'MAST subset [References].tar.xz',\n",
" '<url>' : 'MAST subset [Performances].tar.xz',\n",
" '<url>' : 'Performances Annotations.tar.gz'\n",
" 'https://www.dropbox.com/s/54w5ohm9rh9q9cb/MAST%20subset%20%5BReferences%5D.tar.xz?dl=1' : 'MAST subset [References].tar.xz',\n",
" 'https://www.dropbox.com/s/r3td6p1ncpvmrzy/MAST%20subset%20%5BPerformances%5D.tar.xz?dl=1' : 'MAST subset [Performances].tar.xz',\n",
" 'https://www.dropbox.com/s/yzvicftsjt6rr8w/Performances%20Annotations.tar.xz?dl=1' : 'Performances Annotations.tar.gz'\n",
"}\n",
"\n",
"DESTINATION_FOLDER = \"../data/\"\n",
Expand All @@ -56,23 +48,28 @@
"for file_url in FILES.keys():\n",
" file_name = FILES[file_url]\n",
" \n",
" print(\"Downloading file %s\" % os.path.join(DESTINATION_FOLDER, file_name))\n",
" print(\"Downloading file %s\" % file_name)\n",
" \n",
" # downloading file\n",
" urllib.request.urlretrieve(file_url, os.path.join(DESTINATION_FOLDER, file_name))\n",
" \n",
" # extracting file\n",
" tar = tarfile.open(os.path.join(DESTINATION_FOLDER, file_name))\n",
" tar.extractall(DESTINATION_FOLDER)\n",
" tar.close()\n",
"\n",
" # remove auxiliar file\n",
" os.remove(os.path.join(DESTINATION_FOLDER, file_name))\n",
"\n",
" print(\"Finished downloading data\\n\")\n",
" \n",
"print(\"All data have been downloaded!\")\n",
" \n",
"print()\n",
"print(\"All folders and files were downloaded and stored in %s\" % DESTINATION_FOLDER)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
86 changes: 60 additions & 26 deletions notebooks/PreProcessDataFromMASTDataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@
"source": [
"## Pre-processing MAST rhythmic data\n",
"\n",
"### This is an auxiliary notebook which targets at the preparation of data towards the automatic assessment analysis. It requires the data downloaded using the _DownloadDataFromMASTDataset_ notebook.\n",
"This is an auxiliary notebook which targets the preparation of data for the automatic assessment analysis. It requires the data downloaded using the _DownloadDataFromMASTDataset_ notebook.\n",
"\n",
"### Along this notebook we will extract onset times information from both references and performances, scale performances according to their respective references and convert the onset times to a binary representation which might be more suitable to the application of distance measures"
"This material will guide us on the tasks of extracting onset times information from both references and performances, scaling performances according to their respective references and converting the onset times to a binary representation which might be more suitable to the application of distance measures"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Loading Essentia's necessary tools"
"First, let's import all Essentia's required modules, set a few audio processing parameters and create a few functions for audio loading and onset extraction"
]
},
{
Expand All @@ -24,25 +24,24 @@
"metadata": {},
"outputs": [],
"source": [
"from essentia.standard import *\n",
"from essentia import Pool, array\n",
"\n",
"import os\n",
"import numpy as np\n",
"import math\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from essentia.standard import *\n",
"from essentia import Pool, array\n",
"\n",
"DATA_FOLDER = \"../data/\"\n",
"\n",
"SAMPLE_RATE = 44100\n",
"WINDOW_SIZE = 1024\n",
"HOP_SIZE = 512\n",
"WINDOWING_METHOD = 'hann'\n",
"ONSET_DETECTION_METHOD = 'hfc'\n",
"\n",
"# used for unquantizing purposes\n",
"ONSET_N_OF_BINS = 60\n",
"\n",
"WINDOWING_METHOD = 'hann'\n",
"ONSET_DETECTION_METHOD = 'hfc'\n",
"\n",
"def _load_file_as_monophonic_waveform(file_path):\n",
" fs = SAMPLE_RATE\n",
"\n",
Expand All @@ -69,14 +68,31 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Extracting onset times and writing output to an auxiliar file. After executing the following one can check the onset times in a file inside the data folder"
"Next, we extract onset times for all references and performances, writing the outputs to an auxiliary file for future use. After executing the following, one can check the onset times in a file inside each data folder"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.336689 0.615329 1.10295 1.40481 1.78794 2.14785 2.77478 3.01859 3.2624 3.52943 4.02866\n",
"0.02322 0.2322 0.812698 1.11456 1.40481 2.02014 2.64707 3.27401 3.90095 4.20281 4.50467 5.10839\n",
"0.0696599 0.557279 1.01007 1.24227 1.47447 1.93887 2.14785 2.38005 2.83283 3.05342 3.27401 3.4946 3.70358 3.92417 4.16798\n",
"0.0348299 0.278639 0.963628 1.01007 1.33515 1.79955 2.2059 3.16952 3.44816 3.69197 3.93578 4.4234\n",
"0.313469 0.592109 1.42803 1.69506 1.95048 2.54259 2.93732 3.28562 4.35374\n",
"0.20898 0.441179 0.650159 0.870748 1.10295 1.52091 1.96209 2.39166 2.798 3.00698 3.23918 3.44816 3.65714 4.08671\n",
"0.16254 0.684989 1.21905 2.05497 2.33361 2.60063 2.85605 3.09986 3.65714 3.93578 4.21442 5.02712\n",
"0.0464399 0.336689 0.893968 1.17261 1.42803 1.69506 2.6819 3.07664 4.02866\n",
"0.2322 0.359909 0.975238 1.2771 1.59057 2.2059 2.83283 3.11147 3.42494 4.05188 4.66721\n",
"0.2322 0.359909 0.975238 1.2771 1.59057 2.2059 2.83283 3.11147 3.42494 4.05188 4.66721\n"
]
}
],
"source": [
"def extract_onsets(base_dir, list_files, output_file):\n",
" with open(list_files, 'r') as listfiles:\n",
Expand All @@ -90,8 +106,6 @@
" \n",
" output.write('%s\\n' % \" \".join(list(str(x) for x in onsets)))\n",
"\n",
"DATA_FOLDER = \"../data/\"\n",
"\n",
"# extracting onset times for performances\n",
"base_dir_performances = os.path.join(DATA_FOLDER, 'Only Performances/') \n",
"list_files_performances = os.path.join(DATA_FOLDER, 'Only Performances/listperformances')\n",
Expand All @@ -104,21 +118,44 @@
"list_files_references = os.path.join(DATA_FOLDER, 'Only References/listreferences')\n",
"output_file_references = os.path.join(DATA_FOLDER, 'Only References/MAST Onsets [References]')\n",
"\n",
"extract_onsets(base_dir_references, list_files_references, output_file_references)"
"extract_onsets(base_dir_references, list_files_references, output_file_references)\n",
"\n",
"with open(os.path.join(DATA_FOLDER, 'Only References/MAST Onsets [References]')) as onsets_file:\n",
" for i in range(10):\n",
" print(onsets_file.readline().strip())\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Rescaling performances onsets times according to their reference lengths and converting them into a binary representation (a 1 in the i-eth position means the presence of a beat in the i-eth time window). After executing the following one can check the unquantized representation of the onset times in a file inside the data folders"
"We then scale performances' onset times according to their reference lengths. This is a necessary step since students were allowed to play in a different tempo from the masters. Tempo-independence is achieved in our sample by scaling respecting the reference length/tempo. \n",
"\n",
"Afterwards, we convert this numeric/unquantized representation into a binary/unquantized representation. In this new representation of data, a 1 in the i-th position translates into the presence of a beat in the i-th time window. After executing the following one can check the unquantized representations of the onset times in a file inside the data folders. For the references file, this unquantized data is also scaled."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000010000001000010000001000010000000000100010001000100000001\n",
"1010000001000100100000001000000100000010000000100100010000001\n",
"1000000100000010010001000001001000100000100010010010010010001\n",
"1001000000000100001000001000001000000000000100010010010000001\n",
"1000100000000000010001001000000001000001000010000000000000001\n",
"100010010010001000001000000100000010000010010001001001000000\n",
"1000001000000100000000010001001001001000000100010010000000001\n",
"1000100000000100010001000100000000000000100000100000000000001\n",
"1010000000100010001000000001000000010001000100000000100000001\n",
"1010000000100010001000000001000000010001000100000000100000001\n"
]
}
],
"source": [
"def rescale_and_make_bins(performances_file, references_file):\n",
" with open(performances_file, 'r') as perf_onsets_file:\n",
Expand Down Expand Up @@ -194,14 +231,18 @@
"\n",
"# running functions over data\n",
"rescale_and_make_bins(os.path.join(DATA_FOLDER, 'Only Performances/MAST Onsets [Performances]'),\n",
" os.path.join(DATA_FOLDER, 'Only References/MAST Onsets [References]'))\n"
" os.path.join(DATA_FOLDER, 'Only References/MAST Onsets [References]'))\n",
"\n",
"with open(os.path.join(DATA_FOLDER, 'Only References/MAST Onsets [References] [BINS]')) as onsets_file:\n",
" for i in range(10):\n",
" print(onsets_file.readline().strip().replace(\" \",\"\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting waveforms, Essentia onsets and binary onsets for validation purposes"
"Finally, we plot a few audio samples with their unquantized and quantized onsets plotted in the form of stems"
]
},
{
Expand Down Expand Up @@ -338,13 +379,6 @@
"f.tight_layout()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
356 changes: 0 additions & 356 deletions notebooks/[OLD] MASTRhythmVisualAnalysis.ipynb

This file was deleted.

40 changes: 1 addition & 39 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,39 +1 @@
asn1crypto==0.24.0
backports.ssl-match-hostname==3.5.0.1
cached-property==1.3.1
certifi==2018.1.18
chardet==3.0.4
cryptography==2.1.4
docker==2.5.1
docker-compose==1.17.1
docker-pycreds==0.2.1
dockerpty==0.4.1
docopt==0.6.2
enum34==1.1.6
funcsigs==1.0.2
functools32==3.2.3.post2
idna==2.6
ipaddress==1.0.17
jsonschema==2.6.0
keyring==10.6.0
keyrings.alt==3.0
mock==2.0.0
olefile==0.45.1
pbr==3.1.1
Pillow==5.1.0
psutil==5.4.2
pycairo==1.16.2
pycrypto==2.6.1
pydub==0.23.0
pygobject==3.26.1
pyOpenSSL==17.5.0
python-apt==1.6.3
pyxdg==0.25
PyYAML==3.12
reportlab==3.4.0
requests==2.18.4
SecretStorage==2.3.1
six==1.11.0
texttable==0.9.1
urllib3==1.22
websocket-client==0.44.0
textdistance
Loading

0 comments on commit 93889db

Please sign in to comment.