From 9e5ee8a4c031d9a2559160bfc15e6e895e92a2b3 Mon Sep 17 00:00:00 2001 From: Maziyar Panahi Date: Mon, 28 Aug 2023 10:51:44 +0200 Subject: [PATCH] Update Automatic_Speech_Recognition_Whisper_(WhisperForCTC).ipynb --- ..._Recognition_Whisper_(WhisperForCTC).ipynb | 89 +------------------ 1 file changed, 3 insertions(+), 86 deletions(-) diff --git a/examples/python/annotation/audio/whisper/Automatic_Speech_Recognition_Whisper_(WhisperForCTC).ipynb b/examples/python/annotation/audio/whisper/Automatic_Speech_Recognition_Whisper_(WhisperForCTC).ipynb index 6d382627b382..5c53511c3ea5 100644 --- a/examples/python/annotation/audio/whisper/Automatic_Speech_Recognition_Whisper_(WhisperForCTC).ipynb +++ b/examples/python/annotation/audio/whisper/Automatic_Speech_Recognition_Whisper_(WhisperForCTC).ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/audio/asr-Whisper/Automatic_Speech_Recognition_Whisper_(WhisperForCTC).ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/audio/whisper/Automatic_Speech_Recognition_Whisper_(WhisperForCTC).ipynb)" ] }, { @@ -65,89 +65,6 @@ "!pip install -q pyspark librosa" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting spark-nlp==5.1.0\n", - " Downloading https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/tmp/510/spark_nlp-5.1.0-py2.py3-none-any.whl (531 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m531.2/531.2 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting pyspark\n", - " Downloading pyspark-3.4.1.tar.gz (310.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m310.8/310.8 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting py4j==0.10.9.7 (from pyspark)\n", - " Downloading py4j-0.10.9.7-py2.py3-none-any.whl (200 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.5/200.5 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hBuilding wheels for collected packages: pyspark\n", - " Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for pyspark: filename=pyspark-3.4.1-py2.py3-none-any.whl size=311285388 sha256=49a31f5d43a10e1e377eac170ddcdfeaba1fe7110558de4c19be19558dea4bf8\n", - " Stored in directory: /root/.cache/pip/wheels/0d/77/a3/ff2f74cc9ab41f8f594dabf0579c2a7c6de920d584206e0834\n", - "Successfully built pyspark\n", - "Installing collected packages: spark-nlp, py4j, pyspark\n", - " Attempting uninstall: spark-nlp\n", - " Found existing installation: spark-nlp 5.0.2\n", - " Uninstalling spark-nlp-5.0.2:\n", - " Successfully uninstalled spark-nlp-5.0.2\n", - " Attempting uninstall: py4j\n", - " Found existing installation: py4j 0.10.9.5\n", - " Uninstalling py4j-0.10.9.5:\n", - " Successfully uninstalled py4j-0.10.9.5\n", - " Attempting uninstall: pyspark\n", - " Found existing installation: pyspark 3.2.3\n", - " Uninstalling pyspark-3.2.3:\n", - " Successfully uninstalled pyspark-3.2.3\n", - "Successfully installed py4j-0.10.9.7 pyspark-3.4.1 spark-nlp-5.1.0\n" - ] - } - ], - "source": [ - "! pip install --upgrade --force-reinstall https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/tmp/510/spark_nlp-5.1.0-py2.py3-none-any.whl pyspark" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3.4.1\n", - "5.1.0\n" - ] - } - ], - "source": [ - "from sparknlp.base import *\n", - "from sparknlp.annotator import *\n", - "\n", - "from sparknlp.pretrained import PretrainedPipeline\n", - "import sparknlp\n", - "\n", - "from pyspark.sql import SparkSession\n", - "from pyspark.ml import Pipeline, PipelineModel\n", - "\n", - "spark = SparkSession.builder \\\n", - " .master('local[*]') \\\n", - " .appName('Spark NLP') \\\n", - " .config(\"spark.driver.memory\", \"16g\") \\\n", - " .config(\"spark.driver.maxResultSize\", \"0G\") \\\n", - " .config(\"spark.serializer\", \"org.apache.spark.serializer.KryoSerializer\") \\\n", - " .config(\"spark.kryoserializer.buffer.max\", \"2000M\") \\\n", - " .config(\"spark.jars\", \"https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/tmp/510/spark-nlp-assembly-5.1.0-rc1.jar\") \\\n", - " .getOrCreate()\n", - "\n", - "print(spark.version)\n", - "print(sparknlp.version())" - ] - }, { "cell_type": "code", "execution_count": null, @@ -428,8 +345,8 @@ "Length: 40221 (39K) [audio/mp3]\n", "Saving to: ‘1664116679869-voicemaker.in-speech.mp3’\n", "\n", - "\r", - " 166411667 0%[ ] 0 --.-KB/s \r", + "\r\n", + " 166411667 0%[ ] 0 --.-KB/s \r\n", "1664116679869-voice 100%[===================>] 39.28K --.-KB/s in 0.02s \n", "\n", "2023-08-24 13:54:44 (1.98 MB/s) - ‘1664116679869-voicemaker.in-speech.mp3’ saved [40221/40221]\n",