Skip to content

Commit

Permalink
Update Automatic_Speech_Recognition_Whisper_(WhisperForCTC).ipynb
Browse files Browse the repository at this point in the history
  • Loading branch information
maziyarpanahi committed Aug 28, 2023
1 parent 82239d8 commit 9e5ee8a
Showing 1 changed file with 3 additions and 86 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/audio/asr-Whisper/Automatic_Speech_Recognition_Whisper_(WhisperForCTC).ipynb)"
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/audio/whisper/Automatic_Speech_Recognition_Whisper_(WhisperForCTC).ipynb)"
]
},
{
Expand Down Expand Up @@ -65,89 +65,6 @@
"!pip install -q pyspark librosa"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting spark-nlp==5.1.0\n",
" Downloading https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/tmp/510/spark_nlp-5.1.0-py2.py3-none-any.whl (531 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m531.2/531.2 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting pyspark\n",
" Downloading pyspark-3.4.1.tar.gz (310.8 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m310.8/310.8 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Collecting py4j==0.10.9.7 (from pyspark)\n",
" Downloading py4j-0.10.9.7-py2.py3-none-any.whl (200 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.5/200.5 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hBuilding wheels for collected packages: pyspark\n",
" Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for pyspark: filename=pyspark-3.4.1-py2.py3-none-any.whl size=311285388 sha256=49a31f5d43a10e1e377eac170ddcdfeaba1fe7110558de4c19be19558dea4bf8\n",
" Stored in directory: /root/.cache/pip/wheels/0d/77/a3/ff2f74cc9ab41f8f594dabf0579c2a7c6de920d584206e0834\n",
"Successfully built pyspark\n",
"Installing collected packages: spark-nlp, py4j, pyspark\n",
" Attempting uninstall: spark-nlp\n",
" Found existing installation: spark-nlp 5.0.2\n",
" Uninstalling spark-nlp-5.0.2:\n",
" Successfully uninstalled spark-nlp-5.0.2\n",
" Attempting uninstall: py4j\n",
" Found existing installation: py4j 0.10.9.5\n",
" Uninstalling py4j-0.10.9.5:\n",
" Successfully uninstalled py4j-0.10.9.5\n",
" Attempting uninstall: pyspark\n",
" Found existing installation: pyspark 3.2.3\n",
" Uninstalling pyspark-3.2.3:\n",
" Successfully uninstalled pyspark-3.2.3\n",
"Successfully installed py4j-0.10.9.7 pyspark-3.4.1 spark-nlp-5.1.0\n"
]
}
],
"source": [
"! pip install --upgrade --force-reinstall https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/tmp/510/spark_nlp-5.1.0-py2.py3-none-any.whl pyspark"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3.4.1\n",
"5.1.0\n"
]
}
],
"source": [
"from sparknlp.base import *\n",
"from sparknlp.annotator import *\n",
"\n",
"from sparknlp.pretrained import PretrainedPipeline\n",
"import sparknlp\n",
"\n",
"from pyspark.sql import SparkSession\n",
"from pyspark.ml import Pipeline, PipelineModel\n",
"\n",
"spark = SparkSession.builder \\\n",
" .master('local[*]') \\\n",
" .appName('Spark NLP') \\\n",
" .config(\"spark.driver.memory\", \"16g\") \\\n",
" .config(\"spark.driver.maxResultSize\", \"0G\") \\\n",
" .config(\"spark.serializer\", \"org.apache.spark.serializer.KryoSerializer\") \\\n",
" .config(\"spark.kryoserializer.buffer.max\", \"2000M\") \\\n",
" .config(\"spark.jars\", \"https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/tmp/510/spark-nlp-assembly-5.1.0-rc1.jar\") \\\n",
" .getOrCreate()\n",
"\n",
"print(spark.version)\n",
"print(sparknlp.version())"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -428,8 +345,8 @@
"Length: 40221 (39K) [audio/mp3]\n",
"Saving to: ‘1664116679869-voicemaker.in-speech.mp3’\n",
"\n",
"\r",
" 166411667 0%[ ] 0 --.-KB/s \r",
"\r\n",
" 166411667 0%[ ] 0 --.-KB/s \r\n",
"1664116679869-voice 100%[===================>] 39.28K --.-KB/s in 0.02s \n",
"\n",
"2023-08-24 13:54:44 (1.98 MB/s) - ‘1664116679869-voicemaker.in-speech.mp3’ saved [40221/40221]\n",
Expand Down

0 comments on commit 9e5ee8a

Please sign in to comment.