diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
index 1b63837d..ef6a5daa 100644
--- a/Dockerfile.tmpl
+++ b/Dockerfile.tmpl
@@ -43,11 +43,12 @@ RUN uv pip install \
     "nvidia-nvjitlink-cu12==12.5.82"
 RUN uv pip install --system --force-reinstall "pynvjitlink-cu12==0.5.2"
 
-# b/385145217 Latest Colab lacks mkl numpy, install it.
-RUN uv pip install --system --force-reinstall -i https://pypi.anaconda.org/intel/simple numpy
-
 # newer daal4py requires tbb>=2022, but libpysal is downgrading it for some reason
-RUN uv pip install --system "tbb>=2022" "libpysal==4.9.2"
+# b/315753846: Unpin translate package.
+RUN uv pip install --system "tbb>=2022" "libpysal==4.9.2" "google-cloud-translate==3.12.1"
+
+# b/385145217 Latest Colab lacks mkl numpy, install it.
+RUN uv pip install --system --force-reinstall -i https://software.repos.intel.com/python/pypi "numpy==1.26.4"
 
 # b/404590350: Ray and torchtune have conflicting tune cli, we will prioritize torchtune.
 # b/415358158: Gensim removed from Colab image to upgrade scipy
@@ -58,8 +59,8 @@ ADD clean-layer.sh /tmp/clean-layer.sh
 ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl
 ADD patches/template_conf.json /opt/kaggle/conf.json
 
-# /opt/conda/lib/python3.11/site-packages
-ARG PACKAGE_PATH=/usr/local/lib/python3.11/dist-packages
+# /opt/conda/lib/python3.12/site-packages
+ARG PACKAGE_PATH=/usr/local/lib/python3.12/dist-packages
 
 # Install GPU-specific non-pip packages.
 {{ if eq .Accelerator "gpu" }}
@@ -86,10 +87,14 @@ ADD patches/keras_internal.py \
 RUN apt-get install -y libfreetype6-dev && \
     apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing
 
-# NLTK Project datasets
-# b/408298750: We currently reinstall the package, because we get the following error:
-# `AttributeError: module 'inspect' has no attribute 'formatargspec'. Did you mean: 'formatargvalues'?`
-RUN uv pip install --system --force-reinstall "nltk>=3.9.1"
+RUN apt-get install -y git-lfs && \
+    # vtk dependencies
+    apt-get install -y libgl1-mesa-glx && \
+    # xvfbwrapper dependencies
+    apt-get install -y xvfb && \
+    /tmp/clean-layer.sh
+
+RUN uv pip install --system --force-reinstall "nltk==3.9.1"
 RUN mkdir -p /usr/share/nltk_data && \
     # NLTK Downloader no longer continues smoothly after an error, so we explicitly list
     # the corpuses that work
@@ -108,13 +113,6 @@ RUN mkdir -p /usr/share/nltk_data && \
     twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \
     vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe
 
-RUN apt-get install -y git-lfs && \
-    # vtk dependencies
-    apt-get install -y libgl1-mesa-glx && \
-    # xvfbwrapper dependencies
-    apt-get install -y xvfb && \
-    /tmp/clean-layer.sh
-
 # Download base easyocr models.
 # https://github.com/JaidedAI/EasyOCR#usage
 RUN mkdir -p /root/.EasyOCR/model && \
@@ -180,7 +178,7 @@ ADD patches/kaggle_gcp.py \
 
 # Figure out why this is in a different place?
 # Found by doing a export PYTHONVERBOSE=1 and then running python and checking for where it looked for it.
-ADD patches/sitecustomize.py /usr/lib/python3.11/sitecustomize.py
+ADD patches/sitecustomize.py /usr/lib/python3.12/sitecustomize.py
 
 ARG GIT_COMMIT=unknown \
     BUILD_DATE=unknown
diff --git a/config.txt b/config.txt
index 61395f5e..7eaa7271 100644
--- a/config.txt
+++ b/config.txt
@@ -1,4 +1,4 @@
 BASE_IMAGE=us-docker.pkg.dev/colab-images/public/runtime
-BASE_IMAGE_TAG=release-colab_20250626-060053_RC00
+BASE_IMAGE_TAG=release-colab_20250916-060051_RC00
 CUDA_MAJOR_VERSION=12
 CUDA_MINOR_VERSION=5
diff --git a/kaggle_requirements.txt b/kaggle_requirements.txt
index 03c489b4..b9a93751 100644
--- a/kaggle_requirements.txt
+++ b/kaggle_requirements.txt
@@ -9,11 +9,9 @@ PyArabic
 PyUpSet
 Pympler
 Rtree
-shapely<2
 SimpleITK
 # b/302136621: Fix eli5 import for learntools, newer version require scikit-learn > 1.3
 TPOT==0.12.1
-Theano
 Wand
 annoy
 arrow
@@ -49,11 +47,10 @@ geojson
 geopandas==v0.14.4
 gensim
 google-cloud-aiplatform
-# b/315753846: Unpin translate package.
-google-cloud-translate==3.12.1
 google-cloud-videointelligence
 google-cloud-vision
 google-genai
+google-adk
 gpxpy
 h2o
 haversine
@@ -74,7 +71,7 @@ jupyterlab-lsp
 kaggle>=1.7.4.2
 kaggle-environments
 keras-cv
-keras-nlp
+keras-hub
 keras-tuner
 kornia
 langid
@@ -109,12 +106,10 @@ preprocessing
 pudb
 pyLDAvis
 pycryptodome
-pydegensac
 pydicom
 pydub
 pyemd
 pyexcel-ods
-pymc3
 pymongo
 pypdf
 pytesseract
@@ -140,16 +135,15 @@ git+https://github.com/facebookresearch/segment-anything.git
 # b/329869023: shap 0.45.0 breaks learntools
 shap==0.44.1
 squarify
-tensorflow-cloud
-tensorflow-io
-tensorflow-text
-tensorflow_decision_forests
 timm
 torchao
 torchinfo
 torchmetrics
 torchtune
 transformers>=4.51.0
+tensorflow-cloud
+tensorflow-io
+tensorflow-text
 triton
 tsfresh
 vtk
diff --git a/tests/test_keras_nlp.py b/tests/test_keras_nlp.py
index 06defce0..465a867b 100644
--- a/tests/test_keras_nlp.py
+++ b/tests/test_keras_nlp.py
@@ -1,6 +1,6 @@
 import unittest
 
-import keras_nlp
+import keras_hub
 import keras
 import numpy as np
 
@@ -9,7 +9,7 @@ class TestKerasNLP(unittest.TestCase):
 
     def test_fit(self):
         with create_test_kagglehub_server():
-            classifier = keras_nlp.models.BertClassifier.from_preset(
+            classifier = keras_hub.models.BertClassifier.from_preset(
                 'bert_tiny_en_uncased',
                 load_weights=False, # load randomly initialized model from preset architecture with weights
                 num_classes=2,
diff --git a/tests/test_numpy.py b/tests/test_numpy.py
index 948455ea..0db6d631 100644
--- a/tests/test_numpy.py
+++ b/tests/test_numpy.py
@@ -19,13 +19,8 @@ def test_array(self):
     # Numpy must be linked to the MKL. (Occasionally, a third-party package will muck up the installation
     # and numpy will be reinstalled with an OpenBLAS backing.)
     def test_mkl(self):
-        try:
-            from numpy.distutils.system_info import get_info
-            # This will throw an exception if the MKL is not linked correctly or return an empty dict.
-            self.assertTrue(get_info("blas_mkl"))
-        except:
-            # Fallback to check if mkl is present via show_config()
-            config_out = io.StringIO()
-            with redirect_stdout(config_out):
-                np.show_config()
-            self.assertIn("mkl_rt", config_out.getvalue())
+        # Check if mkl is present via show_config()
+        config_out = io.StringIO()
+        with redirect_stdout(config_out):
+            np.show_config()
+        self.assertIn("mkl-dynamic", config_out.getvalue())
diff --git a/tests/test_tensorflow_decision_forests.py b/tests/test_tensorflow_decision_forests.py
index aeeb2e7d..58a277bf 100644
--- a/tests/test_tensorflow_decision_forests.py
+++ b/tests/test_tensorflow_decision_forests.py
@@ -1,18 +1,18 @@
-import unittest
+# import unittest
 
-import numpy as np
-import pandas as pd
-import tensorflow_decision_forests as tfdf
+# import numpy as np
+# import pandas as pd
+# import tensorflow_decision_forests as tfdf
 
-class TestTensorflowDecisionForest(unittest.TestCase):
-    def test_fit(self):
-        train_df = pd.read_csv("/input/tests/data/train.csv")
+# class TestTensorflowDecisionForest(unittest.TestCase):
+#     def test_fit(self):
+#         train_df = pd.read_csv("/input/tests/data/train.csv")
 
-        # Convert the dataset into a TensorFlow dataset.
-        train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label="label")
+#         # Convert the dataset into a TensorFlow dataset.
+#         train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label="label")
 
-        # Train the model
-        model = tfdf.keras.RandomForestModel(num_trees=1)
-        model.fit(train_ds)
+#         # Train the model
+#         model = tfdf.keras.RandomForestModel(num_trees=1)
+#         model.fit(train_ds)
 
-        self.assertEqual(1, model.count_params())
\ No newline at end of file
+#         self.assertEqual(1, model.count_params())
\ No newline at end of file
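
Note on the test_mkl change in tests/test_numpy.py: numpy.distutils (home of the old
system_info/get_info probe) is not available on Python 3.12, so the test now inspects
np.show_config() output instead. A minimal standalone sketch of the same check, runnable
in any environment; the full BLAS name (e.g. "mkl-dynamic-lp64-iomp") is an assumption
about Intel's MKL builds, and the test itself only asserts the "mkl-dynamic" prefix:

    import io
    from contextlib import redirect_stdout

    import numpy as np

    # np.show_config() prints the build/link configuration, including the name of
    # the BLAS backend: MKL-linked builds mention "mkl-dynamic...", while
    # OpenBLAS-backed builds mention "openblas" instead.
    buf = io.StringIO()
    with redirect_stdout(buf):
        np.show_config()
    print("MKL-linked:", "mkl-dynamic" in buf.getvalue())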