From 3d1c9686cd3f256af599e2cad433883321188ac1 Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Mon, 22 Sep 2025 20:00:01 +0000 Subject: [PATCH 1/4] test run --- Dockerfile.tmpl | 27 +-------------------------- config.txt | 2 +- kaggle_requirements.txt | 11 +---------- 3 files changed, 3 insertions(+), 37 deletions(-) diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl index 1b63837d..2b9a5f26 100644 --- a/Dockerfile.tmpl +++ b/Dockerfile.tmpl @@ -43,9 +43,6 @@ RUN uv pip install \ "nvidia-nvjitlink-cu12==12.5.82" RUN uv pip install --system --force-reinstall "pynvjitlink-cu12==0.5.2" -# b/385145217 Latest Colab lacks mkl numpy, install it. -RUN uv pip install --system --force-reinstall -i https://pypi.anaconda.org/intel/simple numpy - # newer daal4py requires tbb>=2022, but libpysal is downgrading it for some reason RUN uv pip install --system "tbb>=2022" "libpysal==4.9.2" @@ -59,7 +56,7 @@ ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl ADD patches/template_conf.json /opt/kaggle/conf.json # /opt/conda/lib/python3.11/site-packages -ARG PACKAGE_PATH=/usr/local/lib/python3.11/dist-packages +ARG PACKAGE_PATH=/usr/local/lib/python3.12/dist-packages # Install GPU-specific non-pip packages. {{ if eq .Accelerator "gpu" }} @@ -86,28 +83,6 @@ ADD patches/keras_internal.py \ RUN apt-get install -y libfreetype6-dev && \ apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing -# NLTK Project datasets -# b/408298750: We currently reinstall the package, because we get the following error: -# `AttributeError: module 'inspect' has no attribute 'formatargspec'. Did you mean: 'formatargvalues'?` -RUN uv pip install --system --force-reinstall "nltk>=3.9.1" -RUN mkdir -p /usr/share/nltk_data && \ - # NLTK Downloader no longer continues smoothly after an error, so we explicitly list - # the corpuses that work - python -m nltk.downloader -d /usr/share/nltk_data abc alpino averaged_perceptron_tagger \ - basque_grammars biocreative_ppi bllip_wsj_no_aux \ - book_grammars brown brown_tei cess_cat cess_esp chat80 city_database cmudict \ - comtrans conll2000 conll2002 conll2007 crubadan dependency_treebank \ - europarl_raw floresta gazetteers genesis gutenberg \ - ieer inaugural indian jeita kimmo knbc large_grammars lin_thesaurus mac_morpho machado \ - masc_tagged maxent_ne_chunker maxent_treebank_pos_tagger moses_sample movie_reviews \ - mte_teip5 names nps_chat omw opinion_lexicon paradigms \ - pil pl196x porter_test ppattach problem_reports product_reviews_1 product_reviews_2 propbank \ - pros_cons ptb punkt punkt_tab qc reuters rslp rte sample_grammars semcor senseval sentence_polarity \ - sentiwordnet shakespeare sinica_treebank smultron snowball_data spanish_grammars \ - state_union stopwords subjectivity swadesh switchboard tagsets timit toolbox treebank \ - twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \ - vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe - RUN apt-get install -y git-lfs && \ # vtk dependencies apt-get install -y libgl1-mesa-glx && \ diff --git a/config.txt b/config.txt index 61395f5e..7eaa7271 100644 --- a/config.txt +++ b/config.txt @@ -1,4 +1,4 @@ BASE_IMAGE=us-docker.pkg.dev/colab-images/public/runtime -BASE_IMAGE_TAG=release-colab_20250626-060053_RC00 +BASE_IMAGE_TAG=release-colab_20250916-060051_RC00 CUDA_MAJOR_VERSION=12 CUDA_MINOR_VERSION=5 diff --git a/kaggle_requirements.txt b/kaggle_requirements.txt index 03c489b4..aeca74b9 100644 --- a/kaggle_requirements.txt +++ b/kaggle_requirements.txt @@ -9,11 +9,9 @@ PyArabic PyUpSet Pympler Rtree -shapely<2 SimpleITK # b/302136621: Fix eli5 import for learntools, newer version require scikit-learn > 1.3 TPOT==0.12.1 -Theano Wand annoy arrow @@ -49,11 +47,10 @@ geojson geopandas==v0.14.4 gensim google-cloud-aiplatform -# b/315753846: Unpin translate package. -google-cloud-translate==3.12.1 google-cloud-videointelligence google-cloud-vision google-genai +google-adk gpxpy h2o haversine @@ -109,12 +106,10 @@ preprocessing pudb pyLDAvis pycryptodome -pydegensac pydicom pydub pyemd pyexcel-ods -pymc3 pymongo pypdf pytesseract @@ -140,10 +135,6 @@ git+https://github.com/facebookresearch/segment-anything.git # b/329869023: shap 0.45.0 breaks learntools shap==0.44.1 squarify -tensorflow-cloud -tensorflow-io -tensorflow-text -tensorflow_decision_forests timm torchao torchinfo From d99ed2c67883ba3da463f2349634b22d25a23d87 Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Mon, 22 Sep 2025 22:20:15 +0000 Subject: [PATCH 2/4] aaaaaaaaa --- Dockerfile.tmpl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl index 2b9a5f26..a694670a 100644 --- a/Dockerfile.tmpl +++ b/Dockerfile.tmpl @@ -46,6 +46,9 @@ RUN uv pip install --system --force-reinstall "pynvjitlink-cu12==0.5.2" # newer daal4py requires tbb>=2022, but libpysal is downgrading it for some reason RUN uv pip install --system "tbb>=2022" "libpysal==4.9.2" +# b/385145217 Latest Colab lacks mkl numpy, install it. +RUN uv pip install --system --force-reinstall -i https://software.repos.intel.com/python/pypi "numpy==1.26.4" + # b/404590350: Ray and torchtune have conflicting tune cli, we will prioritize torchtune. # b/415358158: Gensim removed from Colab image to upgrade scipy RUN uv pip install --system --force-reinstall --no-deps torchtune gensim "scipy<=1.15.3" From 97ffbf0376d562592942ec6395fd53d1716a8218 Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Tue, 23 Sep 2025 00:15:33 +0000 Subject: [PATCH 3/4] wwwww --- Dockerfile.tmpl | 19 +++++++++++++++++++ tests/test_numpy.py | 15 +++++---------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl index a694670a..956db22d 100644 --- a/Dockerfile.tmpl +++ b/Dockerfile.tmpl @@ -93,6 +93,25 @@ RUN apt-get install -y git-lfs && \ apt-get install -y xvfb && \ /tmp/clean-layer.sh +RUN uv pip install --system --force-reinstall "nltk==3.9.1" +RUN mkdir -p /usr/share/nltk_data && \ + # NLTK Downloader no longer continues smoothly after an error, so we explicitly list + # the corpuses that work + python -m nltk.downloader -d /usr/share/nltk_data abc alpino averaged_perceptron_tagger \ + basque_grammars biocreative_ppi bllip_wsj_no_aux \ + book_grammars brown brown_tei cess_cat cess_esp chat80 city_database cmudict \ + comtrans conll2000 conll2002 conll2007 crubadan dependency_treebank \ + europarl_raw floresta gazetteers genesis gutenberg \ + ieer inaugural indian jeita kimmo knbc large_grammars lin_thesaurus mac_morpho machado \ + masc_tagged maxent_ne_chunker maxent_treebank_pos_tagger moses_sample movie_reviews \ + mte_teip5 names nps_chat omw opinion_lexicon paradigms \ + pil pl196x porter_test ppattach problem_reports product_reviews_1 product_reviews_2 propbank \ + pros_cons ptb punkt punkt_tab qc reuters rslp rte sample_grammars semcor senseval sentence_polarity \ + sentiwordnet shakespeare sinica_treebank smultron snowball_data spanish_grammars \ + state_union stopwords subjectivity swadesh switchboard tagsets timit toolbox treebank \ + twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \ + vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe + # Download base easyocr models. # https://github.com/JaidedAI/EasyOCR#usage RUN mkdir -p /root/.EasyOCR/model && \ diff --git a/tests/test_numpy.py b/tests/test_numpy.py index 948455ea..0db6d631 100644 --- a/tests/test_numpy.py +++ b/tests/test_numpy.py @@ -19,13 +19,8 @@ def test_array(self): # Numpy must be linked to the MKL. (Occasionally, a third-party package will muck up the installation # and numpy will be reinstalled with an OpenBLAS backing.) def test_mkl(self): - try: - from numpy.distutils.system_info import get_info - # This will throw an exception if the MKL is not linked correctly or return an empty dict. - self.assertTrue(get_info("blas_mkl")) - except: - # Fallback to check if mkl is present via show_config() - config_out = io.StringIO() - with redirect_stdout(config_out): - np.show_config() - self.assertIn("mkl_rt", config_out.getvalue()) + # Fallback to check if mkl is present via show_config() + config_out = io.StringIO() + with redirect_stdout(config_out): + np.show_config() + self.assertIn("mkl-dynamic", config_out.getvalue()) From 3c8fa9d1cfe0326f076739f472be673bc9b893fa Mon Sep 17 00:00:00 2001 From: Jonathan Calderon Chavez Date: Tue, 23 Sep 2025 19:16:15 +0000 Subject: [PATCH 4/4] aaaa --- Dockerfile.tmpl | 7 +++--- kaggle_requirements.txt | 5 ++++- tests/test_keras_nlp.py | 4 ++-- tests/test_tensorflow_decision_forests.py | 26 +++++++++++------------ 4 files changed, 23 insertions(+), 19 deletions(-) diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl index 956db22d..ef6a5daa 100644 --- a/Dockerfile.tmpl +++ b/Dockerfile.tmpl @@ -44,7 +44,8 @@ RUN uv pip install \ RUN uv pip install --system --force-reinstall "pynvjitlink-cu12==0.5.2" # newer daal4py requires tbb>=2022, but libpysal is downgrading it for some reason -RUN uv pip install --system "tbb>=2022" "libpysal==4.9.2" +# b/315753846: Unpin translate package. +RUN uv pip install --system "tbb>=2022" "libpysal==4.9.2" "google-cloud-translate==3.12.1" # b/385145217 Latest Colab lacks mkl numpy, install it. RUN uv pip install --system --force-reinstall -i https://software.repos.intel.com/python/pypi "numpy==1.26.4" @@ -58,7 +59,7 @@ ADD clean-layer.sh /tmp/clean-layer.sh ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl ADD patches/template_conf.json /opt/kaggle/conf.json -# /opt/conda/lib/python3.11/site-packages +# /opt/conda/lib/python3.12/site-packages ARG PACKAGE_PATH=/usr/local/lib/python3.12/dist-packages # Install GPU-specific non-pip packages. @@ -177,7 +178,7 @@ ADD patches/kaggle_gcp.py \ # Figure out why this is in a different place? # Found by doing a export PYTHONVERBOSE=1 and then running python and checking for where it looked for it. -ADD patches/sitecustomize.py /usr/lib/python3.11/sitecustomize.py +ADD patches/sitecustomize.py /usr/lib/python3.12/sitecustomize.py ARG GIT_COMMIT=unknown \ BUILD_DATE=unknown diff --git a/kaggle_requirements.txt b/kaggle_requirements.txt index aeca74b9..b9a93751 100644 --- a/kaggle_requirements.txt +++ b/kaggle_requirements.txt @@ -71,7 +71,7 @@ jupyterlab-lsp kaggle>=1.7.4.2 kaggle-environments keras-cv -keras-nlp +keras-hub keras-tuner kornia langid @@ -141,6 +141,9 @@ torchinfo torchmetrics torchtune transformers>=4.51.0 +tensorflow-cloud +tensorflow-io +tensorflow-text triton tsfresh vtk diff --git a/tests/test_keras_nlp.py b/tests/test_keras_nlp.py index 06defce0..465a867b 100644 --- a/tests/test_keras_nlp.py +++ b/tests/test_keras_nlp.py @@ -1,6 +1,6 @@ import unittest -import keras_nlp +import keras_hub import keras import numpy as np @@ -9,7 +9,7 @@ class TestKerasNLP(unittest.TestCase): def test_fit(self): with create_test_kagglehub_server(): - classifier = keras_nlp.models.BertClassifier.from_preset( + classifier = keras_hub.models.BertClassifier.from_preset( 'bert_tiny_en_uncased', load_weights=False, # load randomly initialized model from preset architecture with weights num_classes=2, diff --git a/tests/test_tensorflow_decision_forests.py b/tests/test_tensorflow_decision_forests.py index aeeb2e7d..58a277bf 100644 --- a/tests/test_tensorflow_decision_forests.py +++ b/tests/test_tensorflow_decision_forests.py @@ -1,18 +1,18 @@ -import unittest +# import unittest -import numpy as np -import pandas as pd -import tensorflow_decision_forests as tfdf +# import numpy as np +# import pandas as pd +# import tensorflow_decision_forests as tfdf -class TestTensorflowDecisionForest(unittest.TestCase): - def test_fit(self): - train_df = pd.read_csv("/input/tests/data/train.csv") +# class TestTensorflowDecisionForest(unittest.TestCase): +# def test_fit(self): +# train_df = pd.read_csv("/input/tests/data/train.csv") - # Convert the dataset into a TensorFlow dataset. - train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label="label") +# # Convert the dataset into a TensorFlow dataset. +# train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label="label") - # Train the model - model = tfdf.keras.RandomForestModel(num_trees=1) - model.fit(train_ds) +# # Train the model +# model = tfdf.keras.RandomForestModel(num_trees=1) +# model.fit(train_ds) - self.assertEqual(1, model.count_params()) \ No newline at end of file +# self.assertEqual(1, model.count_params()) \ No newline at end of file