In [40]:
import os
import re
import shutil
from pathlib import Path

# Folder in which the dependency bundle (filtered requirements.txt, archive,
# dataset metadata) is assembled before it is uploaded.
DEPENDENCIES_SAVE_PATH = Path('dependencies')
# Folder in which the zipped source code and its dataset metadata are assembled.
SOURCE_CODE_SAVE_PATH = Path('source-code')
# Root of the project whose requirements.txt and source files are packaged.
SOURCE_CODE_PATH = Path('../')

# Toggles: each flag enables both the build cell and the upload cell of its section.
UPDATE_DEPENDENCIES = False
UPDATE_SOURCE_CODE = True

# Manage Dependencies

### Download Dependencies and ZIP them

In [37]:
if UPDATE_DEPENDENCIES:
    if os.path.exists(DEPENDENCIES_SAVE_PATH):
        print('Cleaning the dependencies folder')
        for filename in os.listdir(DEPENDENCIES_SAVE_PATH):
            file_path = os.path.join(DEPENDENCIES_SAVE_PATH, filename)
            if filename != 'tmp':
                if os.path.isfile(file_path):
                    os.remove(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
    else:
        os.makedirs(DEPENDENCIES_SAVE_PATH)

    print('Copying the requirements.txt file and excluding -e')
    with open(SOURCE_CODE_PATH / 'requirements.txt', 'r') as f:
        lines = f.readlines()
    with open(DEPENDENCIES_SAVE_PATH / 'requirements.txt', 'w') as f:
        for line in lines:
            if line.startswith('-e'):
                continue
            f.write(line)

    print('Downloading the dependencies')
    !pip download -r {DEPENDENCIES_SAVE_PATH / 'requirements.txt'} -d {DEPENDENCIES_SAVE_PATH / 'tmp'}

    print('Zipping the downloaded dependencies')
    shutil.make_archive(DEPENDENCIES_SAVE_PATH / 'dependencies', 'zip', DEPENDENCIES_SAVE_PATH / 'tmp')
    shutil.move(DEPENDENCIES_SAVE_PATH / 'dependencies.zip', DEPENDENCIES_SAVE_PATH / 'dependencies.no_unzip')

    print('Copying the dataset-metadata.json file')
    shutil.copy('dataset-metadata-dependencies.json', DEPENDENCIES_SAVE_PATH / 'dataset-metadata.json')

    print('Excluding --find-files in requirements.txt')
    with open(DEPENDENCIES_SAVE_PATH / 'requirements.txt', 'r') as f:
        lines = f.readlines()
    with open(DEPENDENCIES_SAVE_PATH / 'requirements.txt', 'w') as f:
        for line in lines:
            if line.startswith('--find-links'):
                continue
            f.write(line)

    print('Done')

Cleaning the dependencies folder
Copying the requirements.txt file and excluding -e
Downloading the dependencies
Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting affine==2.4.0 (from -r dependencies/requirements.txt (line 1))
  Using cached affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting alabaster==0.7.13 (from -r dependencies/requirements.txt (line 2))
  Using cached alabaster-0.7.13-py3-none-any.whl.metadata (3.0 kB)
Collecting ansi2html==1.9.1 (from -r dependencies/requirements.txt (line 3))
  Using cached ansi2html-1.9.1-py3-none-any.whl.metadata (3.7 kB)
Collecting antlr4-python3-runtime==4.9.3 (from -r dependencies/requirements.txt (line 4))
  Using cached antlr4-python3-runtime-4.9.3.tar.gz (117 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting anyio=

### Upload Dependencies to Kaggle as a Dataset

In [38]:
if UPDATE_DEPENDENCIES:
    # !kaggle datasets create -p ./dependencies
    !kaggle datasets version -d -p ./dependencies -m "Update Dependencies"


Starting upload for file requirements.txt
100%|██████████████████████████████████████| 4.44k/4.44k [00:01<00:00, 2.74kB/s]
Upload successful: requirements.txt (4KB)
Starting upload for file dependencies.no_unzip
100%|██████████████████████████████████████| 2.71G/2.71G [01:24<00:00, 34.5MB/s]
Upload successful: dependencies.no_unzip (3GB)
Skipping folder: tmp; use '--dir-mode' to upload folders
Dataset version is being created. Please check progress at https://www.kaggle.com/justanotherariel/epoch-hms-dependencies


# Manage Source Code

### Copy Source Code and ZIP it

In [48]:
# Stage the relevant project sources, zip them and add the dataset metadata.
if UPDATE_SOURCE_CODE:
    staging_dir = SOURCE_CODE_SAVE_PATH / "tmp"

    # Start from an empty output folder on every run.
    if os.path.exists(SOURCE_CODE_SAVE_PATH):
        shutil.rmtree(SOURCE_CODE_SAVE_PATH)
    # Create the staging folder explicitly. Previously it only came into being as
    # a side effect of the first copytree call, so listing a plain file before any
    # directory in relevant_files would have failed.
    os.makedirs(staging_dir)

    # Copy the source code into the staging folder
    relevant_files = ['src/', 'conf/', 'tm/', 'submit.py']
    excluded_files = ['__pycache__']
    for file in relevant_files:
        source = SOURCE_CODE_PATH / file
        target = staging_dir / file
        if os.path.isdir(source):
            # Copy directory, skipping excluded names at every level
            shutil.copytree(source, target, ignore=shutil.ignore_patterns(*excluded_files))
        else:
            # Copy file; make sure its parent exists for nested entries
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(source, target)

    # Zip the staged sources next to (not inside) the staging folder, then drop it
    shutil.make_archive(SOURCE_CODE_SAVE_PATH / 'source-code', 'zip', staging_dir)
    shutil.rmtree(staging_dir)

    # Copy dataset-metadata.json next to the archive
    shutil.copy('dataset-metadata-source-code.json', SOURCE_CODE_SAVE_PATH / 'dataset-metadata.json')

    print('Submission files saved to source_code')

Submission files saved to source_code


### Upload Source Code

In [49]:
if UPDATE_SOURCE_CODE:
    # !kaggle datasets create -p ./source-code
    !kaggle datasets version -p ./source-code -m "Update Source Code"

Starting upload for file source-code.zip
100%|████████████████████████████████████████| 116k/116k [00:01<00:00, 79.9kB/s]
Upload successful: source-code.zip (116KB)
Dataset version is being created. Please check progress at https://www.kaggle.com/justanotherariel/epoch-hms-source-code
