In [None]:
#| default_exp libraries

# fastkaggle.libraries

> API details for the fastkaggle `libraries` module, which helps manage pip libraries as Kaggle datasets

In [None]:
#|hide
from nbdev.showdoc import *

In [None]:
#|export
import os,json,subprocess, shutil
import re
from fastcore.utils import *

from fastkaggle.core import *
from fastkaggle.datasets import *
from fastkaggle.competition import *
# from fastcore.all import *

### Pip Libraries

In [None]:
#| export
def get_pip_library(pip_library, # name of library for pip to install
                    cfg_path='.' # Path to fastkaggle.json file
                   ):
    '''Download the distribution files for `pip_library` into `<cfg_path>/<data_path>/<pip_library>`.

    Runs `<pip_cmd> download <pip_library> -d <dataset_path>`, where `pip_cmd` and
    `data_path` are read from the config returned by `get_config_values(cfg_path)`.

    Returns `(process, output, error)`: the finished `Popen` object, the bytes pip wrote
    to stdout, and `error`, which is always `None` because stderr is not piped.
    '''
    cfg = get_config_values(cfg_path)
    dataset_path = Path(cfg_path)/cfg['data_path']/pip_library

    # Build argv as a list instead of formatting one string and splitting it, so a
    # download path that contains whitespace reaches pip as a single argument.
    # `pip_cmd` is still split on whitespace so multi-word values such as
    # "python -m pip" keep working exactly as before.
    cmd = [*cfg['pip_cmd'].split(), 'download', pip_library, '-d', str(dataset_path)]
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    output, error = process.communicate()
    return process,output,error

In [None]:
# Smoke test: download one library, check that its folder appears, then clean up.
lib = 'fastcore'
get_pip_library(lib)
lib_dir = Path(lib)
assert lib_dir.exists()
for downloaded in lib_dir.ls(): downloaded.unlink()
lib_dir.rmdir()

In [None]:
#| export
def get_pip_libraries(directory_name, # name of the folder under `data_path` that receives the downloads
                    cfg_path='.' # Path to fastkaggle.json file
                   ):
    '''Download every library listed in the config's `required_libraries` into one folder.

    Runs `<pip_cmd> download <libraries...> -d <cfg_path>/<data_path>/<directory_name>`,
    with `pip_cmd`, `data_path` and `required_libraries` read from the config returned
    by `get_config_values(cfg_path)`.

    Returns `(process, output, error)`: the finished `Popen` object, the bytes pip wrote
    to stdout, and `error`, which is always `None` because stderr is not piped.
    '''
    cfg = get_config_values(cfg_path)
    dataset_path = Path(cfg_path)/cfg['data_path']/directory_name

    # Each entry is split on whitespace, which matches how the previous single-string
    # command was tokenised (so an entry carrying extra pip options still works).
    libraries = [token for entry in cfg['required_libraries'] for token in entry.split()]

    # The destination path is passed as its own argv element so that whitespace in
    # the path does not break it into several arguments.
    cmd = [*cfg['pip_cmd'].split(), 'download', *libraries, '-d', str(dataset_path)]
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    output, error = process.communicate()
    return process,output,error

In [None]:
# Smoke test: download the configured libraries into a scratch folder, then clean up.
directory_name = 'my-test-libs'
get_pip_libraries(directory_name)
libs_dir = Path(directory_name)
assert libs_dir.exists()
for downloaded in libs_dir.ls(): downloaded.unlink()
libs_dir.rmdir()

In [None]:
#| export
def get_local_ds_ver(lib_path, # Local path dataset is stored in
                     lib # Name of library (ie "fastcore")
                    ):
    '''Return the version of `lib` found in the local dataset copy `lib_path/library-<lib>`, or `None`.

    The version is parsed from the file name of the single file whose lower-cased name
    contains the library name (with `-` replaced by `_`). The dataset folder itself is
    searched first, then its `dist` sub-directory if there is one. `None` is returned
    when no single matching file is found or when no version can be parsed from its name.
    '''
    # File names are lower-cased before comparison, so the needle must be too;
    # otherwise a mixed-case name such as "PyYAML" could never match.
    wheel_lib_name = lib.replace('-','_').lower()
    # Raw string avoids the invalid "\d" escape; re.escape keeps any regex
    # metacharacter in the library name literal.
    version_re = re.compile(rf"(?<={re.escape(wheel_lib_name)}-)[\d+.]+\d")
    local_path = Path(lib_path)/f"library-{lib}"

    def _version_in(directory):
        # Version parsed from the one matching file in `directory`, else None.
        matches = [p for p in directory.iterdir() if wheel_lib_name in p.name.lower()]
        if len(matches) != 1: return None
        found = version_re.search(matches[0].name.lower())
        return found[0] if found else None

    version = _version_in(local_path)
    # Only descend into `dist` when it really is a directory; a file that merely has
    # "dist" in its name must not trigger a listing of a folder that does not exist.
    if version is None and (local_path/'dist').is_dir():
        version = _version_in(local_path/'dist')
    return version

### High Level

In [None]:
#| export
def create_dependency_dataset(cfg_path='.', # Path to fastkaggle.json file
                                version_notes = "New Update", # Notes passed to `push_dataset` when a new version is pushed
                               ):
    '''Refresh the local libraries dataset from pip and push it to Kaggle if its file list changed.

    Steps:
    1. If the local dataset folder is missing, fetch it with `get_dataset` when
       `ds_exists` reports the dataset, otherwise create it with `mk_dataset`.
    2. Remove everything in the folder except the names in `retain`, then download
       the configured libraries again with `get_pip_libraries`.
    3. Compare the sorted folder listing from before and after; call `push_dataset`
       only when the two listings differ.
    '''
    # Must be a plain list of names. With a trailing comma this becomes a tuple that
    # wraps the list, `item.name in retain` is then never true, and the metadata
    # file is deleted together with everything else.
    retain = ["dataset-metadata.json"]
    cfg = get_config_values(cfg_path)

    ds_name = cfg['libraries_dataset_name']
    local_path = Path(cfg_path)/cfg['data_path']/ds_name
    ds_slug = f"{cfg['datasets_username']}/{ds_name}"

    print("-----Downloading or Creating Dataset if needed")
    if local_path.exists(): pass
    elif ds_exists(ds_slug): get_dataset(ds_slug,str(local_path))
    else:                    mk_dataset(local_path,ds_name)

    print("-----Checking dataset files against pip")
    orig_ds = local_path.ls().sorted()
    # Wipe the previous downloads so the folder ends up holding only what pip fetches now.
    for item in local_path.ls():
        if item.name in retain: continue
        if item.is_dir(): shutil.rmtree(item)
        else: item.unlink()
    get_pip_libraries(ds_name,cfg_path)
    new_ds = local_path.ls().sorted()

    if orig_ds != new_ds:
        print(f"-----Updating {ds_name} in Kaggle")
        push_dataset(local_path,version_notes)
    else: print("-----Kaggle dataset already up to date")

In [None]:
# End-to-end check: build the dependency dataset, confirm it exists both locally
# and on Kaggle, then remove the local copy so the test leaves no files behind.
create_dependency_dataset()
path = Path('libraries-titanic')
assert path.exists()
assert ds_exists('isaacflath/libraries-titanic')
path.ls().map(lambda x: x.unlink())
path.rmdir()

-----Downloading or Creating Dataset if needed
-----Checking dataset files against pip
-----Updating libraries-titanic in Kaggle


## Export -

In [None]:
#|hide
#|eval: false
from nbdev.doclinks import nbdev_export
nbdev_export()  # NOTE(review): presumably regenerates the exported module from the `#| export` cells; confirm against the nbdev docs