In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
#default_exp core

In [None]:
#exports
import os
import shutil
import json
import gc
import sys
import time
import logging
import zipfile
from itertools import chain
from typing import Union
from multiprocessing import Pool, cpu_count

import numpy as np
import pandas as pd
import psutil
from fastcore.script import call_parse, Param
from fastcore.parallel import parallel

In [None]:
#hide
from nbdev.showdoc import *

# Core
> Some basic functions used in the chrisrichardmiles library. 

The main functions of interest are: 
 * `mkdirs_data` to initialize the data directory structure
 * `get_file_cols_dict` to make a json and/or dictionary of all features available
 * `save_file` to save features with the data types as the top row of the csv
 * `load_file` and `load_features` to load in features

## Data directories 

In [None]:
#exports
def mkdirs_data(data_dir_name: str='data') -> None:
    """Create the standard data folder layout under `data_dir_name`.

    The sub-folders `raw`, `interim`, `features` and `models` are created
    when missing; folders that already exist are left untouched.
    """
    for sub_dir in ('raw', 'interim', 'features', 'models'):
        os.makedirs(f'{data_dir_name}/{sub_dir}', exist_ok=True)
    
@call_parse
def cli_mkdirs_data(data_dir_name: Param('Name of data folder', str)='data') -> None: 
    """Initializes the data directory structure by calling `mkdirs_data`
    with `data_dir_name`. Decorated with fastcore's `call_parse`.
    """
    mkdirs_data(data_dir_name)
    
@call_parse
def cp_tree(dir1: Param('path to directory to copy', str), 
            dir2: Param('path to endpoint', str)): 
    """Copies the directory tree at `dir1` to `dir2` with `shutil.copytree`.
    `dirs_exist_ok=True` means an already existing `dir2` is not an error.
    """
    shutil.copytree(dir1, dir2, dirs_exist_ok=True)

In [None]:
#export 
def download_kaggle_data(comp_name:str=None):
    """Downloads competition data using the kaggle api.

    Creates the data directories with `mkdirs_data`, runs
    `kaggle competitions download -c <comp_name>`, extracts the archive
    `<comp_name>.zip` from the current working directory into `data/raw`
    and finally deletes the archive.

    Parameters
    ----------
    comp_name: str
        Name of the kaggle competition. Required, even though the
        signature keeps a `None` default for backward compatibility.

    Raises
    ------
    ValueError
        If `comp_name` is empty or None.
    FileNotFoundError
        If `<comp_name>.zip` does not exist after the download command ran
        (for example because the download failed), or if the `kaggle`
        executable cannot be found.
    """
    import subprocess  # local import: only this function needs it

    if not comp_name:
        raise ValueError('`comp_name` must be the name of a kaggle competition')

    mkdirs_data()
    # Pass an argument list (no shell) so `comp_name` can never be
    # interpreted as shell syntax.
    subprocess.run(['kaggle', 'competitions', 'download', '-c', comp_name])

    zip_path = f'{comp_name}.zip'
    if not os.path.exists(zip_path):
        raise FileNotFoundError(
            f'{zip_path} was not found; the kaggle download probably failed')
    # The context manager closes the archive before it is removed.
    with zipfile.ZipFile(zip_path) as zf:
        zf.extractall('data/raw')
    os.remove(zip_path)
    
@call_parse
def cli_download_kaggle_data(comp_name:Param('name of kaggle competition', str)=None):
    """Downloads competition data by calling `download_kaggle_data`
    with `comp_name`. Decorated with fastcore's `call_parse`.
    """
    download_kaggle_data(comp_name)

## Saving and loading files


### Prevent saving over files 

To make sure we don't accidentally save over a file with the same name, we have some functions that ensure certain strings and paths are unique.

In [None]:
#exports
def make_unique(name: str, names: "list or set or dict") -> str:
    """Return `name` unchanged when it is not in `names`; otherwise
    return it with the first free `(x)_` prefix, counting x up from 1.

    Handy for picking a key or file name that will not overwrite
    something already stored under `name`.
    """
    if name not in names:
        return name

    counter = 1
    candidate = f'({counter})_' + name
    while candidate in names:
        counter += 1
        candidate = f'({counter})_' + name
    return candidate

In [None]:
make_unique('d', ['a', 'b', 'c'])

'd'

In [None]:
make_unique('a', ['a', 'b', 'c'])

'(1)_a'

In [None]:
assert make_unique('d', ['a', 'b', 'c']) == 'd'
assert make_unique('chris', ['chris', 'dan', 'bill']) == '(1)_chris'
assert make_unique('chris', ['chris', 'dan', 'bill', '(1)_chris']) == '(2)_chris'
assert make_unique('chris', set(['chris', 'dan', 'bill', '(1)_chris'])) == '(2)_chris'
assert make_unique('a', {'a': 1, 'b': 2, 'c': 3}) == '(1)_a'
assert make_unique('d', {'a': 1, 'b': 2, 'c': 3}) == 'd'

In [None]:
#exports
def make_unique_path(path):
    """Return `path`, adding a '(n)_' prefix to its last element when
    an entry with that name already exists in the containing directory.
    An empty directory part means the current directory is listed.
    """
    directory = os.path.dirname(path)
    taken_names = os.listdir(directory or '.')
    unique_name = make_unique(os.path.basename(path), taken_names)
    return os.path.join(directory, unique_name)

In [None]:
os.makedirs('tmp', exist_ok=True)
print(make_unique_path('tmp/tmp.csv'))
open('tmp/tmp.csv', 'w').close()
print(make_unique_path('tmp/tmp.csv'))
shutil.rmtree('tmp')

tmp/tmp.csv
tmp/(1)_tmp.csv


### Saving and loading pandas dataframes

In [None]:
#exports
def save_file(df: pd.DataFrame,
              path: str,
              usecols: list=None,
              save_index: bool=False,
              save_dtypes: bool=True,
              pickle: bool=False) -> None:
    """Saves `df` to `path`, with the dtypes as the first row below the
    header if `save_dtypes` is set to True. Load a file in this
    structure with `load_file`.

    Parameters
    ----------
    df: pd.DataFrame
        The data to save.
    path: str
        Csv mode: target file; it is passed through `make_unique_path`
        so an existing file is never overwritten.
        Pickle mode: target folder (if `path` ends with '.csv', the
        folder containing that file is used instead).
    usecols: list=None
        Columns to save. All columns are saved when empty or None.
    save_index: bool=False
        Csv mode only: also write the index (and its dtype).
    save_dtypes: bool=True
        Csv mode only: write the dtype row.
    pickle: bool=False
        Save every selected column to its own '<column>.pkl' file instead
        of writing a csv. Existing pickle files are overwritten.
    """
    if pickle:
        cols = list(usecols) if usecols else list(df)
        # For M5 project maintenance: a csv path means "the folder of that csv"
        path_dir = os.path.split(path)[0] if path.endswith('.csv') else path
        for col in cols:
            df[[col]].to_pickle(os.path.join(path_dir, col + '.pkl'))
        return

    path = make_unique_path(path)
    if not save_dtypes:
        df.to_csv(path, index=save_index, columns=usecols)
        return

    # Only the dtypes are needed. A slice (rather than `iloc[[0]]`) also
    # works when `df` has no rows.
    df_tmp = df.iloc[:1]
    if usecols: df_tmp = df_tmp.loc[:, usecols]
    if save_index: df_tmp = df_tmp.reset_index()
    # First write: header plus the dtype row. Second write: the data, appended.
    df_tmp.dtypes.to_frame().T.to_csv(path, index=False)
    df.to_csv(path, mode='a', index=save_index, header=False,
              columns=usecols)
        
def load_file(path: str, load_dtypes=True, usecols: list=None) -> pd.DataFrame:
    """Loads a file into a DataFrame from `path`, with dtypes taken from
    the first row below the header if `load_dtypes` is set to True.
    Loads files in the structure created with `save_file`.

    Parameters
    ----------
    path: str
        Path to a csv file, or to a pickle file (any path ending in 'pkl').
    load_dtypes: bool=True
        Csv only: treat the first data row as the dtype of each column and
        skip it when reading the data.
    usecols: list=None
        Columns to load. For pickle files an empty list or None returns
        all columns.
    """
    if path.endswith('pkl'):
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        df = pd.read_pickle(path)
        return df[usecols] if usecols else df

    if not load_dtypes:
        return pd.read_csv(path, usecols=usecols)

    dtypes = pd.read_csv(path, nrows=1).iloc[0].to_dict()
    # `read_csv` rejects datetime64 entries in `dtype`; those columns have to
    # be requested through `parse_dates` instead.
    date_cols = [c for c, t in dtypes.items() if str(t).startswith('datetime64')]
    for col in date_cols:
        del dtypes[col]
    if usecols is not None and not callable(usecols):
        date_cols = [c for c in date_cols if c in usecols]
    # skiprows=[1] drops the dtype row (line 0 is the header).
    return pd.read_csv(path, skiprows=[1], dtype=dtypes, usecols=usecols,
                       parse_dates=date_cols or False)

In [None]:
# Example
df = pd.DataFrame({'a': [1, 2], 'b': ['foo', 'bar'], 'c': [1.2, 3.3]})
df = df.astype(dict(zip(['a', 'b', 'c'], ['int32', 'category', np.float16])))
print('Saving the the dataframe to csv with the dtypes')
display(df.dtypes)
save_file(df, 'tmp.csv', pickle=False)
print('Now the csv has the datatypes as the top line when we read it in')
display(pd.read_csv('tmp.csv'))

print('We can use `load_file` to read in the csv with the right dtypes')
display(load_file('tmp.csv'))
display(load_file('tmp.csv').dtypes)

Saving the the dataframe to csv with the dtypes


a       int32
b    category
c     float16
dtype: object

Now the csv has the datatypes as the top line when we read it in


Unnamed: 0,a,b,c
0,int32,category,float16
1,1,foo,1.2
2,2,bar,3.3


We can use `load_file` to read in the csv with the right dtypes


Unnamed: 0,a,b,c
0,1,foo,1.200195
1,2,bar,3.300781


a       int32
b    category
c     float16
dtype: object

In [None]:
save_file(df, '.', pickle=True)
display(pd.concat([load_file(x + '.pkl') for x in 'abc'], axis=1))
!rm a.pkl b.pkl c.pkl

Unnamed: 0,a,b,c
0,1,foo,1.200195
1,2,bar,3.300781


In [None]:
save_file(df, 'tmp2.csv', usecols=['a', 'c'], save_index=True, pickle=False)
load_file('tmp2.csv')

Unnamed: 0,index,a,c
0,0,1,1.200195
1,1,2,3.300781


In [None]:
!rm tmp*.csv

### Making a dictionary and json with file names as keys and list of column names as values. 

In [None]:
for file in sorted(os.listdir('.')):
    print(file)

.devcontainer.json
.git
.gitattributes
.gitconfig
.github
.gitignore
.ipynb_checkpoints
.pypirc
00_core.ipynb
CONTRIBUTING.md
LICENSE
MANIFEST.in
Makefile
README.md
chrisrichardmiles
chrisrichardmiles.egg-info
data
docker-compose.yml
docs
index.ipynb
log.log
projects
settings.ini
setup.py
small_data


In [None]:
#exports
def get_file_cols_dict(path: str='.',
                       path_json: str='',
                       ignore_cols: list=None):
    """Explores `path` and returns a dictionary of file names and their
    columns for each file in `path`. Only file names that end with
    '.csv' and '.pkl' will be considered. Pickle file names
    will go in the 'pickles' key of the returned dictionary.
    Csv files will see their file name saved as a key with
    a list of their column names saved as the corresponding
    value.

    Parameters
    ----------
    path: str='.'
        Directory to explore (not recursive). Files are visited in
        sorted order.
    path_json: str=''
        If not empty, the dictionary is also written to this json file.
    ignore_cols: list=None
        Column names to leave out of the csv column lists.
        Defaults to ['index'].
    """
    if ignore_cols is None:
        ignore_cols = ['index']

    d = {}
    for file in sorted(os.listdir(path)):
        if file.endswith('.csv'):
            # nrows=0 reads only the header line
            cols = pd.read_csv(os.path.join(path, file), nrows=0).columns.tolist()
            d[file] = [c for c in cols if c not in ignore_cols]
        elif file.endswith('.pkl'):
            d.setdefault('pickles', []).append(file)
    if path_json:
        with open(path_json, 'w') as json_file:
            json.dump(d, json_file, indent=0)
    return d

@call_parse
def fe_dict(path: Param('path to directory with files', str)='data/features', 
            path_json: Param('path to json for saving dict', str)='fe_dict.json'):
    """Calls `get_file_cols_dict` on `path` and saves the resulting
    dictionary to `path_json`. The returned dictionary is discarded.
    Decorated with fastcore's `call_parse`.
    """
    get_file_cols_dict(path, path_json)

In [None]:
df1 = pd.DataFrame({'feat_1': [1,2,2,4], 'feat_2': [1,1,3,3], 'feat_3': [1,4,3,3]})
df2 = pd.DataFrame({'shift_feat_4': [1,9,2,4], 'shift_feat_5': [1,1,3,9], 'shift_feat_6': [1,9,3,3]})
df3 = pd.DataFrame({'feat_7': [1,7,2,4], 'feat_8': [7,1,3,3], 'feat_9': [1,7,3,3]})
df4 = pd.DataFrame({'feat_10': [1,7,2,4], 'feat_11': [7,1,3,3], 'feat_12': [1,7,3,3], 
                    'feat_13': ['a', 'b', 'c', 'd']})
df4.feat_10 = df4.feat_10.astype('int8')
df4.feat_13 = df4.feat_13.astype('category')

save_file(df1, 'features_1.csv', pickle=False)
save_file(df2, 'shift_features_2.csv', pickle=False)
save_file(df3, 'features_3.csv', pickle=False)
save_file(df4, 'features_4.csv', save_index=True, pickle=False)
save_file(df3, 'features_3_less_cols.csv', usecols=['feat_7'], pickle=False)

In [None]:
get_file_cols_dict('.', path_json='tmp_features.json')

{'features_1.csv': ['feat_1', 'feat_2', 'feat_3'],
 'features_3.csv': ['feat_7', 'feat_8', 'feat_9'],
 'features_3_less_cols.csv': ['feat_7'],
 'features_4.csv': ['feat_10', 'feat_11', 'feat_12', 'feat_13'],
 'shift_features_2.csv': ['shift_feat_4', 'shift_feat_5', 'shift_feat_6']}

In [None]:
get_file_cols_dict('.')

{'features_1.csv': ['feat_1', 'feat_2', 'feat_3'],
 'features_3.csv': ['feat_7', 'feat_8', 'feat_9'],
 'features_3_less_cols.csv': ['feat_7'],
 'features_4.csv': ['feat_10', 'feat_11', 'feat_12', 'feat_13'],
 'shift_features_2.csv': ['shift_feat_4', 'shift_feat_5', 'shift_feat_6']}

### Loading features 

In [None]:
#export
def load_features(path_features: Union[list, str],
                  dict_features: Union[dict, str]=None,
                  shift_index: int=0,
                  reindex_with: "list like"=None,
                  shift_prefix: Union[str, bool]='shift',
                  load_dtypes: bool=True,
                  features: list=None,
                  pickle: bool=True) -> pd.DataFrame:
    """Loads the features selected in `dict_features` into a dataframe.
    Returns None (after logging and printing a message) when nothing
    was loaded.

    Parameters
    ----------
    path_features: Union[list, str]
        path to the folder that holds the features files. A list of
        folders is also accepted: each one is loaded with the same
        arguments and the results are concatenated column-wise.

    dict_features: Union[dict, str]
        dict or path to the json that holds the feature dictionary. Set this
        parameter to None if you want to load all csv and pickle files found
        by `get_file_cols_dict`, optionally filtered by `features` list.
        Recognized special keys are 'pickles' and 'shift_pickles' (lists of
        pickle file names); every other key is a csv file name mapped to
        the list of columns to load. Csv keys that are not present in
        `path_features` are skipped. The dict is not modified.

    shift_index: int=0
        added to the index of 'shift_pickles' files and of csv files
        selected by `shift_prefix`, for training on prediction periods
        past day 1.

    reindex_with: "list like"=None
        Use anything that works with df.reindex(reindex_with). This is used
        when you only need rows for a subset of the original data.

    shift_prefix: Union[str, bool]='shift'
        The prefix of csv files that should have their index shifted for
        proper lag alignment in time series prediction.
        Set this to the boolean True to shift index of all csv files.
        Any other non-string value disables shifting of csv files.

    load_dtypes: bool=True
        This will use the first row of each csv for dtypes (see `load_file`).

    features: list=None
        An explicit list of features that you want. Only these will be loaded
        if provided. Pickle files are matched by file name without the
        4-character extension, csv files by column name.

    pickle: bool=True
        Not used by this function; kept so existing calls keep working.
    """

    if isinstance(path_features, list):
        frames = [load_features(pf, dict_features, shift_index, reindex_with,
                                shift_prefix, load_dtypes, features, pickle)
                  for pf in path_features]
        frames = [frame for frame in frames if frame is not None]
        return pd.concat(frames, axis=1) if frames else None

    if isinstance(dict_features, str):
        with open(dict_features, 'r') as file:
            dict_features = json.load(file)

    if not dict_features:
        dict_features = get_file_cols_dict(path_features)

    def _align(df, shift):
        """Applies the optional index shift and the optional reindex."""
        if shift and shift_index:
            df.index = df.index + shift_index
        if reindex_with is not None:
            df = df.reindex(reindex_with)
        return df

    dfs = []
    dict_features = dict_features.copy()  # `pop` below must not touch the caller's dict
    for key, shift in (('pickles', False), ('shift_pickles', True)):
        for pkl in dict_features.pop(key, []):
            if features and pkl[:-4] not in features: continue
            df = pd.read_pickle(os.path.join(path_features, pkl))
            dfs.append(_align(df, shift))

    # Keep only the csv keys that exist in `path_features`
    available = set(os.listdir(path_features))
    for file, f_list in dict_features.items():
        if file not in available: continue
        if features: f_list = [f for f in f_list if f in features]
        df = load_file(os.path.join(path_features, file), load_dtypes, f_list)
        # NOTE(review): 'index' is only restored when it is listed in `f_list`;
        # `get_file_cols_dict` leaves it out by default - confirm this is intended.
        if 'index' in df.columns: df = df.set_index('index')
        shift = shift_prefix is True or (
            isinstance(shift_prefix, str) and file.startswith(shift_prefix))
        dfs.append(_align(df, shift))

    if not dfs:
        logging.info("No data was loaded")
        print("No data was loaded")
        return None
    return pd.concat(dfs, axis=1)

In [None]:
#hide
show_doc(load_features)

<h4 id="load_features" class="doc_header"><code>load_features</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>load_features</code>(**`path_features`**:`Union`\[`list`, `str`\], **`dict_features`**:`Union`\[`dict`, `str`\]=*`None`*, **`shift_index`**:`int`=*`0`*, **`reindex_with`**:`list like`=*`None`*, **`shift_prefix`**:`Union`\[`str`, `bool`\]=*`'shift'`*, **`load_dtypes`**:`bool`=*`True`*, **`features`**:`list`=*`None`*, **`pickle`**:`bool`=*`True`*)

Loads the features selected in `dict_features` into a dataframe.
`dict_features` Must be a module that is located in the working 
directory.

Parameters
----------
path_features: Union[list, str]
    path to the folder that holds the features files
    
dict_features: Union[dict, str]
    dict or path to the json that holds the feature dictionary. Set this
    parameter to None if you want to load all csv files, optionally
    filtered by `features` list.
    
shift_index: int=0
    used to shift columns of files starting with `shift_prefix` when training 
    for prediction periods past day 1.
    
shift_prefix: Union[str, bool]='shift'
    The prefix of files that should have their index shifted for 
    proper lag alignment in time series prediction.
    Set this to the boolean True to shift index of all files.
    
reindex_with: "list like"=None 
    Use anything that works with df.reindex(reindex_with). This is used when you 
    only need rows for a subset of the orginal data. 
    
load_dtype: bool=True
    This will use the first row for dtypes
    
features: list=None
    An explicit list of features that you want. Only these will be loaded 
    if provided.

Now we can easily load in our features with the correct data types

In [None]:
load_features('.', 'tmp_features.json', pickle=False)

Unnamed: 0,feat_1,feat_2,feat_3,feat_7,feat_8,feat_9,feat_7.1,feat_10,feat_11,feat_12,feat_13,shift_feat_4,shift_feat_5,shift_feat_6
0,1,1,1,1,7,1,1,1,7,1,a,1,1,1
1,2,1,4,7,1,7,7,7,1,7,b,9,1,9
2,2,3,3,2,3,3,2,2,3,3,c,2,3,3
3,4,3,3,4,3,3,4,4,3,3,d,4,9,3


In [None]:
df = load_features('.', 'tmp_features.json', features=['feat_3', 'feat_10', 'feat_13'], pickle=False)
display(df)
display(df.dtypes)

Unnamed: 0,feat_3,feat_10,feat_13
0,1,1,a
1,4,7,b
2,3,2,c
3,3,4,d


feat_3        int64
feat_10        int8
feat_13    category
dtype: object

Sometimes we need to shift the index so that our lag features are in the correct alignment.

In [None]:
load_features('.', 'tmp_features.json', shift_index=1, pickle=False)

Unnamed: 0,feat_1,feat_2,feat_3,feat_7,feat_8,feat_9,feat_7.1,feat_10,feat_11,feat_12,feat_13,shift_feat_4,shift_feat_5,shift_feat_6
0,1.0,1.0,1.0,1.0,7.0,1.0,1.0,1.0,7.0,1.0,a,,,
1,2.0,1.0,4.0,7.0,1.0,7.0,7.0,7.0,1.0,7.0,b,1.0,1.0,1.0
2,2.0,3.0,3.0,2.0,3.0,3.0,2.0,2.0,3.0,3.0,c,9.0,1.0,9.0
3,4.0,3.0,3.0,4.0,3.0,3.0,4.0,4.0,3.0,3.0,d,2.0,3.0,3.0
4,,,,,,,,,,,,4.0,9.0,3.0


Sometimes we are loading features for a subset of the data so we only need to load the rows associated with certain indexes.

In [None]:
load_features('.', 'tmp_features.json', shift_index=1, reindex_with=[1, 3], pickle=False)

Unnamed: 0,feat_1,feat_2,feat_3,feat_7,feat_8,feat_9,feat_7.1,feat_10,feat_11,feat_12,feat_13,shift_feat_4,shift_feat_5,shift_feat_6
1,2,1,4,7,1,7,7,7,1,7,b,1,1,1
3,4,3,3,4,3,3,4,4,3,3,d,2,3,3


We can make a copy of the feature json, delete the features we don't want, and use the copy to load features.

In [None]:
shutil.copyfile('tmp_features.json', 'tmp_features_1.json')

'tmp_features_1.json'

Open `tmp_features_1.json` and delete the features you don't want to load.

In [None]:
!cat tmp_features_1.json

{
"features_1.csv": [
"feat_1",
"feat_2",
"feat_3"
],
"features_3.csv": [
"feat_7",
"feat_8",
"feat_9"
],
"features_3_less_cols.csv": [
"feat_7"
],
"features_4.csv": [
"feat_10",
"feat_11",
"feat_12",
"feat_13"
],
"shift_features_2.csv": [
"shift_feat_4",
"shift_feat_5",
"shift_feat_6"
]
}

In [None]:
load_features('.', 'tmp_features_1.json', pickle=False)

Unnamed: 0,feat_1,feat_2,feat_3,feat_7,feat_8,feat_9,feat_7.1,feat_10,feat_11,feat_12,feat_13,shift_feat_4,shift_feat_5,shift_feat_6
0,1,1,1,1,7,1,1,1,7,1,a,1,1,1
1,2,1,4,7,1,7,7,7,1,7,b,9,1,9
2,2,3,3,2,3,3,2,2,3,3,c,2,3,3
3,4,3,3,4,3,3,4,4,3,3,d,4,9,3


In [None]:
!rm *.csv
!rm tmp*.json

## Speed and memory functions 

### Parallel runs

In [None]:
#export
def pool_func(function, input_list: list, verbose=False, n_cpu=99):
    """Uses the Pool class from the package 'multiprocessing'
    to run `function` over the list `input_list`. The `function`
    should only take one argument: a single element of `input_list`.

    Parameters
    ----------
    function:
        Callable applied to every element; `multiprocessing` has to be
        able to send it to the worker processes.
    input_list: list
        The inputs, one call of `function` per element.
    verbose: bool=False
        Print the function name, cpu usage, number of calls and time taken.
    n_cpu: int=99
        Upper bound on worker processes; clipped to the range
        [1, cpu_count()].

    Returns
    -------
    list with the results in the order of `input_list`; an empty list
    when `input_list` is empty (no worker processes are started then).
    """

    n_cpu = max(1, min(n_cpu, cpu_count()))
    if verbose:
        print('#############################################')
        print('Pooling function: ')
        if hasattr(function, '__name__'):
            print(function.__name__)
        print(f'{n_cpu} of {cpu_count()} cpus used')
        print('Number of function calls: ', len(input_list))

    start = time.time()
    if len(input_list) == 0:
        res = []
    else:
        # The context manager releases the workers even if `function` raises.
        with Pool(n_cpu) as pool:
            res = pool.map(function, input_list)

    if verbose:
        print('Time taken:',
              round((time.time() - start) / 60, 2),
              'minutes')
    return res

In [None]:
def f(x): return x * 5
pool_func(f, list(range(20)), True)

#############################################
Pooling function: 
f
16 of 16 cpus used
Number of function calls:  20
Time taken: 0.0 minutes


[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95]

### Saving memory 

In [None]:
#export
def reduce_mem_usage(df, verbose=True):
    """Converts numeric columns to the smallest datatype whose range
    holds the column's min and max.

    Integer columns keep their exact values. Float columns are chosen by
    range only, so a downcast to float16/float32 can lose precision.
    Only columns of dtype int16/32/64 and float16/32/64 are considered.

    `df` is modified in place and also returned. If `verbose`, the new
    memory usage and the percentage saved are printed.
    """

    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_types = (np.int8, np.int16, np.int32, np.int64)
    float_types = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue
        c_min, c_max = df[col].min(), df[col].max()
        if str(col_type)[:3] == 'int':
            # Bounds are inclusive: e.g. a max of exactly 127 still fits int8.
            # If nothing matches (empty column -> NaN min/max) the column is kept.
            for int_type in int_types:
                info = np.iinfo(int_type)
                if info.min <= c_min and c_max <= info.max:
                    df[col] = df[col].astype(int_type)
                    break
        else:
            for float_type in float_types:
                info = np.finfo(float_type)
                if info.min <= c_min and c_max <= info.max:
                    df[col] = df[col].astype(float_type)
                    break
            else:
                # Out of float32 range, infinite or all-NaN
                df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard against a zero start size so the report cannot divide by zero
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

In [None]:
#export
def merge_by_concat(df1, df2, merge_on):
    """Left-merges the columns of `df2` onto `df1` using the key column(s)
    `merge_on`, and returns `df1` with the new columns appended.

    Only the key columns of `df1` take part in the merge; the looked-up
    columns are then concatenated to `df1`, so the dtypes of `df1`'s own
    columns are left alone.

    Parameters
    ----------
    df1: pd.DataFrame
        Left frame; its rows, order and index are kept.
    df2: pd.DataFrame
        Right frame holding the key column(s) plus the columns to add.
    merge_on: str or list
        Name(s) of the key column(s) present in both frames.
    """
    keys = [merge_on] if isinstance(merge_on, str) else list(merge_on)
    merged_df = df1[keys].merge(df2, on=keys, how='left')
    new_columns = [col for col in list(merged_df) if col not in keys]
    added = merged_df[new_columns].copy()
    # `merge` returns a fresh RangeIndex while `concat` aligns on the index,
    # so the new columns must carry `df1`'s index to land on the right rows.
    # (Row counts only differ when `df2` has duplicate keys.)
    if len(added) == len(df1):
        added.index = df1.index
    return pd.concat([df1, added], axis=1)

In [None]:
#export
def get_memory_usage():
    """Returns RAM usage in gigabytes, rounded to 2 decimals.

    How it works
    ------------
    # os.getpid() gives the id of the current process.
    # psutil.Process(pid) is a handle on that process.
    # .memory_info()[0] is the first field of its memory info
    #   (the resident set size, in bytes).
    # Dividing by 2.**30 converts bytes to gigabytes.
    """
    current_process = psutil.Process(os.getpid())
    rss_bytes = current_process.memory_info()[0]
    return np.round(rss_bytes / 2.**30, 2)
        
def sizeof_fmt(num, suffix='B'):
    """Formats `num`, a number of bytes, as a string with a binary unit
    prefix (Ki, Mi, ... Yi) followed by `suffix`.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    position = 0
    while position < len(prefixes):
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, prefixes[position], suffix)
        num /= 1024.0
        position += 1
    # Still >= 1024 after the largest listed prefix
    return "%.1f%s%s" % (num, 'Yi', suffix)

In [None]:
get_memory_usage()

0.14

In [None]:
#export
def time_taken(start_time: float=0, time_elapsed: float=None):
    """Returns a string with the time elapsed from `start_time`
    in a nice format. If `time_elapsed` is provided (including 0),
    `start_time` is ignored.

    `start_time` should come from calling the time module:
    start_time = time.time()

    Parameters
    ----------
    start_time: float=0
        Reference point in seconds, as returned by `time.time()`.
    time_elapsed: float=None
        Elapsed seconds to format instead of measuring from `start_time`.

    Returns
    -------
    str such as 'Time taken: 1 hours 1 minutes 6 seconds'. Fractions of a
    second are dropped, and leading units that are zero are omitted.
    """
    # `is None` so that an explicit elapsed time of 0 is respected
    if time_elapsed is None:
        time_elapsed = time.time() - start_time
    seconds = int(time_elapsed)

    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    d, h = divmod(h, 24)
    if d: return f'Time taken: {d} days {h} hours {m} minutes {s} seconds'
    if h: return f'Time taken: {h} hours {m} minutes {s} seconds'
    if m: return f'Time taken: {m} minutes {s} seconds'
    return f'Time taken: {s} seconds'

In [None]:
start_time = time.time()
time.sleep(2)
time_taken(start_time)

'Time taken: 2 seconds'

In [None]:
time_taken(time_elapsed=3666)

'Time taken: 1 hours 1 minutes 6 seconds'

In [None]:
#hide
from nbdev.export import notebook2script; notebook2script()

Converted 00_core.ipynb.
Converted index.ipynb.
Converted 01_brief_eda.ipynb.
Converted 02_WRMSSE_metric.ipynb.
Converted 03_feature_engineering.ipynb.
Converted 04_out_of_stock_detection.ipynb.
Converted index.ipynb.
Converted training_day_by_day_models.ipynb.
