-
Notifications
You must be signed in to change notification settings - Fork 0
/
dodo.py
109 lines (83 loc) · 2.73 KB
/
dodo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import os
import sys
import configparser
import doit
from doit.cmd_base import ModuleTaskLoader
from os.path import isfile
import kaggle
import baseline_model
# Global doit settings read from this module: run the 'train' task when no
# task is named on the command line, and use verbosity level 2.
DOIT_CONFIG = dict(
    default_tasks=['train'],
    verbosity=2,
)
def read_credentials():
    """Load Kaggle credentials from a local ``.credentials.ini`` file, if any.

    By default the credentials are expected to be supplied through the
    KAGGLE_LOGIN / KAGGLE_PASSWORD environment variables (injected by Docker,
    for example). When a ``.credentials.ini`` file exists in the current
    working directory, its values override those variables. Expected format:

        [kaggle]
        login=your_login
        password=your_password

    Raises:
        KeyError: if the file exists but lacks the ``kaggle`` section or one
            of its ``login`` / ``password`` keys.
    """
    if not isfile('.credentials.ini'):
        # No local override: leave the environment untouched.
        return

    # Interpolation is disabled so that a literal '%' inside a login or
    # password is taken verbatim instead of triggering an interpolation error.
    config = configparser.ConfigParser(interpolation=None)
    config.read('.credentials.ini')

    kaggle_section = config['kaggle']
    os.environ["KAGGLE_LOGIN"] = kaggle_section['login']
    os.environ["KAGGLE_PASSWORD"] = kaggle_section['password']
def task_read_credentials():
    """doit task definition whose single action calls ``read_credentials``."""
    return dict(actions=[read_credentials])
def download_train():
    """Fetch ``train.json.7z`` via ``kaggle.download`` into ``data/``.

    The login and password are read from the KAGGLE_LOGIN and KAGGLE_PASSWORD
    environment variables; a missing variable raises ``KeyError``.
    """
    # Login is looked up before password, so a missing login is reported first.
    credentials = (os.environ["KAGGLE_LOGIN"], os.environ["KAGGLE_PASSWORD"])
    # NOTE(review): argument meaning (competition, file, login, password,
    # destination) is inferred from the values passed -- confirm against the
    # `kaggle` module's `download` signature.
    kaggle.download(
        'statoil-iceberg-classifier-challenge',
        'train.json.7z',
        *credentials,
        'data/train.json.7z',
    )
def task_download_train():
    """doit task definition that downloads the training archive.

    ``uptodate`` is ``[True]`` and the archive is declared as a target, so
    (per doit's up-to-date rules) the download should be skipped once
    ``data/train.json.7z`` exists. ``read_credentials`` is listed as a setup
    task.
    """
    task = dict(
        actions=[download_train],
        uptodate=[True],
        setup=['read_credentials'],
        targets=['data/train.json.7z'],
    )
    return task
def download_test():
    """Fetch ``test.json.7z`` via ``kaggle.download`` into ``data/``.

    The login and password are read from the KAGGLE_LOGIN and KAGGLE_PASSWORD
    environment variables; a missing variable raises ``KeyError``.
    """
    # Login is looked up before password, so a missing login is reported first.
    credentials = (os.environ["KAGGLE_LOGIN"], os.environ["KAGGLE_PASSWORD"])
    # NOTE(review): argument meaning (competition, file, login, password,
    # destination) is inferred from the values passed -- confirm against the
    # `kaggle` module's `download` signature.
    kaggle.download(
        'statoil-iceberg-classifier-challenge',
        'test.json.7z',
        *credentials,
        'data/test.json.7z',
    )
def task_download_test():
    """doit task definition that downloads the test archive.

    ``uptodate`` is ``[True]`` and the archive is declared as a target, so
    (per doit's up-to-date rules) the download should be skipped once
    ``data/test.json.7z`` exists. ``read_credentials`` is listed as a setup
    task.
    """
    task = dict(
        actions=[download_test],
        uptodate=[True],
        setup=['read_credentials'],
        targets=['data/test.json.7z'],
    )
    return task
# Requires 7zip:
#   sudo apt-get install p7zip-full
def task_unzip_train():
    """doit task definition that extracts ``data/train.json.7z`` into ``data/``.

    The archive is the file dependency and ``data/train.json`` the target.
    """
    extract_cmd = 'bash -c "7z e data/train.json.7z -y -odata"'
    return dict(
        actions=[extract_cmd],
        file_dep=['data/train.json.7z'],
        targets=['data/train.json'],
    )
def task_unzip_test():
    """doit task definition that extracts ``data/test.json.7z`` into ``data/``.

    The archive is the file dependency and ``data/test.json`` the target.
    Requires the ``7z`` executable to be available.
    """
    extract_cmd = 'bash -c "7z e data/test.json.7z -y -odata"'
    return dict(
        actions=[extract_cmd],
        file_dep=['data/test.json.7z'],
        targets=['data/test.json'],
    )
# TODO: extract convert_train_to_numpy to a separate file and point file_dep
# at that file instead of baseline_model.py.
def task_convert_train_to_numpy():
    """doit task definition that runs ``baseline_model.convert_train_to_numpy``.

    Depends on ``baseline_model.py`` and ``data/train.json``; declares
    ``data/train.npy`` as its target.
    """
    dependencies = ['baseline_model.py', 'data/train.json']
    return dict(
        actions=[baseline_model.convert_train_to_numpy],
        file_dep=dependencies,
        targets=['data/train.npy'],
    )
def task_train():
    """doit task definition that runs ``baseline_model.train``.

    Depends on ``baseline_model.py`` and ``data/train.npy``; no targets are
    declared.
    """
    dependencies = ['baseline_model.py', 'data/train.npy']
    return dict(
        actions=[baseline_model.train],
        file_dep=dependencies,
    )
def train():
    """Run the doit ``train`` task using the tasks defined in this module.

    Returns:
        Whatever ``DoitMain.run`` returns; doit documents this as the command
        exit code (0 on success, non-zero on failure). Previously the value
        was discarded, so callers could not tell whether the run failed.
    """
    loader = ModuleTaskLoader(sys.modules[__name__])
    return doit.doit_cmd.DoitMain(loader).run(['train'])


if __name__ == "__main__":
    # Propagate doit's result as the process exit status so that a failed
    # task makes the script exit non-zero (useful for CI / shell pipelines).
    sys.exit(train())