Skip to content

Commit

Permalink
Moved datafile pointers; new progress_bar; better error on unzip
Browse files Browse the repository at this point in the history
  • Loading branch information
dsblank committed Sep 15, 2018
1 parent 6d4a3ed commit 9252d3f
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 18 deletions.
2 changes: 1 addition & 1 deletion conx/datasets/_colors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import csv

def colors(*args, path='colors.csv',
url="https://raw.githubusercontent.com/Calysto/conx/master/data/colors.csv",
url="https://raw.githubusercontent.com/Calysto/conx-data/master/colors/colors.csv",
**kwargs):
dataset = cx.Dataset()
from keras.utils import get_file
Expand Down
2 changes: 1 addition & 1 deletion conx/datasets/_fingers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ def fingers(*args, path='fingers.npz', **kwargs):
dataset = cx.Dataset()
inputs, labels = load_dataset_npz(
path,
"https://raw.githubusercontent.com/Calysto/conx/master/data/fingers.npz")
"https://raw.githubusercontent.com/Calysto/conx-data/master/fingers/fingers.npz")
inputs = inputs.astype('float32')
inputs /= 255
make_target_vector = lambda label: [int(label == n) for n in range(6)]
Expand Down
12 changes: 6 additions & 6 deletions conx/datasets/_gridfonts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

def gridfonts(*args, **kwargs):
dataset = cx.Dataset()
url = "https://raw.githubusercontent.com/Calysto/conx/master/data/gridfonts.npy"
url = "https://raw.githubusercontent.com/Calysto/conx-data/master/gridfonts/gridfonts.npy"
path = get_file("gridfonts.npy", origin=url)
ds = np.load(path)
## [letters, labels]
Expand All @@ -18,12 +18,12 @@ def gridfonts(*args, **kwargs):
http://goosie.cogsci.indiana.edu/pub/gridfonts.data
![Gridfont Grid](https://github.com/Calysto/conx/raw/master/data/grid.png)
![Gridfont Grid](https://github.com/Calysto/conx-data/raw/master/gridfonts/grid.png)
These data have been processed to make them neural
network friendly:
https://github.com/Calysto/conx/blob/master/data/gridfonts.py
https://github.com/Calysto/conx-data/blob/master/gridfonts/gridfonts.py
The dataset is composed of letters on a 25 row x 9 column
grid. The inputs and targets are identical, and the labels
Expand All @@ -37,7 +37,7 @@ def gridfonts(*args, **kwargs):

def figure_ground_a(*args, **kwargs):
dataset = cx.Dataset()
url = "https://raw.githubusercontent.com/Calysto/conx/master/data/figure_ground_a.npy"
url = "https://raw.githubusercontent.com/Calysto/conx-data/master/gridfonts/figure_ground_a.npy"
path = get_file("figure_ground_a.npy", origin=url)
ds = np.load(path)
## [[[letter], [brim, body]], ...]
Expand All @@ -51,12 +51,12 @@ def figure_ground_a(*args, **kwargs):
http://goosie.cogsci.indiana.edu/pub/gridfonts.data
![Gridfont Grid](https://github.com/Calysto/conx/raw/master/data/grid.png)
![Gridfont Grid](https://github.com/Calysto/conx-data/raw/master/gridfonts/grid.png)
These data (all the letter A) have been processed to make them neural
network friendly:
https://github.com/Calysto/conx/blob/master/data/gridfonts.py
https://github.com/Calysto/conx-data/blob/master/gridfonts/gridfonts.py
The brim and body parts have been identified manually. The dataset is
composed of letters on a 17 row x 9 column grid (4 lines not used on
Expand Down
4 changes: 2 additions & 2 deletions conx/datasets/_mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
10, corresponding to the digit classification categories zero through
nine. Some example MNIST images are shown below:
![MNIST Images](https://github.com/Calysto/conx/raw/master/data/mnist_images.png)
![MNIST Images](https://github.com/Calysto/conx-data/raw/master/mnist/mnist_images.png)
"""

def mnist_h5(*args, **kwargs):
Expand All @@ -24,7 +24,7 @@ def mnist_h5(*args, **kwargs):
import h5py

path = "mnist.h5"
url = "https://raw.githubusercontent.com/Calysto/conx/master/data/mnist.h5"
url = "https://raw.githubusercontent.com/Calysto/conx-data/master/mnist/mnist.h5"
path = get_file(path, origin=url)
h5 = h5py.File(path, "r")
dataset = cx.Dataset()
Expand Down
6 changes: 3 additions & 3 deletions conx/datasets/cmu_faces.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ def cmu_faces_full_size(*args, path="cmu_faces_full_size.npz", **kwargs):
dataset = cx.Dataset()
inputs, labels = load_dataset_npz(
path,
"https://raw.githubusercontent.com/Calysto/conx/master/data/cmu_faces_full_size.npz")
"https://raw.githubusercontent.com/Calysto/conx-data/master/cmu_faces/cmu_faces_full_size.npz")
dataset.name = "CMU Faces, full-size"
dataset.description = """
Original source: http://archive.ics.uci.edu/ml/datasets/cmu+face+images
Expand All @@ -16,7 +16,7 @@ def cmu_faces_quarter_size(*args, path="cmu_faces_quarter_size.npz", **kwargs):
dataset = cx.Dataset()
inputs, labels = load_dataset_npz(
path,
"https://raw.githubusercontent.com/Calysto/conx/master/data/cmu_faces_quarter_size.npz")
"https://raw.githubusercontent.com/Calysto/conx-data/master/cmu_faces/cmu_faces_quarter_size.npz")
dataset.name = "CMU Faces, quarter-size"
dataset.description = """
Original source: http://archive.ics.uci.edu/ml/datasets/cmu+face+images
Expand All @@ -27,7 +27,7 @@ def cmu_faces_half_size(*args, path="cmu_faces_half_size.npz", **kwargs):
dataset = cx.Dataset()
inputs, labels = load_dataset_npz(
path,
"https://raw.githubusercontent.com/Calysto/conx/master/data/cmu_faces_half_size.npz")
"https://raw.githubusercontent.com/Calysto/conx-data/master/cmu_faces/cmu_faces_half_size.npz")
dataset.name = "CMU Faces, half-size"
dataset.description = """
Original source: http://archive.ics.uci.edu/ml/datasets/cmu+face+images
Expand Down
18 changes: 13 additions & 5 deletions conx/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,19 @@

def progress_bar(*args, **kwargs):
if _PROGRESS_BAR is None:
return items
if args:
return lambda i: i
else:
return None
elif _PROGRESS_BAR == "notebook":
tqdm.tqdm_notebook(*args, **kwargs)
return tqdm.tqdm_notebook(*args, **kwargs)
elif _PROGRESS_BAR == "standard":
tqdm.tqdm(*args, **kwargs)
return tqdm.tqdm(*args, **kwargs)
else:
return items
raise Exception("no such progress bar: use None, 'notebook', or 'standard'")

def set_progress_bar(mode):
global _PROGRESS_BAR
if mode in [None, 'notebook', 'standard']:
_PROGRESS_BAR = mode
else:
Expand Down Expand Up @@ -569,7 +573,11 @@ def download(url, directory="./", force=False, unzip=True, filename=None,
print("Using cached %s as '%s'." % (url, file_path))
## Now, if it is a zip file, check to unzip:
if file_path.endswith(".zip") and os.path.isfile(file_path):
zip_ref = zipfile.ZipFile(file_path, 'r')
try:
zip_ref = zipfile.ZipFile(file_path, 'r')
except zipfile.BadZipFile:
print("URL appears to be invalid for zip file. Try deleteing the file and re-trying.")
return
# first, count existing unzipped files:
total_count = 0
for name in zip_ref.namelist():
Expand Down

0 comments on commit 9252d3f

Please sign in to comment.