Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We'll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for windows filepaths in dataset module #3504

Merged
merged 5 commits into from Dec 9, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
41 changes: 21 additions & 20 deletions pennylane/data/data_manager.py
Expand Up @@ -18,15 +18,16 @@
from collections.abc import Iterable
from concurrent.futures import ThreadPoolExecutor, wait, FIRST_EXCEPTION
import os
from os.path import sep as pathsep
from time import sleep
from urllib.parse import quote

import requests
from pennylane.data.dataset import Dataset

# Base URL of the public dataset bucket; file URLs are always built with "/"
# (URL separator), never os.path.join, so they are correct on Windows too.
S3_URL = "https://xanadu-quantum-datasets.s3.amazonaws.com"
FOLDERMAP_URL = f"{S3_URL}/foldermap.json"
DATA_STRUCT_URL = f"{S3_URL}/data_struct.json"

# Module-level caches for the remote folder map and data-structure metadata.
_foldermap = {}
_data_struct = {}
Expand Down Expand Up @@ -137,7 +138,8 @@ def _refresh_data_struct():

def _fetch_and_save(filename, dest_folder):
    """Download a single file from S3 and save it locally.

    Args:
        filename (str): path of the file relative to the S3 bucket root,
            expressed with the local OS path separator
        dest_folder (str): local directory the file is written into

    Raises:
        requests.HTTPError: if the S3 request returns an error status
    """
    # S3 keys always use "/" — convert a Windows-style path before building
    # the URL, but keep the native separator for the local destination path.
    webfile = filename if pathsep == "/" else filename.replace(pathsep, "/")
    response = requests.get(f"{S3_URL}/{quote(webfile)}", timeout=5.0)
    response.raise_for_status()
    with open(os.path.join(dest_folder, filename), "wb") as f:
        f.write(response.content)
Expand All @@ -161,16 +163,16 @@ def _s3_download(data_name, folders, attributes, dest_folder, force, num_threads
if not os.path.exists(local_folder):
os.makedirs(local_folder)

prefix = os.path.join(data_name, folder, f"{folder.replace('/', '_')}_")
prefix = os.path.join(data_name, folder, f"{folder.replace(pathsep, '_')}_")
# TODO: consider combining files within a folder (switch to append)
files.extend([f"{prefix}{attr}.dat" for attr in attributes])

if not force:
start = len(dest_folder.rstrip("/")) + 1
start = len(dest_folder.rstrip(pathsep)) + 1
existing_files = {
os.path.join(path, name)[start:]
for path, _, files in os.walk(dest_folder)
for name in files
for path, _, local_files in os.walk(dest_folder)
for name in local_files
}
files = list(set(files) - existing_files)

Expand All @@ -196,13 +198,15 @@ def _generate_folders(node, folders):

next_folders = folders[1:]
folders = set(node) if folders[0] == ["full"] else set(folders[0]).intersection(set(node))
if not next_folders:
return folders
return [
os.path.join(folder, child)
for folder in folders
for child in _generate_folders(node[folder], next_folders)
]
return (
[
os.path.join(folder, child)
for folder in folders
for child in _generate_folders(node[folder], next_folders)
]
if next_folders
else folders
)


def load(
Expand Down Expand Up @@ -251,7 +255,7 @@ def load(
for folder in all_folders:
real_folder = os.path.join(directory_path, data_name, folder)
data_files.append(
Dataset(data_name, real_folder, folder.replace("/", "_"), docstring, standard=True)
Dataset(data_name, real_folder, folder.replace(pathsep, "_"), docstring, standard=True)
)

return data_files
Expand All @@ -263,10 +267,7 @@ def _direc_to_dict(path):
if not dirs:
return None
tree = {x: _direc_to_dict(os.path.join(root, x)) for x in dirs}
vals = [x is None for x in tree.values()]
if all(vals):
return list(dirs)
return tree
return list(dirs) if all(x is None for x in tree.values()) else tree


def list_datasets(path=None):
Expand Down Expand Up @@ -417,7 +418,7 @@ def load_interactive():
)

print("\nPlease confirm your choices:")
print("dataset:", os.path.join(data_name, *[description[param] for param in params]))
print("dataset:", "/".join([data_name] + [description[param] for param in params]))
print("attributes:", attributes)
print("force:", force)
print("dest folder:", os.path.join(dest_folder, "datasets"))
Expand Down
9 changes: 5 additions & 4 deletions pennylane/data/dataset.py
Expand Up @@ -106,7 +106,7 @@ class Dataset(ABC):
def __std_init__(self, data_name, folder, attr_prefix, docstring):
"""Constructor for standardized datasets."""
self._dtype = data_name
self._folder = folder.rstrip("/")
self._folder = folder.rstrip(os.path.sep)
self._prefix = os.path.join(self._folder, attr_prefix) + "_{}.dat"
self._prefix_len = len(attr_prefix) + 1
self._description = os.path.join(data_name, self._folder.split(data_name)[-1][1:])
Expand All @@ -116,7 +116,7 @@ def __std_init__(self, data_name, folder, attr_prefix, docstring):
if not os.path.exists(self._fullfile):
self._fullfile = None

for f in glob(self._folder + "/*.dat"):
for f in glob(f"{self._folder}{os.path.sep}*.dat"):
self.read(f, lazy=True)

def __base_init__(self, **kwargs):
Expand Down Expand Up @@ -144,8 +144,9 @@ def __repr__(self):
attr_str = (
str(list(self.attrs))
if len(self.attrs) < 3
else str(list(self.attrs)[:2])[:-1] + ", ...]"
else f"{str(list(self.attrs)[:2])[:-1]}, ...]"
)

std_str = f"description: {self._description}, " if self._is_standard else ""
return f"<Dataset = {std_str}attributes: {attr_str}>"

Expand Down Expand Up @@ -241,7 +242,7 @@ def __copy__(self):

# The methods below are intended for use only by standard Dataset objects
def __get_filename_for_attribute(self, attribute):
    """Return the data file that stores ``attribute``: the combined full
    file when one exists, otherwise the per-attribute file."""
    if self._fullfile:
        return self._fullfile
    return self._prefix.format(attribute)

def __get_attribute_from_filename(self, filename):
    """Recover the attribute name from a data file path by stripping the
    filename prefix and the trailing ".dat" extension (4 characters)."""
    base_name = os.path.basename(filename)
    return base_name[self._prefix_len : -4]
Expand Down