From 5f655c93c5db3e9cdf0a693ad81979cf1ccd35fe Mon Sep 17 00:00:00 2001 From: Dat Le Date: Mon, 2 Oct 2017 21:17:58 +0800 Subject: [PATCH] Fix README for Netflix Test --- .gitignore | 6 ++++++ README.md | 25 ++++++------------------- data_utils/netflix_data_convert.py | 13 ++++++++++++- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index 7bbc71c..038a6cc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,9 @@ +# Data files and folders +download/ +training_set/ +nf_prize_dataset.tar.gz +Netflix/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/README.md b/README.md index 39a5701..a1b893e 100644 --- a/README.md +++ b/README.md @@ -22,29 +22,16 @@ $ python -m unittest test/test_model.py ### Get the data +**Note: Run all these commands within your `DeepRecommender` folder** + [Netflix prize](http://netflixprize.com/) -* ```$ mkdir -p ~/Recommendations``` you can use any other folder name -* Download from [here](http://academictorrents.com/details/9b13183dc4d60676b773c9e2cd6de5e5542cee9a) to ```~/Recommendations``` -* ```$ cd ~/Recommendations``` -* ```$ tar -xvf nf_prize_dataset.tar.gz``` -* ```$ tar -xf download/training_set.tar ``` -* Create necessary folders +* Download from [here](http://academictorrents.com/details/9b13183dc4d60676b773c9e2cd6de5e5542cee9a) into your ```DeepRecommender``` folder ``` -mkdir -p Netflix/N3M_TRAIN -mkdir -p Netflix/N3M_VALID -mkdir -p Netflix/N3M_TEST -mkdir -p Netflix/N6M_TRAIN -mkdir -p Netflix/N6M_VALID -mkdir -p Netflix/N6M_TEST -mkdir -p Netflix/N1Y_TRAIN -mkdir -p Netflix/N1Y_VALID -mkdir -p Netflix/N1Y_TEST -mkdir -p Netflix/NF_TRAIN -mkdir -p Netflix/NF_VALID -mkdir -p Netflix/NF_TEST +$ tar -xvf nf_prize_dataset.tar.gz +$ tar -xf download/training_set.tar +$ python ./data_utils/netflix_data_convert.py training_set Netflix ``` -* ```$ python ./data_utils/netflix_data_convert.py training_set Netflix```. (run from your `DeepRecommender` folder) #### Data stats | Dataset | Netflix 3 months | Netflix 6 months | Netflix 1 year | Netflix full | diff --git a/data_utils/netflix_data_convert.py b/data_utils/netflix_data_convert.py index 5d82523..45df020 100644 --- a/data_utils/netflix_data_convert.py +++ b/data_utils/netflix_data_convert.py @@ -1,5 +1,5 @@ # Copyright (c) 2017 NVIDIA Corporation -from os import listdir, path +from os import listdir, path, makedirs import random import sys import time @@ -75,6 +75,17 @@ def create_NETFLIX_data_timesplit(all_data, def main(args): + # create necessary folders: + for output_dir in [ + "Netflix/N3M_TRAIN", "Netflix/N3M_VALID", "Netflix/N3M_TEST", "Netflix/N6M_TRAIN", + "Netflix/N6M_VALID", "Netflix/N6M_TEST", "Netflix/N1Y_TRAIN", "Netflix/N1Y_VALID", + "Netflix/N1Y_TEST", "Netflix/NF_TRAIN", "Netflix/NF_VALID", "Netflix/NF_TEST"]: + try: + makedirs(output_dir) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + user2id_map = dict() item2id_map = dict() userId = 0