diff --git a/examples/queries_devel/q05/generate-dataset.sh b/examples/queries_devel/q05/generate-dataset.sh index 9fa1cd3..6d10529 100755 --- a/examples/queries_devel/q05/generate-dataset.sh +++ b/examples/queries_devel/q05/generate-dataset.sh @@ -2,12 +2,12 @@ #set -e set -x -BIG_DATA_BENCHMARK_PATH=/home/whassan/Big-Data-Benchmark-for-Big-Bench/ -DATASET_PATH=/home.old/whassan/tmp/csv/q05/ +BIG_DATA_BENCHMARK_PATH=/home/etotoni/Downloads/Big-Data-Benchmark-for-Big-Bench/ +DATASET_PATH=/srv/data/tmp/q05/ mkdir -p ${DATASET_PATH} NUM_MAP_TASKS=2 -for dataset_factor in 200 250; do +for dataset_factor in 300 400 500; do if [ -f ${DATASET_PATH}/web_clickstreams_${dataset_factor}f.dat ]; then rm ${DATASET_PATH}/web_clickstreams_${dataset_factor}f.dat fi @@ -24,17 +24,17 @@ for dataset_factor in 200 250; do ${BIG_DATA_BENCHMARK_PATH}/bin/bigBench dataGen -U -m ${NUM_MAP_TASKS} -f $dataset_factor for i in $(seq ${NUM_MAP_TASKS}); do - hadoop fs -cat /home.old/whassan/user/whassan/benchmarks/bigbench/data/web_clickstreams/web_clickstreams_${i}.dat > ${DATASET_PATH}/web_clickstreams_${i}_${dataset_factor}f.dat + hadoop fs -cat /user/etotoni/benchmarks/bigbench/data/web_clickstreams/web_clickstreams_${i}.dat > ${DATASET_PATH}/web_clickstreams_${i}_${dataset_factor}f.dat hadoop dfsadmin -safemode leave - hadoop fs -rmr /home.old/whassan/user/whassan/benchmarks/bigbench/data/web_clickstreams/web_clickstreams_${i}.dat - hadoop fs -cat /home.old/whassan/user/whassan/benchmarks/bigbench/data/item/item_${i}.dat > ${DATASET_PATH}/item_${i}_${dataset_factor}f.dat - hadoop fs -rmr /home.old/whassan/user/whassan/benchmarks/bigbench/data/item/item_${i}.dat - hadoop fs -cat /home.old/whassan/user/whassan/benchmarks/bigbench/data/customer/customer_${i}.dat > ${DATASET_PATH}/customer_${i}_${dataset_factor}f.dat - hadoop fs -rmr /home.old/whassan/user/whassan/benchmarks/bigbench/data/customer/customer_${i}.dat + hadoop fs -rmr /user/etotoni/benchmarks/bigbench/data/web_clickstreams/web_clickstreams_${i}.dat + hadoop fs -cat /user/etotoni/benchmarks/bigbench/data/item/item_${i}.dat > ${DATASET_PATH}/item_${i}_${dataset_factor}f.dat + hadoop fs -rmr /user/etotoni/benchmarks/bigbench/data/item/item_${i}.dat + hadoop fs -cat /user/etotoni/benchmarks/bigbench/data/customer/customer_${i}.dat > ${DATASET_PATH}/customer_${i}_${dataset_factor}f.dat + hadoop fs -rmr /user/etotoni/benchmarks/bigbench/data/customer/customer_${i}.dat # For some reasons second data is not generated #if [ $i -lt 2 ]; then - hadoop fs -cat /home.old/whassan/user/whassan/benchmarks/bigbench/data/customer_demographics/customer_demographics_${i}.dat > ${DATASET_PATH}/customer_demographics_${i}_${dataset_factor}f.dat - hadoop fs -rmr /home.old/whassan/user/whassan/benchmarks/bigbench/data/customer_demographics/customer_demographics_${i}.dat + hadoop fs -cat /user/etotoni/benchmarks/bigbench/data/customer_demographics/customer_demographics_${i}.dat > ${DATASET_PATH}/customer_demographics_${i}_${dataset_factor}f.dat + hadoop fs -rmr /user/etotoni/benchmarks/bigbench/data/customer_demographics/customer_demographics_${i}.dat #fi cat ${DATASET_PATH}/web_clickstreams_${i}_${dataset_factor}f.dat | cut -d'|' -f4,6 > ${DATASET_PATH}/web_clickstreams_sanitized_${i}_${dataset_factor}f.dat rm ${DATASET_PATH}/web_clickstreams_${i}_${dataset_factor}f.dat @@ -61,11 +61,11 @@ for dataset_factor in 200 250; do cat ${DATASET_PATH}/customer_sanitized_[0-9]_${dataset_factor}f.dat > ${DATASET_PATH}/customer_sanitized_${dataset_factor}f.csv cat ${DATASET_PATH}/customer_demographics_sanitized_[0-9]_${dataset_factor}f.dat > ${DATASET_PATH}/customer_demographics_sanitized_${dataset_factor}f.csv hadoop dfsadmin -safemode leave - hadoop fs -rmr /home.old/whassan/user/whassan/benchmarks/bigbench/data/ + hadoop fs -rmr /user/etotoni/benchmarks/bigbench/data/ rm ${DATASET_PATH}/web_clickstreams_sanitized_[0-9]_${dataset_factor}f.dat rm ${DATASET_PATH}/item_sanitized_[0-9]_${dataset_factor}f.dat rm ${DATASET_PATH}/customer_sanitized_[0-9]_${dataset_factor}f.dat rm ${DATASET_PATH}/customer_demographics_sanitized_[0-9]_${dataset_factor}f.dat -done \ No newline at end of file +done