Skip to content
This repository has been archived by the owner on Nov 8, 2022. It is now read-only.

Commit

Permalink
update q05 data generation
Browse files Browse the repository at this point in the history
  • Loading branch information
Ehsan Totoni committed Oct 14, 2016
1 parent dac25bc commit cad25a1
Showing 1 changed file with 13 additions and 13 deletions.
26 changes: 13 additions & 13 deletions examples/queries_devel/q05/generate-dataset.sh
Expand Up @@ -2,12 +2,12 @@
#set -e
set -x

BIG_DATA_BENCHMARK_PATH=/home/whassan/Big-Data-Benchmark-for-Big-Bench/
DATASET_PATH=/home.old/whassan/tmp/csv/q05/
BIG_DATA_BENCHMARK_PATH=/home/etotoni/Downloads/Big-Data-Benchmark-for-Big-Bench/
DATASET_PATH=/srv/data/tmp/q05/
mkdir -p ${DATASET_PATH}
NUM_MAP_TASKS=2

for dataset_factor in 200 250; do
for dataset_factor in 300 400 500; do
if [ -f ${DATASET_PATH}/web_clickstreams_${dataset_factor}f.dat ]; then
rm ${DATASET_PATH}/web_clickstreams_${dataset_factor}f.dat
fi
Expand All @@ -24,17 +24,17 @@ for dataset_factor in 200 250; do
${BIG_DATA_BENCHMARK_PATH}/bin/bigBench dataGen -U -m ${NUM_MAP_TASKS} -f $dataset_factor

for i in $(seq ${NUM_MAP_TASKS}); do
hadoop fs -cat /home.old/whassan/user/whassan/benchmarks/bigbench/data/web_clickstreams/web_clickstreams_${i}.dat > ${DATASET_PATH}/web_clickstreams_${i}_${dataset_factor}f.dat
hadoop fs -cat /user/etotoni/benchmarks/bigbench/data/web_clickstreams/web_clickstreams_${i}.dat > ${DATASET_PATH}/web_clickstreams_${i}_${dataset_factor}f.dat
hadoop dfsadmin -safemode leave
hadoop fs -rmr /home.old/whassan/user/whassan/benchmarks/bigbench/data/web_clickstreams/web_clickstreams_${i}.dat
hadoop fs -cat /home.old/whassan/user/whassan/benchmarks/bigbench/data/item/item_${i}.dat > ${DATASET_PATH}/item_${i}_${dataset_factor}f.dat
hadoop fs -rmr /home.old/whassan/user/whassan/benchmarks/bigbench/data/item/item_${i}.dat
hadoop fs -cat /home.old/whassan/user/whassan/benchmarks/bigbench/data/customer/customer_${i}.dat > ${DATASET_PATH}/customer_${i}_${dataset_factor}f.dat
hadoop fs -rmr /home.old/whassan/user/whassan/benchmarks/bigbench/data/customer/customer_${i}.dat
hadoop fs -rmr /user/etotoni/benchmarks/bigbench/data/web_clickstreams/web_clickstreams_${i}.dat
hadoop fs -cat /user/etotoni/benchmarks/bigbench/data/item/item_${i}.dat > ${DATASET_PATH}/item_${i}_${dataset_factor}f.dat
hadoop fs -rmr /user/etotoni/benchmarks/bigbench/data/item/item_${i}.dat
hadoop fs -cat /user/etotoni/benchmarks/bigbench/data/customer/customer_${i}.dat > ${DATASET_PATH}/customer_${i}_${dataset_factor}f.dat
hadoop fs -rmr /user/etotoni/benchmarks/bigbench/data/customer/customer_${i}.dat
# For some reason the second data file is not generated
#if [ $i -lt 2 ]; then
hadoop fs -cat /home.old/whassan/user/whassan/benchmarks/bigbench/data/customer_demographics/customer_demographics_${i}.dat > ${DATASET_PATH}/customer_demographics_${i}_${dataset_factor}f.dat
hadoop fs -rmr /home.old/whassan/user/whassan/benchmarks/bigbench/data/customer_demographics/customer_demographics_${i}.dat
hadoop fs -cat /user/etotoni/benchmarks/bigbench/data/customer_demographics/customer_demographics_${i}.dat > ${DATASET_PATH}/customer_demographics_${i}_${dataset_factor}f.dat
hadoop fs -rmr /user/etotoni/benchmarks/bigbench/data/customer_demographics/customer_demographics_${i}.dat
#fi
cat ${DATASET_PATH}/web_clickstreams_${i}_${dataset_factor}f.dat | cut -d'|' -f4,6 > ${DATASET_PATH}/web_clickstreams_sanitized_${i}_${dataset_factor}f.dat
rm ${DATASET_PATH}/web_clickstreams_${i}_${dataset_factor}f.dat
Expand All @@ -61,11 +61,11 @@ for dataset_factor in 200 250; do
cat ${DATASET_PATH}/customer_sanitized_[0-9]_${dataset_factor}f.dat > ${DATASET_PATH}/customer_sanitized_${dataset_factor}f.csv
cat ${DATASET_PATH}/customer_demographics_sanitized_[0-9]_${dataset_factor}f.dat > ${DATASET_PATH}/customer_demographics_sanitized_${dataset_factor}f.csv
hadoop dfsadmin -safemode leave
hadoop fs -rmr /home.old/whassan/user/whassan/benchmarks/bigbench/data/
hadoop fs -rmr /user/etotoni/benchmarks/bigbench/data/

rm ${DATASET_PATH}/web_clickstreams_sanitized_[0-9]_${dataset_factor}f.dat
rm ${DATASET_PATH}/item_sanitized_[0-9]_${dataset_factor}f.dat
rm ${DATASET_PATH}/customer_sanitized_[0-9]_${dataset_factor}f.dat
rm ${DATASET_PATH}/customer_demographics_sanitized_[0-9]_${dataset_factor}f.dat

done
done

0 comments on commit cad25a1

Please sign in to comment.