Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated Oxla results #181

Merged
merged 1 commit into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion index.html
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@
,{"system":"Motherduck","date":"2024-01-27","machine":"cloud","cluster_size":1,"tags":["C++","column-oriented","serverless"],"load_time":4122,"data_size":25332035584,"result":[[0.21968004433438182,0.3162029208615422,0.21604336006566882],[0.33489555725827813,0.29951094510033727,0.21411490021273494],[0.4648742200806737,0.3040045131929219,0.30952421203255653],[0.6499226209707558,0.22191201290115714,0.2888053017668426],[0.7704195859842002,0.921060211956501,0.819766622968018],[1.9447067943401635,1.2225632020272315,1.3308632392436266],[0.2486483482643962,0.22854855004698038,0.20795917278155684],[0.22081567207351327,0.2213178831152618,0.2327750907279551],[1.3735258071683347,1.2279518628492951,1.0233631990849972],[1.277890115045011,1.3313516019843519,1.1260047918185592],[0.6073150201700628,0.2593280617147684,0.3234165138565004],[0.34701970824971795,0.32168390695005655,0.3142702719196677],[1.0892055933363736,1.1165755479596555,0.8717481149360538],[1.5503880502656102,1.6257044719532132,1.464306827634573],[0.9123282572254539,0.7499142647720873,0.8189653856679797],[0.9769321829080582,0.8805147060193121,0.8598887426778674],[1.8943038629367948,2.4523583548143506,2.457844952121377],[2.4018522929400206,2.1315763420425355,2.188034536782652],[7.007666043005884,5.863597965799272,5.461284582037479],[0.24364612391218543,0.325953362043947,0.3064295807853341],[15.391365340910852,0.8924880139529705,0.8800020259805024],[1.0099374479614198,0.7060354948043823,0.7063306840136647],[8.954986380413175,4.095173366833478,3.279917892999947],[23.613256133161485,19.25096853263676,23.278139277826995],[1.5025351103395224,0.7977945390157402,0.6170151601545513],[0.46431142510846257,0.4349544760771096,0.36817489471286535],[0.5523030371405184,0.5110767548903823,0.3649540198966861],[4.98280710587278,2.0472186598926783,2.6902666091918945],[6.719612340908498,4.538661384955049,4.369533614721149],[3.358773229178041,3.1345412940718234,3.100874589756131],[2.000370934139937,1.3284917967393994,1.2289274740032852],[3
.7089375513605773,2.681138473097235,1.91789041319862],[7.777549963910133,7.958697326015681,7.435122138820589],[9.89957385417074,6.938831991981715,5.757995248306543],[5.393568014726043,5.632281670346856,5.551704770885408],[1.4525640471838415,0.8954971083439887,0.9103033309802413],[0.22383009875193238,0.2152935261838138,0.21353887394070625],[0.2124440912157297,0.20737810991704464,0.20923770079389215],[0.2175677828490734,0.23641109932214022,0.22010038886219263],[0.24250942608341575,0.21723396237939596,0.2134688631631434],[0.21473077218979597,0.2085639163851738,0.251109620090574],[0.21749612782150507,0.24898552894592285,0.20936914114281535],[0.2193519319407642,0.2155164610594511,0.20863541215658188]],"source":"motherduck/results/result.json"}
,{"system":"MySQL (MyISAM)","date":"2022-07-01","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"","tags":["C++","row-oriented","MySQL compatible"],"load_time":2512,"data_size":121588958061,"result":[[0,0,0],[283.32,276.83,274.52],[276.93,278.29,283.27],[28.83,23.63,21.55],[46.41,40.81,40.93],[467.04,467.39,469.08],[31.02,25.89,24.2],[277.89,275.3,277.3],[329.34,325.8,325.35],[342.86,338.43,336.95],[282.03,279.87,281.22],[277.74,282.68,282],[335.66,334.83,336.44],[305.24,310.39,307.3],[337.41,338.52,342.94],[308.66,307.34,306.27],[738.38,748.44,740.75],[738.75,734.01,738.25],[867.01,872.92,868.84],[25.65,20.61,18.46],[312.39,313.67,306.66],[301.66,305.12,308.01],[298.12,298.44,312.4],[311.34,309.9,311.85],[281.87,278.5,275],[277.46,277.46,277.46],[280.75,278.04,281.76],[263.9,417.39,406.88],[707.21,711.96,705],[668.1,668.33,665.96],[330.31,333.36,331.94],[506.57,506.18,500.53],[2604.49,2681.96,2703.12],[830.65,832.88,831.14],[831.98,830.46,833.41],[608.49,608.51,613.68],[4.56,4.13,4.16],[3.8,3.8,3.7],[1.65,1.45,1.46],[6.33,5.14,6.15],[1.6,1.41,1.41],[1.56,1.42,1.39],[7.04,1.17,1.13]],"source":"mysql-myisam/results/c6a.4xlarge.json"}
,{"system":"MySQL","date":"2022-07-01","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"","tags":["C++","row-oriented","MySQL compatible"],"load_time":9472,"data_size":171953585825,"result":[[339.77,339.88,339.77],[364.91,371.86,367.55],[366.2,368.91,389.66],[364.39,377.53,571.45],[377.69,390.02,384.86],[569.48,576.51,574.68],[367.4,368.23,370.41],[371.29,384.02,613.22],[478.85,683.22,495.68],[489.9,635.96,662.43],[386.07,396.49,640.15],[389.13,412.55,444.12],[447.97,455.54,448.06],[423.22,845.44,813.6],[452.48,460.07,453.98],[577.54,623.21,586.49],[852.07,856.36,862.66],[838.09,848.92,851.12],[1006.37,1011.16,1023.17],[369.76,375.61,415.28],[412.45,419.9,456.62],[411.65,432.88,482.2],[412.73,420.73,429.5],[551.16,577.62,545.45],[382.89,394.76,386.37],[380.9,391.4,385.05],[385.3,394.67,460.32],[388.95,394.7,387.21],[800.33,807.9,807.11],[706.03,745.27,718.9],[450.9,489.59,530.97],[625.5,651.93,647.32],[2721.13,2792.12,2819.26],[945.9,954.94,957.54],[945.42,953.78,965.16],[684.36,716.29,708.75],[10.01,3.79,3.77],[7.48,3.32,3.27],[5.09,0.98,0.96],[8.7,4.77,4.68],[4.82,0.76,0.74],[4.46,0.77,0.75],[7.04,1.17,1.13]],"source":"mysql/results/c6a.4xlarge.json"}
,{"system":"Oxla.com","date":"2024-01-31","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"ingests data only from non-compressed cvs. data should be ingested in chunks < ~5Gb","tags":["C","analytical","somewhat PostgreSQL compatible"],"load_time":584.88,"data_size":83948840996,"result":[[0.050129,0.022855,0.022903],[0.057503,0.015481,0.015516],[0.094277,0.025541,0.026299],[0.16258,0.024306,0.023829],[0.039942,0.021789,0.02218],[0.1374,0.101147,0.099119],[0.059388,0.025719,0.0264],[0.033712,0.017465,0.0184],[0.135859,0.096702,0.095512],[0.171016,0.155303,0.154772],[0.190669,0.182083,0.183751],[0.22111,0.18886,0.193252],[2.52142,2.55214,2.54007],[2.6807,2.5843,2.63534],[2.75055,2.68715,2.67322],[1.52986,1.47056,1.49706],[8.22161,7.84746,7.57008],[7.06027,7.33078,6.88351],[10.5416,10.2955,10.2149],[0.053765,0.02466,0.023807],[2.67538,2.54948,2.71025],[null,null,null],[null,null,null],[8.75207,8.09075,8.26788],[0.359349,0.31487,0.312975],[0.331405,0.339576,0.332647],[0.351841,0.340408,0.325682],[1.62115,1.67895,1.74872],[null,null,null],[1.58662,1.58356,1.58278],[0.632047,0.553994,0.586148],[1.15181,1.14935,1.10892],[13.1952,13.0677,12.6957],[15.9073,13.0187,12.6488],[12.3274,13.7521,14.7386],[1.27877,1.35705,1.24621],[1.011,0.955655,0.967898],[3.46346,0.841062,0.878294],[0.867865,0.885799,0.812358],[4.55556,1.79754,1.75293],[4.00301,0.064817,0.062893],[0.163954,0.063076,0.060033],[null,null,null]],"source":"oxla/results/c6a.4xlarge.json"}
,{"system":"Oxla","date":"2024-04-09","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"Ingests data only from non-compressed cvs.","tags":["C","analytical","somewhat PostgreSQL compatible"],"load_time":497.815,"data_size":17394972923,"result":[[3.112892,0.068225,0.049215],[1.476993,0.068502,0.01352],[1.532504,0.015794,0.01899],[1.541791,0.043208,0.090244],[1.424205,1.114138,1.079011],[1.546764,1.340306,1.339957],[0.202873,0.009787,0.008214],[1.017122,0.01247,0.010861],[1.790766,1.816432,1.681662],[2.06058,2.051205,2.05747],[0.166164,0.149605,0.147586],[0.33821,0.15334,0.15212],[0.968408,0.975795,0.932127],[1.641231,1.648973,1.69153],[1.039926,1.021776,1.015062],[1.059569,1.038191,1.016849],[2.930077,2.780725,2.786122],[2.7766,2.745188,2.827054],[5.474963,5.455883,5.462812],[0.069049,0.037876,0.030425],[5.294758,2.818725,2.803313],[null,null,null],[null,null,null],[21.034479,18.253271,6.146486],[0.17394,0.151798,0.146398],[0.180155,0.170271,0.177003],[0.22494,0.216158,0.216051],[0.978861,0.973059,0.964485],[null,null,null],[0.030928,0.02037,0.020366],[0.408601,0.412485,0.408602],[0.875709,0.743332,0.704842],[7.962516,7.867736,7.594272],[6.209667,5.892066,5.963681],[5.931634,5.947336,6.005506],[0.577314,0.583573,0.545736],[0.126127,0.090768,0.094307],[0.110712,0.04149,0.039939],[0.060824,0.043637,0.030213],[0.322545,0.204934,0.185178],[0.121207,0.011082,0.011699],[0.069138,0.012728,0.014108],[0.030538,0.028048,0.030625]],"source":"oxla/results/c6a.4xlarge.json"}
,{"hide":false,"system":"ParadeDB","date":"2024-02-02","machine":"c6a.4xlarge, 1500gb gp2","cluster_size":1,"comment":"The results for (c6a.4xlarge, 500gb gp2) are also submitted here for easy comparison with Elasticsearch","tags":["Rust","row-oriented","column-oriented","search","PostgreSQL compatible"],"load_time":1294,"data_size":15415061091,"result":[[0.170805,0.005724,0.006098],[0.207075,0.09786,0.097367],[0.223766,0.088988,0.083545],[0.428068,0.097471,0.098085],[1.02813,0.842475,0.847983],[1.47076,1.27149,1.24457],[0.079231,0.005903,0.006381],[0.20438,0.102631,0.101062],[1.61474,1.57814,1.57654],[1.34235,1.09032,1.07038],[0.624544,0.350221,0.354308],[0.592603,0.38256,0.382334],[1.5044,1.32063,1.31768],[3.54282,2.67601,2.61359],[1.66994,1.46717,1.4526],[1.16541,0.999982,0.975601],[3.36336,2.90587,2.89977],[3.1358,2.75251,2.71843],[7.57537,5.9709,6.09835],[0.450773,0.136726,0.134937],[9.75662,1.39915,1.44835],[11.2973,1.70324,1.6996],[22.2634,4.01717,3.96894],[57.8901,10.7998,10.8794],[3.10183,0.605149,0.595146],[0.829921,0.542481,0.531427],[3.01411,0.668447,0.680061],[9.88178,2.28414,2.28912],[9.13347,5.15906,5.15358],[0.599997,0.454952,0.463381],[2.34495,1.18331,1.19755],[5.76726,1.62152,1.52735],[8.98009,8.85184,8.65374],[11.9086,6.49934,6.7125],[12.2924,7.10393,7.21253],[2.04731,1.8265,1.84334],[0.274516,0.252795,0.251755],[0.338633,0.253551,0.252828],[0.339914,0.254548,0.253683],[0.244831,0.158776,0.158403],[0.382238,0.253784,0.254412],[0.404016,0.253894,0.253025],[0.358676,0.204648,0.204944]],"source":"paradedb/results/c6a.4xlarge.1500gb.json"}
,{"hide":false,"system":"ParadeDB","date":"2024-02-02","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"","tags":["Rust","row-oriented","column-oriented","search","PostgreSQL compatible"],"load_time":1294,"data_size":15415061091,"result":[[0.170805,0.005724,0.006098],[0.207075,0.09786,0.097367],[0.223766,0.088988,0.083545],[0.428068,0.097471,0.098085],[1.02813,0.842475,0.847983],[1.47076,1.27149,1.24457],[0.079231,0.005903,0.006381],[0.20438,0.102631,0.101062],[1.61474,1.57814,1.57654],[1.34235,1.09032,1.07038],[0.624544,0.350221,0.354308],[0.592603,0.38256,0.382334],[1.5044,1.32063,1.31768],[3.54282,2.67601,2.61359],[1.66994,1.46717,1.4526],[1.16541,0.999982,0.975601],[3.36336,2.90587,2.89977],[3.1358,2.75251,2.71843],[7.57537,5.9709,6.09835],[0.450773,0.136726,0.134937],[9.75662,1.39915,1.44835],[11.2973,1.70324,1.6996],[22.2634,4.01717,3.96894],[57.8901,10.7998,10.8794],[3.10183,0.605149,0.595146],[0.829921,0.542481,0.531427],[3.01411,0.668447,0.680061],[9.88178,2.28414,2.28912],[9.13347,5.15906,5.15358],[0.599997,0.454952,0.463381],[2.34495,1.18331,1.19755],[5.76726,1.62152,1.52735],[8.98009,8.85184,8.65374],[11.9086,6.49934,6.7125],[12.2924,7.10393,7.21253],[2.04731,1.8265,1.84334],[0.274516,0.252795,0.251755],[0.338633,0.253551,0.252828],[0.339914,0.254548,0.253683],[0.244831,0.158776,0.158403],[0.382238,0.253784,0.254412],[0.404016,0.253894,0.253025],[0.358676,0.204648,0.204944]],"source":"paradedb/results/c6a.4xlarge.json"}
,{"system":"Pinot","date":"2022-07-01","machine":"c6a.4xlarge, 500gb gp2","cluster_size":1,"comment":"It successfully loaded only 94465149 out of 99997497 records. Some queries returned NullPointerException. The loading process is painful - splitting to 100 pieces required. It does not correctly report errors on data loading, the results may be incorrect.","tags":["Java","column-oriented"],"load_time":2032,"data_size":null,"result":[[0.002,0.001,0.001],[0.186,0.186,0.185],[0.251,0.276,0.258],[0.475,0.281,0.238],[3.907,3.655,3.633],[30.471,14.687,14.93],[null,null,null],[0.135,0.134,0.148],[3.039,2.902,2.938],[3.159,3.212,3.225],[4.217,4.197,4.384],[4.145,4.124,4.121],[2.989,3.145,3.18],[6.402,6.886,6.374],[3.245,3.35,3.129],[5.112,5.027,5.141],[5.509,5.279,5.257],[0.865,0.856,0.829],[null,null,null],[0.017,0.015,0.015],[54.348,19.562,19.128],[null,null,null],[76.596,74.719,14.228],[7.441,5.77,5.87],[0.376,0.327,0.286],[7.689,0.395,1.281],[3.434,0.499,0.5],[27.679,2.378,2.393],[null,null,null],[2.221,2.227,2.167],[4.941,4.639,4.565],[5.641,5.37,5.007],[5.295,5.006,5.357],[5.28,5.21,5.105],[6.231,6.238,6.385],[5.918,5.933,5.934],[0.26,0.202,0.21],[0.364,0.072,0.069],[0.042,0.034,0.035],[1.483,0.686,0.651],[0.113,0.071,0.079],[0.042,0.051,0.037],[null,null,null]],"source":"pinot/results/c6a.4xlarge.json"}
Expand Down
109 changes: 15 additions & 94 deletions oxla/benchmark.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
#!/bin/bash -e

# cleanup
sudo ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9

# docker
sudo rm /usr/share/keyrings/docker-archive-keyring.gpg
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt update
Expand All @@ -15,109 +10,35 @@ sudo apt install -y docker-ce
sudo apt-get install -y postgresql-client curl wget apt-transport-https ca-certificates software-properties-common gnupg2 parallel
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential

# ruby and fake S3
sudo apt install -y ruby-full
sudo gem install bundler fakes3 webrick sorted_set

# install aws cli tools
sudo rm /usr/local/bin/aws
sudo rm /usr/local/bin/aws_completer
sudo rm -rf /usr/local/aws-cli
sudo rm -rf ~/.aws/ aws

curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
unzip awscliv2.zip
sudo ./aws/install --update
/usr/local/bin/aws --version
rm -f awscliv2.zip

# configure aws
mkdir -p ~/.aws
echo -e "[default]\nregion = none" > ~/.aws/config
echo -e "[default]\naws_access_key_id = none\naws_secret_access_key = none" > ~/.aws/credentials

# run fake S3
sudo ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
sudo rm -rf /mnt/fakes3_root
sudo mkdir -p /mnt/fakes3_root
sudo chmod a+rw /mnt/fakes3_root -R
fakes3 -r /mnt/fakes3_root -H 0.0.0.0 -p 4569 --license license.pdf > /dev/null 2>&1 &
sleep 10 # waiting for the fake S3 server to start

# download dataset
wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
gzip -d hits.tsv.gz
chmod 777 ~ hits.tsv

# convert dataset to csv
rm -f part_*.csv
curl https://clickhouse.com/ | sh
./clickhouse local --query "SELECT * FROM 'hits.tsv' INTO OUTFILE 'hits.csv'"
rm hits.tsv

# prepare digestible parts (5M rows each) of hits.csv
split -l 5000000 hits.csv part_
for file in part_*; do mv "$file" "${file}.csv"; done

# upload dataset (prepared parts) to fake S3 bucket
aws s3 mb s3://my-new-bucket --endpoint-url http://localhost:4569

for file in part_*.csv; do
echo "Processing file: $file"

# copy the file to the S3 bucket
aws s3 cp "./$file" s3://my-new-bucket --endpoint-url http://localhost:4569 > /dev/null 2>&1

# clean-up tmp parts left after upload
TMPPARTS=$(aws s3api list-objects --bucket my-new-bucket --query "Contents[?contains(Key, '_${file}_')].Key" --output text --endpoint-url http://localhost:4569)
echo $TMPPARTS | tr ' ' '\n' | grep . | parallel -j16 aws s3api delete-object --bucket my-new-bucket --key {} --endpoint-url http://localhost:4569
done
echo "Download dataset."
wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz'
echo "Unpack dataset."
gzip -d hits.csv.gz
chmod 777 ~ hits.csv
mkdir data
mv hits.csv ~/data

# get and configure Oxla image
echo "Install and run Oxla."

sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9

sudo docker run --rm -p 5432:5432 --name oxlacontainer public.ecr.aws/oxla/release:latest > /dev/null 2>&1 &
sleep 10 # waiting for container start and db initialisation (leader election, etc.)

sudo docker exec oxlacontainer /bin/bash -c "sed -i 's#endpoint: \"\"#endpoint: \"http://localhost:4569\"#g' oxla/default_config.yml"
sudo docker exec oxlacontainer /bin/bash -c "sed -i 's#endpoint:.*#endpoint: '\''http://localhost:4569'\''#g' oxla/startup_config/config.yml"
sudo docker rmi oxla-configured-image:latest > /dev/null 2>&1 || echo "" > /dev/null
sudo docker commit oxlacontainer oxla-configured-image

sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9

# run oxla
sudo docker run --rm --net=host --name oxlacontainer oxla-configured-image > /dev/null 2>&1 &
sleep 10 # waiting for container start and db initialisation (leader election, etc.)
sudo docker run --rm -p 5432:5432 -v ~/data:/data --name oxlacontainer public.ecr.aws/oxla/release:1.20.0-beta > /dev/null 2>&1 &
sleep 30 # waiting for container start and db initialisation (leader election, etc.)

# create table and ingest data
export PGCLIENTENCODING=UTF8
psql -h localhost -p 5432 -U postgres -t -c 'CREATE SCHEMA test'
psql -h localhost -p 5432 -U postgres -d test -t < create.sql

for file in part_*.csv; do
echo "Processing file: $file"
psql -h localhost -p 5432 -U postgres -d test -t -c '\timing' -c "COPY hits FROM 's3://my-new-bucket/$file';"
aws s3api delete-object --bucket my-new-bucket --key "$file" --endpoint-url http://localhost:4569
done
psql -h localhost -t < create.sql
echo "Insert data."
psql -h localhost -t -c '\timing' -c "COPY hits FROM '/data/hits.csv';"

# get ingested data size
echo "data size after ingest:"
sudo docker exec oxlacontainer /bin/bash -c "du -s oxla/data"
psql -h localhost -t -c '\timing' -c "SELECT pg_total_relation_size('hits');"

# wait for merges to finish
sleep 30

# kill fake S3 and remove its data
ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
sudo rm -rf /mnt/fakes3_root
sleep 60

# run benchmark
echo "running benchmark..."
./run.sh 2>&1 | tee log.txt

# format results
cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' |
awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
./run.sh
1 change: 1 addition & 0 deletions oxla/create.sql
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,4 @@ CREATE TABLE hits
URLHash BIGINT NOT NULL,
CLID INTEGER NOT NULL
);
CREATE INDEX hits ON hits(CounterID, EventDate, UserID, EventTime, WatchID);