Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: flush job #80

Merged
merged 3 commits into from
Jul 14, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/storage/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ snafu = { version = "0.7", features = ["backtraces"] }
store-api = { path = "../store-api" }
regex = "1.5"
tokio = { version = "1.18", features = ["full"] }
uuid = { version = "1.1" , features=["v4"]}

[dev-dependencies]
tempdir = "0.3"
11 changes: 8 additions & 3 deletions src/storage/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ pub enum Error {

#[snafu(display("Invalid timestamp in write batch, source: {}", source))]
InvalidTimestamp { source: crate::write_batch::Error },

#[snafu(display("Task already cancelled"))]
Canceled { backtrace: Backtrace },
evenyag marked this conversation as resolved.
Show resolved Hide resolved
}

pub type Result<T> = std::result::Result<T, Error>;
Expand All @@ -133,9 +136,11 @@ impl ErrorExt for Error {
| BatchMissingColumn { .. }
| InvalidTimestamp { .. } => StatusCode::InvalidArguments,

Utf8 { .. } | EncodeJson { .. } | DecodeJson { .. } | JoinTask { .. } => {
StatusCode::Unexpected
}
Utf8 { .. }
| EncodeJson { .. }
| DecodeJson { .. }
| JoinTask { .. }
| Canceled { .. } => StatusCode::Unexpected,

FlushIo { .. }
| InitBackend { .. }
Expand Down
49 changes: 42 additions & 7 deletions src/storage/src/flush.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@ use common_time::RangeMillis;
use store_api::manifest::Manifest;
use store_api::manifest::ManifestVersion;
use store_api::storage::SequenceNumber;
use uuid::Uuid;

use crate::background::{Context, Job, JobHandle, JobPoolRef};
use crate::error::Result;
use crate::error::{CanceledSnafu, Result};
use crate::manifest::action::*;
use crate::memtable::MemtableRef;
use crate::memtable::{IterContext, MemtableRef};
use crate::region::RegionWriterRef;
use crate::region::SharedDataRef;
use crate::sst::{AccessLayerRef, FileMeta};
use crate::sst::{AccessLayerRef, FileMeta, WriteOptions};
use crate::version::VersionEdit;

/// Default write buffer size (32M).
Expand Down Expand Up @@ -148,12 +149,41 @@ pub struct FlushJob {
impl FlushJob {
async fn write_memtables_to_layer(&self, ctx: &Context) -> Result<Vec<FileMeta>> {
if ctx.is_cancelled() {
// TODO(yingwen): [flush] Returns an cancelled error.
unimplemented!();
return CanceledSnafu {}.fail();
}

// TODO(yingwen): [flush] Flush memtables to sst layer.
unimplemented!()
let mut futures = Vec::with_capacity(self.memtables.len());
for m in &self.memtables {
let file_name = Self::generate_sst_file_name();
// TODO(hl): Check if random file name already exists in meta.

let row_group_size = 128; // row group size should be same as iterator batch size.
let iter_ctx = IterContext {
v0y4g3r marked this conversation as resolved.
Show resolved Hide resolved
batch_size: row_group_size,
visible_sequence: 0, // not used if `for_flush` set to true.
for_flush: true,
};

let iter = m.memtable.iter(iter_ctx)?;
let future = self
.sst_layer
.write_sst(file_name.clone(), iter, WriteOptions::default());
futures.push(future);
}

let metas = futures_util::future::join_all(futures)
.await
.into_iter()
.collect::<Result<Vec<_>>>()?
.into_iter()
.map(|f| FileMeta {
file_path: f,
level: 0,
})
.collect();

logging::info!("Successfully flush memtables to files: {:?}", metas);
Ok(metas)
}

async fn write_to_manifest(&self, file_metas: &[FileMeta]) -> Result<ManifestVersion> {
Expand All @@ -169,6 +199,11 @@ impl FlushJob {
.update(RegionMetaAction::Edit(edit))
.await
}

/// Generates random SST file name
fn generate_sst_file_name() -> String {
Uuid::new_v4().urn().to_string() + ".parquet"
v0y4g3r marked this conversation as resolved.
Show resolved Hide resolved
}
}

#[async_trait]
Expand Down
21 changes: 10 additions & 11 deletions src/storage/src/sst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,13 @@ pub struct WriteOptions {
/// Sst access layer.
#[async_trait]
pub trait AccessLayer: Send + Sync {
// Writes SST file with given name and returns the full path.
async fn write_sst(
&self,
file_name: &str,
file_name: String,
iter: BatchIteratorPtr,
opts: WriteOptions,
) -> Result<()>;
) -> Result<String>;
}

pub type AccessLayerRef = Arc<dyn AccessLayer>;
Expand All @@ -147,7 +148,7 @@ impl FsAccessLayer {
}

#[inline]
fn sst_file_path(&self, file_name: &str) -> String {
fn sst_file_path(&self, file_name: String) -> String {
format!("{}{}", self.sst_dir, file_name)
}
}
Expand All @@ -156,18 +157,16 @@ impl FsAccessLayer {
impl AccessLayer for FsAccessLayer {
async fn write_sst(
&self,
file_name: &str,
file_name: String,
evenyag marked this conversation as resolved.
Show resolved Hide resolved
iter: BatchIteratorPtr,
opts: WriteOptions,
) -> Result<()> {
) -> Result<String> {
evenyag marked this conversation as resolved.
Show resolved Hide resolved
// Now we only supports parquet format. We may allow caller to specific sst format in
// WriteOptions in the future.
let writer = ParquetWriter::new(
&self.sst_file_path(file_name),
iter,
self.object_store.clone(),
);
let file_path = self.sst_file_path(file_name);
let writer = ParquetWriter::new(file_path.clone(), iter, self.object_store.clone());
v0y4g3r marked this conversation as resolved.
Show resolved Hide resolved

writer.write_sst(opts).await
writer.write_sst(opts).await?;
Ok(file_path)
}
}
8 changes: 4 additions & 4 deletions src/storage/src/sst/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ pub struct ParquetWriter {

impl ParquetWriter {
pub fn new(
file_name: &str,
file_name: String,
v0y4g3r marked this conversation as resolved.
Show resolved Hide resolved
iter: BatchIteratorPtr,
object_store: ObjectStore,
) -> ParquetWriter {
ParquetWriter {
file_name: file_name.to_string(),
file_name,
iter,
object_store,
}
Expand Down Expand Up @@ -214,9 +214,9 @@ mod tests {
let path = dir.path().to_str().unwrap();
let backend = Backend::build().root(path).finish().await.unwrap();
let object_store = ObjectStore::new(backend);
let sst_file_name = "test-flush.parquet";
let sst_file_name = "test-flush.parquet".to_string();
let iter = memtable.iter(IterContext::default()).unwrap();
let writer = ParquetWriter::new(sst_file_name, iter, object_store);
let writer = ParquetWriter::new(sst_file_name.clone(), iter, object_store);

writer
.write_sst(sst::WriteOptions::default())
Expand Down