Skip to content

Commit

Permalink
Issue12/add_barcoding_kits_and_sample_rate (#13)
Browse files Browse the repository at this point in the history
* Add the end point for the available barcoding kits in protocol proto

* Add sample rate endpoint for `readfish >= 2024.X.X`

* Change debug logs to `debug!` from `info!`

* Update R9 example profiles

* Update R10 barcoded toml

* Add noise to R10 squiggle to vastly improve performance

* Don't add digitisation. I have no idea what effect this has on downstream basecalling, but it makes readfish base-calling a lot better

* Set the conencted guppy version to 7.3.9. WILL LIMIT readfish compatibility to 2024.2.0

* Revert "Add noise to R10 squiggle to vastly improve performance"

This reverts commit 9423c1c.

* barcode is now R10, 5kHZ with a newly derived model works well

* Include digitisation, range, scale and offset on sim.
Send them over the device RPC, so readfish uses the correct values when base calling.

* Update prefix squiggle

* newly derived model for R10 5Khz, using MEAN 100.5 and Std dev 23.5

* Mutate the signal vec in place for ADC values

* Fix clippy warnings

* Add rayon

* Add Laplace noise in first iteration of R10 signal
parallelise conversion to ADC

* Implement the experiment_duration flag to actually work, needs docs updating

* 5Khz human barcoded toml

* Set PoreType to NotSet for POD5 output
Upgrades Podders to 0.1.4, which has the NotSet PoreType Enum variant.
This matches MinKNOW which also doesn't set this field.

* Add experiment duration to the tables f toml settings

---------

Co-authored-by: satriobio <satrio.biology@yahoo.com>
  • Loading branch information
Adoni5 and satriobio committed Jul 10, 2024
1 parent 703d6d0 commit 3c62096
Show file tree
Hide file tree
Showing 44 changed files with 262,357 additions and 262,218 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,11 @@ fern = { version = "0.6.2", features = ["colored", "chrono"] }
humantime = "2.1.0"
lazy_static = "1.4.0"
indicatif = "0.17.5"
podders = "0.1.3"
podders = "0.1.4"
# podders = { git = "https://github.com/Adoni5/podders.git" }
#podders = { path = "../podders" }
probability = "0.20.3"
rayon = "1.10.0"


[build-dependencies]
Expand Down
11 changes: 0 additions & 11 deletions Icarust.code-workspace

This file was deleted.

25 changes: 25 additions & 0 deletions Profile_tomls/config_dnar10_5khz_human_barcoded.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
output_path = "/data/projects/rory_says_hi/readfish_update_paper"
target_yield = 100000000000
working_pore_percent = 80
pore_type = "R10"
nucleotide_type = "DNA"

[parameters]
sample_name = "human_barcoded"
experiment_name = "test_timings_grid_builtin"
flowcell_name = "PAS000000"
experiment_duration_set = 120
device_id = "Bantersaurus"
position = "FenceSitter"
sample_rate = 5000
break_read_ms = 800

[[sample]]
name = "Bacteria 1"
input_genome = "/data/projects/rory_says_hi/hg38_no_alts.part_NC_000001.11 Homo sapiens chromosome 1, GRCh38.p14 Primary Assembly.fa.gz"
#input_genome = "/data/refs/hg38.p14.simple.fa"
#input_genome = "/data/projects/rory_says_hi/Icarust/docker/squiggle_arrs/mixed_ref.fa"
mean_read_length = 10000
weight = 1
barcodes = ["Barcode01", "Barcode02", "Barcode03"]
barcode_weights = [1, 2, 1]
7 changes: 4 additions & 3 deletions Profile_tomls/config_dnar10_barcoded.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
output_path = "/tmp/"
global_mean_read_length = 15000
target_yield = 100000000000
working_pore_percent = 85
working_pore_percent = 100
pore_type = "R10"
nucleotide_type = "DNA"
random_seed = 9

[parameters]
sample_name = "test"
experiment_name = "test_2_bacteria"
sample_name = "thesis_barcode_weights"
experiment_name = "thesis_barcode_weights"
flowcell_name = "FAQ1234"
experiment_duration_set = 4800
device_id = "Bantersaurus"
Expand Down
4 changes: 2 additions & 2 deletions Profile_tomls/config_dnar9.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ sample_rate = 4000

[[sample]]
name = "Bacteria 1"
input_genome = "squiggle_arrs/NC_002516.2.squiggle.npy"
input_genome = "squiggle_arrs/mixed_ref_r9/NC_002516.2.squiggle.npy"
mean_read_length = 20000
weight = 1

[[sample]]
name = "Bacteria 2"
input_genome = "squiggle_arrs/NC_003997.3.squiggle.npy"
input_genome = "squiggle_arrs/mixed_ref_r9/NC_003997.3.squiggle.npy"
mean_read_length = 15000
weight = 2
4 changes: 2 additions & 2 deletions Profile_tomls/config_dnar9_barcoded.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ sample_rate = 4000

[[sample]]
name = "Bacteria 1"
input_genome = "docker/squiggle_arrs/mixed_ref.fa"
mean_read_length = 10000
input_genome = "squiggle_arrs/mixed_ref_r9"
mean_read_length = 5000
weight = 1
barcodes = ["Barcode01", "Barcode02", "Barcode03"]
barcode_weights = [1, 2, 3]
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ The parameters are applied to the "sequencer". They are used to setup the GRPC s
| position | string | True | Position name. This has to match what readfish is looking for. |
| break_read_ms | int | False | How many milliseconds to chunk reads into. Default 400. |
| sample_rate | int | False | Sample rate in Hz. Default [4000]. Suggest 3000 for RNA, 4000 or 5000 for DNA otherwise Dorado will throw a Hissy fit. |
| experiment_duration_set | int | False | Time in minutes to run the experiment for. |


### Sample
Expand Down
2 changes: 1 addition & 1 deletion config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ manager = 10000
position = 10001

[SEQUENCER]
channels = 3000
channels = 512
8 changes: 8 additions & 0 deletions proto/minknow_api/device.proto
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ service DeviceService {
rpc get_calibration (GetCalibrationRequest) returns (GetCalibrationResponse) {
option idempotency_level = NO_SIDE_EFFECTS;
}

// Get the sample rate of the device
//
// Please refer to MinionDeviceService and PromethionDeviceService for the expected
// return value for a minion and promethion respectively
rpc get_sample_rate (GetSampleRateRequest) returns (GetSampleRateResponse) {
option idempotency_level = NO_SIDE_EFFECTS;
}
}


Expand Down
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions squiggle_arrs/mixed_ref_r9/distributions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"weights": [6264404, 5227293], "names": ["NC_002516.2", "NC_003997.3"]}
47 changes: 33 additions & 14 deletions src/impl_services/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -417,8 +417,8 @@ fn start_write_out_thread(
Some(RunInfoData {
acquisition_id: run_id.clone(),
acquisition_start_time: 1625097600000,
adc_max: 32767,
adc_min: -32768,
adc_max: 2047,
adc_min: 0,
context_tags: context_tags
.iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
Expand Down Expand Up @@ -525,8 +525,8 @@ fn start_write_out_thread(
]);
let channel_info = ChannelInfo::new(
2048_f64,
0.0,
200.0,
-243.0,
2048.0 * 0.1462070643901825,
config.parameters.get_sample_rate() as f64,
to_write_info.channel_number.clone(),
);
Expand All @@ -548,18 +548,21 @@ fn start_write_out_thread(
} else {
EndReason::SIGNAL_POSITIVE
};
let pt = match ic_pt {
PoreType::R9 => PodPoreType::R9,
PoreType::R10 => PodPoreType::R10,
};
// Apparently, MinKNOW doesn't set this - or specifically it is set to
// not-set, so we will match that.
// let pt = match ic_pt {
// PoreType::R9 => PodPoreType::R9,
// PoreType::R10 => PodPoreType::R10,
// };
let pt = PodPoreType::NotSet;
let read: PodReadInfo = PodReadInfo {
read_id: Uuid::parse_str(to_write_info.read_id.as_str()).unwrap(),
pore_type: pt,
signal_: signal,
channel: to_write_info.channel as u16,
well: 1,
calibration_offset: -264.0,
calibration_scale: 0.187_069_85,
calibration_offset: -243.0,
calibration_scale: 0.14620706,
read_number: to_write_info.read_number,
start: 1,
median_before: 100.0,
Expand Down Expand Up @@ -1294,7 +1297,7 @@ fn generate_read(
value.start_time_utc = Utc::now();
value.read_number = *read_number;
let sample_choice: &String = &samples[dist.sample(rng)];
value.read_sample_name = sample_choice.clone();
value.read_sample_name.clone_from(sample_choice);
let sample_info: &SampleInfo = &views[sample_choice];
// choose a barcode if we need to - else we always use the first distirbution in the vec
let mut file_weight_choice: usize = 0;
Expand Down Expand Up @@ -1437,13 +1440,27 @@ impl DataServiceServicer {
// start the thread to generate data
thread::spawn(move || {
let r: ReacquisitionPoisson = ReacquisitionPoisson::new(1.0, 0.0, 0.0001, 0.05);
let _experiment_duration = config.parameters.experiment_duration_set;

// read number for adding to unblock
let mut read_number: u32 = 0;
let mut completed_reads: u32 = 0;

// Infinte loop for data generation
loop {
if let Some(_experiment_duration) = _experiment_duration {
// The experiment length in minutes
let minutes_since_start = (Utc::now().timestamp() as u64 - start_time) / 60;
if std::convert::TryInto::<usize>::try_into(minutes_since_start).unwrap()
> _experiment_duration
{
info!("Reached experiment duration, stopping...");
{
*graceful_shutdown.lock().unwrap() = true;
}
break;
}
}
let read_process = Instant::now();
// debug!("Sequencer mock loop start");
let mut new_reads = 0;
Expand Down Expand Up @@ -1520,7 +1537,7 @@ impl DataServiceServicer {
continue;
}
// chance to aquire a read
if rng.gen_bool(0.8) {
if rng.gen_bool(0.75) {
new_reads += 1;
read_number += 1;
generate_read(
Expand Down Expand Up @@ -1555,6 +1572,8 @@ impl DataServiceServicer {
break;
}
}
std::thread::sleep(Duration::from_secs(10));
std::process::exit(0);
});
// return our newly initialised DataServiceServicer to add onto the GRPC server
DataServiceServicer {
Expand Down Expand Up @@ -1647,7 +1666,7 @@ impl DataService for DataServiceServicer {
// Convert sample_rate_hz into microseconds
let chunk_to_serve_length: usize = ((sample_rate_hz as f64 / 1_000_000_f64) * elapsed_time.num_microseconds().unwrap() as f64) as usize;
let stop = start + chunk_to_serve_length as usize;
info!("elasped: {elapsed_time}, start {start}, length {chunk_to_serve_length}, stop: {stop}");
debug!("elasped: {elapsed_time}, start {start}, length {chunk_to_serve_length}, stop: {stop}");
// let mut stop = convert_milliseconds_to_samples(elapsed_time.num_milliseconds(), sampling);
// slice of signal is too short
if start > stop || (stop - start) < chunk_to_serve_length as usize {
Expand All @@ -1661,7 +1680,7 @@ impl DataService for DataServiceServicer {
}
// CHeck start is not past end
if start > read_info.read.len() {
info!("Skipping as start {start} is greater than read signal lenth {}", read_info.read.len());
warn!("Skipping as start {start} is greater than read signal lenth {}", read_info.read.len());
continue
}

Expand Down
44 changes: 34 additions & 10 deletions src/impl_services/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
//!
//! 2. get_flow_cell_info
//!
//! Returns a tonne of information about the flowclel
//! Returns a tonne of information about the flowcell
//!
//! 3. Returns the sample rate, which is used in dorado >= 7.3.9
//!
use crate::services::minknow_api::device;
use crate::services::minknow_api::device::device_service_server::DeviceService;
Expand All @@ -16,11 +18,27 @@ use tonic::{Request, Response, Status};
#[derive(Debug)]
pub struct Device {
channel_size: usize,
sample_rate: u32,
offset: f32,
range: f32,
digitisation: u32,
}

impl Device {
pub fn new(channel_size: usize) -> Device {
Device { channel_size }
pub fn new(
channel_size: usize,
sample_rate: u32,
offset: f32,
range: f32,
digitisation: u32,
) -> Device {
Device {
channel_size,
sample_rate,
offset,
range,
digitisation,
}
}
}
#[tonic::async_trait]
Expand All @@ -34,14 +52,10 @@ impl DeviceService for Device {
let channels: u32 = request_values.last_channel - request_values.first_channel + 1;
// explicitly convert to usize from u32
let n_us = usize::try_from(channels).unwrap();
let mut offsets = vec![0.0];
// resize in place
offsets.resize(n_us, 0.0);
let mut pa_ranges = vec![1.0];
// resize in place
pa_ranges.resize(n_us, 1.0);
let offsets = vec![self.offset; n_us];
let pa_ranges = vec![self.range; n_us];
return Ok(Response::new(device::GetCalibrationResponse {
digitisation: 1,
digitisation: self.digitisation,
offsets,
pa_ranges,
has_calibration: true,
Expand Down Expand Up @@ -69,4 +83,14 @@ impl DeviceService for Device {
insertion_script_status: 0,
}))
}

/// Get the sample rate for a givenexperiment
async fn get_sample_rate(
&self,
_request: Request<device::GetSampleRateRequest>,
) -> Result<Response<device::GetSampleRateResponse>, Status> {
Ok(Response::new(device::GetSampleRateResponse {
sample_rate: self.sample_rate,
}))
}
}
2 changes: 1 addition & 1 deletion src/impl_services/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl ManagerService for Manager {
guppy_connected_version: String::from("6hopefully"),
}))
}

#[allow(clippy::diverging_sub_expression)]
async fn describe_host(
&self,
_request: Request<DescribeHostRequest>,
Expand Down
Loading

0 comments on commit 3c62096

Please sign in to comment.