Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue12/add_barcoding_kits_and_sample_rate #13

Merged
merged 21 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
a7441ca
Add the end point for the available barcoding kits in protocol proto
Adoni5 Apr 29, 2024
6aef351
Add sample rate endpoint for `readfish >= 2024.X.X`
Adoni5 Apr 29, 2024
42d0280
Change debug logs to `debug!` from `info!`
Adoni5 Apr 29, 2024
962af7e
Update R9 example profiles
Adoni5 May 13, 2024
f30b997
Update R10 barcoded toml
Adoni5 May 13, 2024
9423c1c
Add noise to R10 squiggle to vastly improve performance
Adoni5 May 13, 2024
0998001
Don't add digitisation. I have no idea what effect this has on downst…
Adoni5 May 22, 2024
db9d503
Set the conencted guppy version to 7.3.9. WILL LIMIT readfish compati…
Adoni5 May 22, 2024
e772827
Revert "Add noise to R10 squiggle to vastly improve performance"
satriobio Jun 13, 2024
9ac641e
barcode is now R10, 5kHZ with a newly derived model works well
satriobio Jun 17, 2024
8f46f13
Include digitisation, range, scale and offset on sim.
satriobio Jun 17, 2024
a5d10c9
Update prefix squiggle
satriobio Jun 17, 2024
1f39738
newly derived model for R10 5Khz, using MEAN 100.5 and Std dev 23.5
satriobio Jun 17, 2024
5d87550
Mutate the signal vec in place for ADC values
satriobio Jun 17, 2024
17c499e
Fix clippy warnings
satriobio Jun 17, 2024
7311d86
Add rayon
satriobio Jun 17, 2024
ffd2461
Add Laplace noise in first iteration of R10 signal
satriobio Jun 17, 2024
9eb22bf
Implement the experiment_duration flag to actually work, needs docs u…
Adoni5 Jun 20, 2024
cf27f12
5Khz human barcoded toml
Adoni5 Jun 21, 2024
2567e9b
Set PoreType to NotSet for POD5 output
Adoni5 Jul 10, 2024
4cbd05d
Add experiment duration to the tables f toml settings
Adoni5 Jul 10, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,11 @@ fern = { version = "0.6.2", features = ["colored", "chrono"] }
humantime = "2.1.0"
lazy_static = "1.4.0"
indicatif = "0.17.5"
podders = "0.1.3"
podders = "0.1.4"
# podders = { git = "https://github.com/Adoni5/podders.git" }
#podders = { path = "../podders" }
probability = "0.20.3"
rayon = "1.10.0"


[build-dependencies]
Expand Down
11 changes: 0 additions & 11 deletions Icarust.code-workspace

This file was deleted.

25 changes: 25 additions & 0 deletions Profile_tomls/config_dnar10_5khz_human_barcoded.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
output_path = "/data/projects/rory_says_hi/readfish_update_paper"
target_yield = 100000000000
working_pore_percent = 80
pore_type = "R10"
nucleotide_type = "DNA"

[parameters]
sample_name = "human_barcoded"
experiment_name = "test_timings_grid_builtin"
flowcell_name = "PAS000000"
experiment_duration_set = 120
device_id = "Bantersaurus"
position = "FenceSitter"
sample_rate = 5000
break_read_ms = 800

[[sample]]
name = "Bacteria 1"
input_genome = "/data/projects/rory_says_hi/hg38_no_alts.part_NC_000001.11 Homo sapiens chromosome 1, GRCh38.p14 Primary Assembly.fa.gz"
#input_genome = "/data/refs/hg38.p14.simple.fa"
#input_genome = "/data/projects/rory_says_hi/Icarust/docker/squiggle_arrs/mixed_ref.fa"
mean_read_length = 10000
weight = 1
barcodes = ["Barcode01", "Barcode02", "Barcode03"]
barcode_weights = [1, 2, 1]
7 changes: 4 additions & 3 deletions Profile_tomls/config_dnar10_barcoded.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
output_path = "/tmp/"
global_mean_read_length = 15000
target_yield = 100000000000
working_pore_percent = 85
working_pore_percent = 100
pore_type = "R10"
nucleotide_type = "DNA"
random_seed = 9

[parameters]
sample_name = "test"
experiment_name = "test_2_bacteria"
sample_name = "thesis_barcode_weights"
experiment_name = "thesis_barcode_weights"
flowcell_name = "FAQ1234"
experiment_duration_set = 4800
device_id = "Bantersaurus"
Expand Down
4 changes: 2 additions & 2 deletions Profile_tomls/config_dnar9.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ sample_rate = 4000

[[sample]]
name = "Bacteria 1"
input_genome = "squiggle_arrs/NC_002516.2.squiggle.npy"
input_genome = "squiggle_arrs/mixed_ref_r9/NC_002516.2.squiggle.npy"
mean_read_length = 20000
weight = 1

[[sample]]
name = "Bacteria 2"
input_genome = "squiggle_arrs/NC_003997.3.squiggle.npy"
input_genome = "squiggle_arrs/mixed_ref_r9/NC_003997.3.squiggle.npy"
mean_read_length = 15000
weight = 2
4 changes: 2 additions & 2 deletions Profile_tomls/config_dnar9_barcoded.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ sample_rate = 4000

[[sample]]
name = "Bacteria 1"
input_genome = "docker/squiggle_arrs/mixed_ref.fa"
mean_read_length = 10000
input_genome = "squiggle_arrs/mixed_ref_r9"
mean_read_length = 5000
weight = 1
barcodes = ["Barcode01", "Barcode02", "Barcode03"]
barcode_weights = [1, 2, 3]
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ The parameters are applied to the "sequencer". They are used to setup the GRPC s
| position | string | True | Position name. This has to match what readfish is looking for. |
| break_read_ms | int | False | How many milliseconds to chunk reads into. Default 400. |
| sample_rate | int | False | Sample rate in Hz. Default [4000]. Suggest 3000 for RNA, 4000 or 5000 for DNA otherwise Dorado will throw a Hissy fit. |
| experiment_duration_set | int | False | Time in minutes to run the experiment for. |


### Sample
Expand Down
2 changes: 1 addition & 1 deletion config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ manager = 10000
position = 10001

[SEQUENCER]
channels = 3000
channels = 512
8 changes: 8 additions & 0 deletions proto/minknow_api/device.proto
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ service DeviceService {
rpc get_calibration (GetCalibrationRequest) returns (GetCalibrationResponse) {
option idempotency_level = NO_SIDE_EFFECTS;
}

// Get the sample rate of the device
//
// Please refer to MinionDeviceService and PromethionDeviceService for the expected
// return value for a minion and promethion respectively
rpc get_sample_rate (GetSampleRateRequest) returns (GetSampleRateResponse) {
option idempotency_level = NO_SIDE_EFFECTS;
}
}


Expand Down
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions squiggle_arrs/mixed_ref_r9/distributions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"weights": [6264404, 5227293], "names": ["NC_002516.2", "NC_003997.3"]}
47 changes: 33 additions & 14 deletions src/impl_services/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -417,8 +417,8 @@ fn start_write_out_thread(
Some(RunInfoData {
acquisition_id: run_id.clone(),
acquisition_start_time: 1625097600000,
adc_max: 32767,
adc_min: -32768,
adc_max: 2047,
adc_min: 0,
context_tags: context_tags
.iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
Expand Down Expand Up @@ -525,8 +525,8 @@ fn start_write_out_thread(
]);
let channel_info = ChannelInfo::new(
2048_f64,
0.0,
200.0,
-243.0,
2048.0 * 0.1462070643901825,
config.parameters.get_sample_rate() as f64,
to_write_info.channel_number.clone(),
);
Expand All @@ -548,18 +548,21 @@ fn start_write_out_thread(
} else {
EndReason::SIGNAL_POSITIVE
};
let pt = match ic_pt {
PoreType::R9 => PodPoreType::R9,
PoreType::R10 => PodPoreType::R10,
};
// Apparently, MinKNOW doesn't set this - or specifically it is set to
// not-set, so we will match that.
// let pt = match ic_pt {
// PoreType::R9 => PodPoreType::R9,
// PoreType::R10 => PodPoreType::R10,
// };
let pt = PodPoreType::NotSet;
let read: PodReadInfo = PodReadInfo {
read_id: Uuid::parse_str(to_write_info.read_id.as_str()).unwrap(),
pore_type: pt,
signal_: signal,
channel: to_write_info.channel as u16,
well: 1,
calibration_offset: -264.0,
calibration_scale: 0.187_069_85,
calibration_offset: -243.0,
calibration_scale: 0.14620706,
read_number: to_write_info.read_number,
start: 1,
median_before: 100.0,
Expand Down Expand Up @@ -1294,7 +1297,7 @@ fn generate_read(
value.start_time_utc = Utc::now();
value.read_number = *read_number;
let sample_choice: &String = &samples[dist.sample(rng)];
value.read_sample_name = sample_choice.clone();
value.read_sample_name.clone_from(sample_choice);
let sample_info: &SampleInfo = &views[sample_choice];
// choose a barcode if we need to - else we always use the first distirbution in the vec
let mut file_weight_choice: usize = 0;
Expand Down Expand Up @@ -1437,13 +1440,27 @@ impl DataServiceServicer {
// start the thread to generate data
thread::spawn(move || {
let r: ReacquisitionPoisson = ReacquisitionPoisson::new(1.0, 0.0, 0.0001, 0.05);
let _experiment_duration = config.parameters.experiment_duration_set;

// read number for adding to unblock
let mut read_number: u32 = 0;
let mut completed_reads: u32 = 0;

// Infinte loop for data generation
loop {
if let Some(_experiment_duration) = _experiment_duration {
// The experiment length in minutes
let minutes_since_start = (Utc::now().timestamp() as u64 - start_time) / 60;
if std::convert::TryInto::<usize>::try_into(minutes_since_start).unwrap()
> _experiment_duration
{
info!("Reached experiment duration, stopping...");
{
*graceful_shutdown.lock().unwrap() = true;
}
break;
}
}
let read_process = Instant::now();
// debug!("Sequencer mock loop start");
let mut new_reads = 0;
Expand Down Expand Up @@ -1520,7 +1537,7 @@ impl DataServiceServicer {
continue;
}
// chance to aquire a read
if rng.gen_bool(0.8) {
if rng.gen_bool(0.75) {
new_reads += 1;
read_number += 1;
generate_read(
Expand Down Expand Up @@ -1555,6 +1572,8 @@ impl DataServiceServicer {
break;
}
}
std::thread::sleep(Duration::from_secs(10));
std::process::exit(0);
});
// return our newly initialised DataServiceServicer to add onto the GRPC server
DataServiceServicer {
Expand Down Expand Up @@ -1647,7 +1666,7 @@ impl DataService for DataServiceServicer {
// Convert sample_rate_hz into microseconds
let chunk_to_serve_length: usize = ((sample_rate_hz as f64 / 1_000_000_f64) * elapsed_time.num_microseconds().unwrap() as f64) as usize;
let stop = start + chunk_to_serve_length as usize;
info!("elasped: {elapsed_time}, start {start}, length {chunk_to_serve_length}, stop: {stop}");
debug!("elasped: {elapsed_time}, start {start}, length {chunk_to_serve_length}, stop: {stop}");
// let mut stop = convert_milliseconds_to_samples(elapsed_time.num_milliseconds(), sampling);
// slice of signal is too short
if start > stop || (stop - start) < chunk_to_serve_length as usize {
Expand All @@ -1661,7 +1680,7 @@ impl DataService for DataServiceServicer {
}
// CHeck start is not past end
if start > read_info.read.len() {
info!("Skipping as start {start} is greater than read signal lenth {}", read_info.read.len());
warn!("Skipping as start {start} is greater than read signal lenth {}", read_info.read.len());
continue
}

Expand Down
44 changes: 34 additions & 10 deletions src/impl_services/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
//!
//! 2. get_flow_cell_info
//!
//! Returns a tonne of information about the flowclel
//! Returns a tonne of information about the flowcell
//!
//! 3. Returns the sample rate, which is used in dorado >= 7.3.9
//!
use crate::services::minknow_api::device;
use crate::services::minknow_api::device::device_service_server::DeviceService;
Expand All @@ -16,11 +18,27 @@ use tonic::{Request, Response, Status};
#[derive(Debug)]
pub struct Device {
channel_size: usize,
sample_rate: u32,
offset: f32,
range: f32,
digitisation: u32,
}

impl Device {
pub fn new(channel_size: usize) -> Device {
Device { channel_size }
pub fn new(
channel_size: usize,
sample_rate: u32,
offset: f32,
range: f32,
digitisation: u32,
) -> Device {
Device {
channel_size,
sample_rate,
offset,
range,
digitisation,
}
}
}
#[tonic::async_trait]
Expand All @@ -34,14 +52,10 @@ impl DeviceService for Device {
let channels: u32 = request_values.last_channel - request_values.first_channel + 1;
// explicitly convert to usize from u32
let n_us = usize::try_from(channels).unwrap();
let mut offsets = vec![0.0];
// resize in place
offsets.resize(n_us, 0.0);
let mut pa_ranges = vec![1.0];
// resize in place
pa_ranges.resize(n_us, 1.0);
let offsets = vec![self.offset; n_us];
let pa_ranges = vec![self.range; n_us];
return Ok(Response::new(device::GetCalibrationResponse {
digitisation: 1,
digitisation: self.digitisation,
offsets,
pa_ranges,
has_calibration: true,
Expand Down Expand Up @@ -69,4 +83,14 @@ impl DeviceService for Device {
insertion_script_status: 0,
}))
}

/// Get the sample rate for a givenexperiment
async fn get_sample_rate(
&self,
_request: Request<device::GetSampleRateRequest>,
) -> Result<Response<device::GetSampleRateResponse>, Status> {
Ok(Response::new(device::GetSampleRateResponse {
sample_rate: self.sample_rate,
}))
}
}
2 changes: 1 addition & 1 deletion src/impl_services/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl ManagerService for Manager {
guppy_connected_version: String::from("6hopefully"),
}))
}

#[allow(clippy::diverging_sub_expression)]
async fn describe_host(
&self,
_request: Request<DescribeHostRequest>,
Expand Down
Loading