Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions docs/paper/reductions.typ
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@
"MinimumTardinessSequencing": [Minimum Tardiness Sequencing],
"MultipleChoiceBranching": [Multiple Choice Branching],
"MultipleCopyFileAllocation": [Multiple Copy File Allocation],
"ExpectedRetrievalCost": [Expected Retrieval Cost],
"MultiprocessorScheduling": [Multiprocessor Scheduling],
"PartitionIntoPathsOfLength2": [Partition into Paths of Length 2],
"PartitionIntoTriangles": [Partition Into Triangles],
Expand Down Expand Up @@ -2460,6 +2461,45 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76],
]
}

#{
let x = load-model-example("ExpectedRetrievalCost")
let K = x.instance.bound
[
#problem-def("ExpectedRetrievalCost")[
Given a set $R = {r_1, dots, r_n}$ of records, access probabilities $p(r) in [0, 1]$ with $sum_(r in R) p(r) = 1$, a positive integer $m$ of circular storage sectors, and a bound $K$, determine whether there exists a partition $R_1, dots, R_m$ of $R$ such that
$sum_(i=1)^m sum_(j=1)^m p(R_i) p(R_j) d(i, j) <= K,$
where $p(R_i) = sum_(r in R_i) p(r)$ and
$d(i, j) = j - i - 1$ for $1 <= i < j <= m$, while $d(i, j) = m - i + j - 1$ for $1 <= j <= i <= m$.
][
Expected Retrieval Cost is the storage-and-retrieval problem SR4 in Garey and Johnson @garey1979. The model abstracts a drum-like storage device with fixed read heads: placing probability mass evenly around the cycle reduces the expected waiting time until the next requested sector rotates under the head. Cody and Coffman introduced the formulation and analyzed exact and heuristic record-allocation algorithms for fixed numbers of sectors @codycoffman1976. Garey and Johnson record that the general decision problem is NP-complete in the strong sense via transformations from Partition and 3-Partition @garey1979. The implementation in this repository uses one $m$-ary variable per record, so the registered exact baseline enumerates $m^n$ assignments. For practicality, the code stores the probabilities and bound as floating-point values even though the book states $K$ as an integer.

*Example.* Take six records with probabilities $(0.2, 0.15, 0.15, 0.2, 0.1, 0.2)$, three sectors, and $K = #K$. Assign
$R_1 = {r_1, r_5}$, $R_2 = {r_2, r_4}$, and $R_3 = {r_3, r_6}$.
Then the sector masses are $(p(R_1), p(R_2), p(R_3)) = (0.3, 0.35, 0.35)$.
For $m = 3$, the non-zero latencies are $d(1, 1) = d(2, 2) = d(3, 3) = 2$, $d(1, 3) = d(2, 1) = d(3, 2) = 1$, and the remaining pairs contribute 0. Hence the expected retrieval cost is $2 (0.3^2 + 0.35^2 + 0.35^2) + (0.3 dot 0.35 + 0.35 dot 0.3 + 0.35 dot 0.35) = 0.67 + 0.3325 = 1.0025 <= #K$, so the allocation is satisfying.

#pred-commands(
"pred create --example ExpectedRetrievalCost -o expected-retrieval-cost.json",
"pred solve expected-retrieval-cost.json --solver brute-force",
"pred evaluate expected-retrieval-cost.json --config " + x.optimal_config.map(str).join(","),
)

#figure(
table(
columns: 3,
inset: 6pt,
stroke: 0.5pt + luma(180),
[Sector], [Records], [Mass],
[$S_1$], [$r_1, r_5$], [$0.3$],
[$S_2$], [$r_2, r_4$], [$0.35$],
[$S_3$], [$r_3, r_6$], [$0.35$],
),
caption: [Expected Retrieval Cost example with cyclic sector order $S_1 -> S_2 -> S_3 -> S_1$. The satisfying allocation yields masses $(0.3, 0.35, 0.35)$ and total cost $1.0025$.],
) <fig:expected-retrieval-cost>
]
]
}

== Set Problems

#{
Expand Down
11 changes: 11 additions & 0 deletions docs/paper/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -1116,6 +1116,17 @@ @article{coffman1972
doi = {10.1007/BF00288685}
}

@article{codycoffman1976,
author = {R. A. Cody and E. G. Coffman, Jr.},
title = {Record Allocation for Minimizing Expected Retrieval Costs on Drum-Like Storage Devices},
journal = {Journal of the ACM},
volume = {23},
number = {1},
pages = {103--115},
year = {1976},
doi = {10.1145/321921.321933}
}

@inproceedings{cordella2004,
author = {Luigi P. Cordella and Pasquale Foggia and Carlo Sansone and Mario Vento},
title = {A (Sub)Graph Isomorphism Algorithm for Matching Large Graphs},
Expand Down
10 changes: 10 additions & 0 deletions problemreductions-cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ Flags by problem type:
CapacityAssignment --capacities, --cost-matrix, --delay-matrix, --cost-budget, --delay-budget
SubsetSum --sizes, --target
SumOfSquaresPartition --sizes, --num-groups, --bound
ExpectedRetrievalCost --probabilities, --num-sectors, --latency-bound
PaintShop --sequence
MaximumSetPacking --sets [--weights]
MinimumHittingSet --universe, --sets
Expand Down Expand Up @@ -474,6 +475,9 @@ pub struct CreateArgs {
/// Item sizes for BinPacking (comma-separated, e.g., "3,3,2,2")
#[arg(long)]
pub sizes: Option<String>,
/// Record access probabilities for ExpectedRetrievalCost (comma-separated, e.g., "0.2,0.15,0.15,0.2,0.1,0.2")
#[arg(long)]
pub probabilities: Option<String>,
/// Bin capacity for BinPacking
#[arg(long)]
pub capacity: Option<String>,
Expand Down Expand Up @@ -546,6 +550,9 @@ pub struct CreateArgs {
/// Bound parameter (lower bound for LongestCircuit; upper or length bound for BoundedComponentSpanningForest, LengthBoundedDisjointPaths, LongestCommonSubsequence, MultipleCopyFileAllocation, MultipleChoiceBranching, OptimalLinearArrangement, RootedTreeArrangement, RuralPostman, ShortestCommonSupersequence, or StringToStringCorrection)
#[arg(long, allow_hyphen_values = true)]
pub bound: Option<i64>,
/// Upper bound on expected retrieval latency for ExpectedRetrievalCost
#[arg(long)]
pub latency_bound: Option<f64>,
/// Upper bound on total path length
#[arg(long)]
pub length_bound: Option<i32>,
Expand Down Expand Up @@ -703,6 +710,9 @@ pub struct CreateArgs {
/// Number of groups for SumOfSquaresPartition
#[arg(long)]
pub num_groups: Option<usize>,
/// Number of sectors for ExpectedRetrievalCost
#[arg(long)]
pub num_sectors: Option<usize>,
/// Source string for StringToStringCorrection (comma-separated symbol indices, e.g., "0,1,2,3")
#[arg(long)]
pub source_string: Option<String>,
Expand Down
134 changes: 127 additions & 7 deletions problemreductions-cli/src/commands/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@ use problemreductions::models::graph::{
use problemreductions::models::misc::{
AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CapacityAssignment, CbqRelation,
ConjunctiveBooleanQuery, ConsistencyOfDatabaseFrequencyTables, EnsembleComputation,
FlowShopScheduling, FrequencyTable, KnownValue, LongestCommonSubsequence,
MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack,
QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling,
SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost,
SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness,
SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence,
StringToStringCorrection, SubsetSum, SumOfSquaresPartition, TimetableDesign,
ExpectedRetrievalCost, FlowShopScheduling, FrequencyTable, KnownValue,
LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop,
PartiallyOrderedKnapsack, QueryArg, RectilinearPictureCompression,
ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines,
SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime,
SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines,
SequencingWithinIntervals, ShortestCommonSupersequence, StringToStringCorrection, SubsetSum,
SumOfSquaresPartition, TimetableDesign,
};
use problemreductions::models::BiconnectivityAugmentation;
use problemreductions::prelude::*;
Expand All @@ -43,6 +44,10 @@ const MULTIPLE_COPY_FILE_ALLOCATION_EXAMPLE_ARGS: &str =
"--graph 0-1,1-2,2-3 --usage 5,4,3,2 --storage 1,1,1,1 --bound 8";
const MULTIPLE_COPY_FILE_ALLOCATION_USAGE: &str =
"Usage: pred create MultipleCopyFileAllocation --graph 0-1,1-2,2-3 --usage 5,4,3,2 --storage 1,1,1,1 --bound 8";
const EXPECTED_RETRIEVAL_COST_EXAMPLE_ARGS: &str =
"--probabilities 0.2,0.15,0.15,0.2,0.1,0.2 --num-sectors 3 --latency-bound 1.01";
const EXPECTED_RETRIEVAL_COST_USAGE: &str =
"Usage: pred create ExpectedRetrievalCost --probabilities 0.2,0.15,0.15,0.2,0.1,0.2 --num-sectors 3 --latency-bound 1.01";

/// Check if all data flags are None (no problem-specific input provided).
fn all_data_flags_empty(args: &CreateArgs) -> bool {
Expand Down Expand Up @@ -85,6 +90,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool {
&& args.requirement_2.is_none()
&& args.requirement.is_none()
&& args.sizes.is_none()
&& args.probabilities.is_none()
&& args.capacity.is_none()
&& args.sequence.is_none()
&& args.sets.is_none()
Expand All @@ -110,6 +116,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool {
&& args.tree.is_none()
&& args.required_edges.is_none()
&& args.bound.is_none()
&& args.latency_bound.is_none()
&& args.length_bound.is_none()
&& args.weight_bound.is_none()
&& args.cost_bound.is_none()
Expand Down Expand Up @@ -152,6 +159,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool {
&& args.task_avail.is_none()
&& args.alphabet_size.is_none()
&& args.num_groups.is_none()
&& args.num_sectors.is_none()
&& args.dependencies.is_none()
&& args.num_attributes.is_none()
&& args.source_string.is_none()
Expand Down Expand Up @@ -606,6 +614,7 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str {
}
"MultiprocessorScheduling" => "--lengths 4,5,3,2,6 --num-processors 2 --deadline 10",
"MinimumMultiwayCut" => "--graph 0-1,1-2,2-3 --terminals 0,2 --edge-weights 1,1,1",
"ExpectedRetrievalCost" => EXPECTED_RETRIEVAL_COST_EXAMPLE_ARGS,
"SequencingWithinIntervals" => "--release-times 0,0,5 --deadlines 11,11,6 --lengths 3,1,1",
"StaffScheduling" => {
"--schedules \"1,1,1,1,1,0,0;0,1,1,1,1,1,0;0,0,1,1,1,1,1;1,0,0,1,1,1,1;1,1,0,0,1,1,1\" --requirements 2,2,2,3,3,2,1 --num-workers 4 --k 5"
Expand Down Expand Up @@ -1510,6 +1519,59 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> {
)
}

// ExpectedRetrievalCost (probabilities + sectors + latency bound)
// Match arm for `pred create ExpectedRetrievalCost`. Validates the three
// required flags, then serializes a new problem instance. Every failure
// path appends the canonical usage string so the CLI error is actionable.
"ExpectedRetrievalCost" => {
// --probabilities is mandatory: a comma-separated list of per-record
// access probabilities (e.g. "0.2,0.15,0.15,0.2,0.1,0.2").
let probabilities_str = args.probabilities.as_deref().ok_or_else(|| {
anyhow::anyhow!(
"ExpectedRetrievalCost requires --probabilities\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
)
})?;
let probabilities: Vec<f64> = util::parse_comma_list(probabilities_str)
.map_err(|e| anyhow::anyhow!("{e}\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"))?;
anyhow::ensure!(
!probabilities.is_empty(),
"ExpectedRetrievalCost requires at least one probability\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
);
// Each value must be a finite probability; NaN/inf would silently poison
// the sum check below, so reject them explicitly first.
anyhow::ensure!(
probabilities.iter().all(|p| p.is_finite() && (0.0..=1.0).contains(p)),
"ExpectedRetrievalCost probabilities must be finite values in [0, 1]\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
);
let total_probability: f64 = probabilities.iter().sum();
// The model requires a probability distribution; allow a small absolute
// tolerance (1e-9) for decimal inputs that don't sum to exactly 1.0 in
// binary floating point.
anyhow::ensure!(
(total_probability - 1.0).abs() <= 1e-9,
"ExpectedRetrievalCost probabilities must sum to 1.0\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
);

// --num-sectors: the number m of circular storage sectors. With m < 2
// the partition (and the latency function) is degenerate, so require >= 2.
let num_sectors = args.num_sectors.ok_or_else(|| {
anyhow::anyhow!(
"ExpectedRetrievalCost requires --num-sectors\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
)
})?;
anyhow::ensure!(
num_sectors >= 2,
"ExpectedRetrievalCost requires at least two sectors\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
);

// --latency-bound: the decision bound K on the expected retrieval cost.
// Stored as f64 (the book states K as an integer; the implementation uses
// floats — see docs/paper/reductions.typ).
let latency_bound = args.latency_bound.ok_or_else(|| {
anyhow::anyhow!(
"ExpectedRetrievalCost requires --latency-bound\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
)
})?;
anyhow::ensure!(
latency_bound.is_finite() && latency_bound >= 0.0,
"ExpectedRetrievalCost requires a finite non-negative --latency-bound\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
);

// All inputs validated: construct and serialize the instance, pairing it
// with the resolved variant like every other arm in this match.
(
ser(ExpectedRetrievalCost::new(
probabilities,
num_sectors,
latency_bound,
))?,
resolved_variant.clone(),
)
}

// UndirectedFlowLowerBounds (graph + capacities + lower bounds + terminals + requirement)
"UndirectedFlowLowerBounds" => {
let usage = "Usage: pred create UndirectedFlowLowerBounds --graph 0-1,0-2,1-3,2-3,1-4,3-5,4-5 --capacities 2,2,2,2,1,3,2 --lower-bounds 1,1,0,0,1,0,1 --source 0 --sink 5 --requirement 3";
Expand Down Expand Up @@ -7049,6 +7111,7 @@ mod tests {
requirement_1: None,
requirement_2: None,
sizes: None,
probabilities: None,
capacity: None,
sequence: None,
sets: None,
Expand All @@ -7073,6 +7136,7 @@ mod tests {
tree: None,
required_edges: None,
bound: None,
latency_bound: None,
length_bound: None,
weight_bound: None,
cost_bound: None,
Expand Down Expand Up @@ -7111,6 +7175,7 @@ mod tests {
craftsman_avail: None,
task_avail: None,
num_groups: None,
num_sectors: None,
domain_size: None,
relations: None,
conjuncts_spec: None,
Expand Down Expand Up @@ -7375,6 +7440,61 @@ mod tests {
std::fs::remove_file(output_path).ok();
}

#[test]
fn test_create_expected_retrieval_cost_json() {
use crate::dispatch::ProblemJsonOutput;
use problemreductions::models::misc::ExpectedRetrievalCost;

// Build CLI arguments matching the documented example instance.
let mut args = empty_args();
args.problem = Some("ExpectedRetrievalCost".into());
args.probabilities = Some("0.2,0.15,0.15,0.2,0.1,0.2".into());
args.num_sectors = Some(3);
args.latency_bound = Some(1.01);

let path = std::env::temp_dir().join(format!(
"expected-retrieval-cost-{}.json",
std::process::id()
));
let out = OutputConfig {
output: Some(path.clone()),
quiet: true,
json: false,
auto_json: false,
};

create(&args, &out).unwrap();

// Round-trip the written JSON back into a typed problem instance.
let raw = std::fs::read_to_string(&path).unwrap();
let parsed: ProblemJsonOutput = serde_json::from_str(&raw).unwrap();
assert_eq!(parsed.problem_type, "ExpectedRetrievalCost");

let instance: ExpectedRetrievalCost = serde_json::from_value(parsed.data).unwrap();
assert_eq!(instance.num_records(), 6);
assert_eq!(instance.num_sectors(), 3);
// The documented satisfying assignment must evaluate to true.
assert!(instance.evaluate(&[0, 1, 2, 1, 0, 2]));

// Best-effort cleanup of the temp file.
std::fs::remove_file(path).ok();
}

#[test]
fn test_create_expected_retrieval_cost_requires_latency_bound() {
// Omit --latency-bound (empty_args leaves it None) and confirm that
// creation fails with the flag-specific error message.
let mut args = empty_args();
args.problem = Some("ExpectedRetrievalCost".into());
args.probabilities = Some("0.2,0.15,0.15,0.2,0.1,0.2".into());
args.num_sectors = Some(3);

let out = OutputConfig {
output: None,
quiet: true,
json: false,
auto_json: false,
};

let message = create(&args, &out).unwrap_err().to_string();
assert!(message.contains("ExpectedRetrievalCost requires --latency-bound"));
}

#[test]
fn test_create_stacker_crane_json() {
let mut args = empty_args();
Expand Down
6 changes: 3 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ pub mod prelude {
pub use crate::models::misc::{
AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CapacityAssignment, CbqRelation,
ConjunctiveBooleanQuery, ConjunctiveQueryFoldability, ConsistencyOfDatabaseFrequencyTables,
EnsembleComputation, Factoring, FlowShopScheduling, Knapsack, LongestCommonSubsequence,
MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, Partition, QueryArg,
RectilinearPictureCompression, ResourceConstrainedScheduling,
EnsembleComputation, ExpectedRetrievalCost, Factoring, FlowShopScheduling, Knapsack,
LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop,
Partition, QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling,
SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost,
SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness,
SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals,
Expand Down
Loading
Loading