Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions docs/paper/reductions.typ
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@
"MinimumTardinessSequencing": [Minimum Tardiness Sequencing],
"MultipleChoiceBranching": [Multiple Choice Branching],
"MultipleCopyFileAllocation": [Multiple Copy File Allocation],
"ExpectedRetrievalCost": [Expected Retrieval Cost],
"MultiprocessorScheduling": [Multiprocessor Scheduling],
"PartitionIntoPathsOfLength2": [Partition into Paths of Length 2],
"PartitionIntoTriangles": [Partition Into Triangles],
Expand Down Expand Up @@ -2460,6 +2461,45 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76],
]
}

#{
let x = load-model-example("ExpectedRetrievalCost")
let K = x.instance.bound
[
#problem-def("ExpectedRetrievalCost")[
Given a set $R = {r_1, dots, r_n}$ of records, access probabilities $p(r) in [0, 1]$ with $sum_(r in R) p(r) = 1$, a positive integer $m$ of circular storage sectors, and a bound $K$, determine whether there exists a partition $R_1, dots, R_m$ of $R$ such that
$sum_(i=1)^m sum_(j=1)^m p(R_i) p(R_j) d(i, j) <= K,$
where $p(R_i) = sum_(r in R_i) p(r)$ and
$d(i, j) = j - i - 1$ for $1 <= i < j <= m$, while $d(i, j) = m - i + j - 1$ for $1 <= j <= i <= m$.
][
Expected Retrieval Cost is the storage-and-retrieval problem SR4 in Garey and Johnson @garey1979. The model abstracts a drum-like storage device with fixed read heads: placing probability mass evenly around the cycle reduces the expected waiting time until the next requested sector rotates under the head. Cody and Coffman introduced the formulation and analyzed exact and heuristic record-allocation algorithms for fixed numbers of sectors @codycoffman1976. Garey and Johnson record that the general decision problem is NP-complete in the strong sense via transformations from Partition and 3-Partition @garey1979. The implementation in this repository uses one $m$-ary variable per record, so the registered exact baseline enumerates $m^n$ assignments. For practicality, the code stores the probabilities and bound as floating-point values even though the book states $K$ as an integer.

*Example.* Take six records with probabilities $(0.2, 0.15, 0.15, 0.2, 0.1, 0.2)$, three sectors, and $K = #K$. Assign
$R_1 = {r_1, r_5}$, $R_2 = {r_2, r_4}$, and $R_3 = {r_3, r_6}$.
Then the sector masses are $(p(R_1), p(R_2), p(R_3)) = (0.3, 0.35, 0.35)$.
For $m = 3$, the non-zero latencies are $d(1, 1) = d(2, 2) = d(3, 3) = 2$, $d(1, 3) = d(2, 1) = d(3, 2) = 1$, and the remaining pairs contribute 0. Hence the expected retrieval cost is $2 (0.3^2 + 0.35^2 + 0.35^2) + (0.3 dot 0.35 + 0.35 dot 0.3 + 0.35 dot 0.35) = 0.67 + 0.3325 = 1.0025 <= #K$, so the allocation is satisfying.

#pred-commands(
"pred create --example ExpectedRetrievalCost -o expected-retrieval-cost.json",
"pred solve expected-retrieval-cost.json --solver brute-force",
"pred evaluate expected-retrieval-cost.json --config " + x.optimal_config.map(str).join(","),
)

#figure(
table(
columns: 3,
inset: 6pt,
stroke: 0.5pt + luma(180),
[Sector], [Records], [Mass],
[$S_1$], [$r_1, r_5$], [$0.3$],
[$S_2$], [$r_2, r_4$], [$0.35$],
[$S_3$], [$r_3, r_6$], [$0.35$],
),
caption: [Expected Retrieval Cost example with cyclic sector order $S_1 -> S_2 -> S_3 -> S_1$. The satisfying allocation yields masses $(0.3, 0.35, 0.35)$ and total cost $1.0025$.],
) <fig:expected-retrieval-cost>
]
]
}

== Set Problems

#{
Expand Down
11 changes: 11 additions & 0 deletions docs/paper/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -1116,6 +1116,17 @@ @article{coffman1972
doi = {10.1007/BF00288685}
}

@article{codycoffman1976,
author = {R. A. Cody and E. G. Coffman, Jr.},
title = {Record Allocation for Minimizing Expected Retrieval Costs on Drum-Like Storage Devices},
journal = {Journal of the ACM},
volume = {23},
number = {1},
pages = {103--115},
year = {1976},
doi = {10.1145/321921.321933}
}

@inproceedings{cordella2004,
author = {Luigi P. Cordella and Pasquale Foggia and Carlo Sansone and Mario Vento},
title = {A (Sub)Graph Isomorphism Algorithm for Matching Large Graphs},
Expand Down
10 changes: 10 additions & 0 deletions problemreductions-cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ Flags by problem type:
CapacityAssignment --capacities, --cost-matrix, --delay-matrix, --cost-budget, --delay-budget
SubsetSum --sizes, --target
SumOfSquaresPartition --sizes, --num-groups, --bound
ExpectedRetrievalCost --probabilities, --num-sectors, --latency-bound
PaintShop --sequence
MaximumSetPacking --sets [--weights]
MinimumHittingSet --universe, --sets
Expand Down Expand Up @@ -474,6 +475,9 @@ pub struct CreateArgs {
/// Item sizes for BinPacking (comma-separated, e.g., "3,3,2,2")
#[arg(long)]
pub sizes: Option<String>,
/// Record access probabilities for ExpectedRetrievalCost (comma-separated, e.g., "0.2,0.15,0.15,0.2,0.1,0.2")
#[arg(long)]
pub probabilities: Option<String>,
/// Bin capacity for BinPacking
#[arg(long)]
pub capacity: Option<String>,
Expand Down Expand Up @@ -546,6 +550,9 @@ pub struct CreateArgs {
/// Bound parameter (lower bound for LongestCircuit; upper or length bound for BoundedComponentSpanningForest, LengthBoundedDisjointPaths, LongestCommonSubsequence, MultipleCopyFileAllocation, MultipleChoiceBranching, OptimalLinearArrangement, RootedTreeArrangement, RuralPostman, ShortestCommonSupersequence, or StringToStringCorrection)
#[arg(long, allow_hyphen_values = true)]
pub bound: Option<i64>,
/// Upper bound on expected retrieval latency for ExpectedRetrievalCost
#[arg(long)]
pub latency_bound: Option<f64>,
/// Upper bound on total path length
#[arg(long)]
pub length_bound: Option<i32>,
Expand Down Expand Up @@ -703,6 +710,9 @@ pub struct CreateArgs {
/// Number of groups for SumOfSquaresPartition
#[arg(long)]
pub num_groups: Option<usize>,
/// Number of sectors for ExpectedRetrievalCost
#[arg(long)]
pub num_sectors: Option<usize>,
/// Source string for StringToStringCorrection (comma-separated symbol indices, e.g., "0,1,2,3")
#[arg(long)]
pub source_string: Option<String>,
Expand Down
134 changes: 127 additions & 7 deletions problemreductions-cli/src/commands/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@ use problemreductions::models::graph::{
use problemreductions::models::misc::{
AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CapacityAssignment, CbqRelation,
ConjunctiveBooleanQuery, ConsistencyOfDatabaseFrequencyTables, EnsembleComputation,
FlowShopScheduling, FrequencyTable, KnownValue, LongestCommonSubsequence,
MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack,
QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling,
SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost,
SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness,
SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence,
StringToStringCorrection, SubsetSum, SumOfSquaresPartition, TimetableDesign,
ExpectedRetrievalCost, FlowShopScheduling, FrequencyTable, KnownValue,
LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop,
PartiallyOrderedKnapsack, QueryArg, RectilinearPictureCompression,
ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines,
SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime,
SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines,
SequencingWithinIntervals, ShortestCommonSupersequence, StringToStringCorrection, SubsetSum,
SumOfSquaresPartition, TimetableDesign,
};
use problemreductions::models::BiconnectivityAugmentation;
use problemreductions::prelude::*;
Expand All @@ -43,6 +44,10 @@ const MULTIPLE_COPY_FILE_ALLOCATION_EXAMPLE_ARGS: &str =
"--graph 0-1,1-2,2-3 --usage 5,4,3,2 --storage 1,1,1,1 --bound 8";
const MULTIPLE_COPY_FILE_ALLOCATION_USAGE: &str =
"Usage: pred create MultipleCopyFileAllocation --graph 0-1,1-2,2-3 --usage 5,4,3,2 --storage 1,1,1,1 --bound 8";
const EXPECTED_RETRIEVAL_COST_EXAMPLE_ARGS: &str =
"--probabilities 0.2,0.15,0.15,0.2,0.1,0.2 --num-sectors 3 --latency-bound 1.01";
const EXPECTED_RETRIEVAL_COST_USAGE: &str =
"Usage: pred create ExpectedRetrievalCost --probabilities 0.2,0.15,0.15,0.2,0.1,0.2 --num-sectors 3 --latency-bound 1.01";

/// Check if all data flags are None (no problem-specific input provided).
fn all_data_flags_empty(args: &CreateArgs) -> bool {
Expand Down Expand Up @@ -85,6 +90,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool {
&& args.requirement_2.is_none()
&& args.requirement.is_none()
&& args.sizes.is_none()
&& args.probabilities.is_none()
&& args.capacity.is_none()
&& args.sequence.is_none()
&& args.sets.is_none()
Expand All @@ -110,6 +116,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool {
&& args.tree.is_none()
&& args.required_edges.is_none()
&& args.bound.is_none()
&& args.latency_bound.is_none()
&& args.length_bound.is_none()
&& args.weight_bound.is_none()
&& args.cost_bound.is_none()
Expand Down Expand Up @@ -152,6 +159,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool {
&& args.task_avail.is_none()
&& args.alphabet_size.is_none()
&& args.num_groups.is_none()
&& args.num_sectors.is_none()
&& args.dependencies.is_none()
&& args.num_attributes.is_none()
&& args.source_string.is_none()
Expand Down Expand Up @@ -606,6 +614,7 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str {
}
"MultiprocessorScheduling" => "--lengths 4,5,3,2,6 --num-processors 2 --deadline 10",
"MinimumMultiwayCut" => "--graph 0-1,1-2,2-3 --terminals 0,2 --edge-weights 1,1,1",
"ExpectedRetrievalCost" => EXPECTED_RETRIEVAL_COST_EXAMPLE_ARGS,
"SequencingWithinIntervals" => "--release-times 0,0,5 --deadlines 11,11,6 --lengths 3,1,1",
"StaffScheduling" => {
"--schedules \"1,1,1,1,1,0,0;0,1,1,1,1,1,0;0,0,1,1,1,1,1;1,0,0,1,1,1,1;1,1,0,0,1,1,1\" --requirements 2,2,2,3,3,2,1 --num-workers 4 --k 5"
Expand Down Expand Up @@ -1510,6 +1519,59 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> {
)
}

// ExpectedRetrievalCost (probabilities + sectors + latency bound)
// Match arm for `pred create ExpectedRetrievalCost`. Validates the three
// required flags, then serializes a new problem instance. Every failure
// path appends the canonical usage string so the CLI error is actionable.
"ExpectedRetrievalCost" => {
// --probabilities is mandatory: a comma-separated list of per-record
// access probabilities (e.g. "0.2,0.15,0.15,0.2,0.1,0.2").
let probabilities_str = args.probabilities.as_deref().ok_or_else(|| {
anyhow::anyhow!(
"ExpectedRetrievalCost requires --probabilities\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
)
})?;
let probabilities: Vec<f64> = util::parse_comma_list(probabilities_str)
.map_err(|e| anyhow::anyhow!("{e}\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"))?;
anyhow::ensure!(
!probabilities.is_empty(),
"ExpectedRetrievalCost requires at least one probability\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
);
// Each value must be a finite probability; NaN/inf would silently poison
// the sum check below, so reject them explicitly first.
anyhow::ensure!(
probabilities.iter().all(|p| p.is_finite() && (0.0..=1.0).contains(p)),
"ExpectedRetrievalCost probabilities must be finite values in [0, 1]\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
);
let total_probability: f64 = probabilities.iter().sum();
// The model requires a probability distribution; allow a small absolute
// tolerance (1e-9) for decimal inputs that don't sum to exactly 1.0 in
// binary floating point.
anyhow::ensure!(
(total_probability - 1.0).abs() <= 1e-9,
"ExpectedRetrievalCost probabilities must sum to 1.0\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
);

// --num-sectors: the number m of circular storage sectors. With m < 2
// the partition (and the latency function) is degenerate, so require >= 2.
let num_sectors = args.num_sectors.ok_or_else(|| {
anyhow::anyhow!(
"ExpectedRetrievalCost requires --num-sectors\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
)
})?;
anyhow::ensure!(
num_sectors >= 2,
"ExpectedRetrievalCost requires at least two sectors\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
);

// --latency-bound: the decision bound K on the expected retrieval cost.
// Stored as f64 (the book states K as an integer; the implementation uses
// floats — see docs/paper/reductions.typ).
let latency_bound = args.latency_bound.ok_or_else(|| {
anyhow::anyhow!(
"ExpectedRetrievalCost requires --latency-bound\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
)
})?;
anyhow::ensure!(
latency_bound.is_finite() && latency_bound >= 0.0,
"ExpectedRetrievalCost requires a finite non-negative --latency-bound\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
);

// All inputs validated: construct and serialize the instance, pairing it
// with the resolved variant like every other arm in this match.
(
ser(ExpectedRetrievalCost::new(
probabilities,
num_sectors,
latency_bound,
))?,
resolved_variant.clone(),
)
}

// UndirectedFlowLowerBounds (graph + capacities + lower bounds + terminals + requirement)
"UndirectedFlowLowerBounds" => {
let usage = "Usage: pred create UndirectedFlowLowerBounds --graph 0-1,0-2,1-3,2-3,1-4,3-5,4-5 --capacities 2,2,2,2,1,3,2 --lower-bounds 1,1,0,0,1,0,1 --source 0 --sink 5 --requirement 3";
Expand Down Expand Up @@ -7049,6 +7111,7 @@ mod tests {
requirement_1: None,
requirement_2: None,
sizes: None,
probabilities: None,
capacity: None,
sequence: None,
sets: None,
Expand All @@ -7073,6 +7136,7 @@ mod tests {
tree: None,
required_edges: None,
bound: None,
latency_bound: None,
length_bound: None,
weight_bound: None,
cost_bound: None,
Expand Down Expand Up @@ -7111,6 +7175,7 @@ mod tests {
craftsman_avail: None,
task_avail: None,
num_groups: None,
num_sectors: None,
domain_size: None,
relations: None,
conjuncts_spec: None,
Expand Down Expand Up @@ -7375,6 +7440,61 @@ mod tests {
std::fs::remove_file(output_path).ok();
}

#[test]
fn test_create_expected_retrieval_cost_json() {
use crate::dispatch::ProblemJsonOutput;
use problemreductions::models::misc::ExpectedRetrievalCost;

// Build CLI arguments matching the documented example instance.
let mut args = empty_args();
args.problem = Some("ExpectedRetrievalCost".into());
args.probabilities = Some("0.2,0.15,0.15,0.2,0.1,0.2".into());
args.num_sectors = Some(3);
args.latency_bound = Some(1.01);

let path = std::env::temp_dir().join(format!(
"expected-retrieval-cost-{}.json",
std::process::id()
));
let out = OutputConfig {
output: Some(path.clone()),
quiet: true,
json: false,
auto_json: false,
};

create(&args, &out).unwrap();

// Round-trip the written JSON back into a typed problem instance.
let raw = std::fs::read_to_string(&path).unwrap();
let parsed: ProblemJsonOutput = serde_json::from_str(&raw).unwrap();
assert_eq!(parsed.problem_type, "ExpectedRetrievalCost");

let instance: ExpectedRetrievalCost = serde_json::from_value(parsed.data).unwrap();
assert_eq!(instance.num_records(), 6);
assert_eq!(instance.num_sectors(), 3);
// The documented satisfying assignment must evaluate to true.
assert!(instance.evaluate(&[0, 1, 2, 1, 0, 2]));

// Best-effort cleanup of the temp file.
std::fs::remove_file(path).ok();
}

#[test]
fn test_create_expected_retrieval_cost_requires_latency_bound() {
// Omit --latency-bound (empty_args leaves it None) and confirm that
// creation fails with the flag-specific error message.
let mut args = empty_args();
args.problem = Some("ExpectedRetrievalCost".into());
args.probabilities = Some("0.2,0.15,0.15,0.2,0.1,0.2".into());
args.num_sectors = Some(3);

let out = OutputConfig {
output: None,
quiet: true,
json: false,
auto_json: false,
};

let message = create(&args, &out).unwrap_err().to_string();
assert!(message.contains("ExpectedRetrievalCost requires --latency-bound"));
}

#[test]
fn test_create_stacker_crane_json() {
let mut args = empty_args();
Expand Down
6 changes: 3 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ pub mod prelude {
pub use crate::models::misc::{
AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CapacityAssignment, CbqRelation,
ConjunctiveBooleanQuery, ConjunctiveQueryFoldability, ConsistencyOfDatabaseFrequencyTables,
EnsembleComputation, Factoring, FlowShopScheduling, Knapsack, LongestCommonSubsequence,
MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, Partition, QueryArg,
RectilinearPictureCompression, ResourceConstrainedScheduling,
EnsembleComputation, ExpectedRetrievalCost, Factoring, FlowShopScheduling, Knapsack,
LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop,
Partition, QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling,
SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost,
SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness,
SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals,
Expand Down
Loading
Loading