Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 89 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ This repository contains the source code for an EESSI status page generator. The

Create a configuration file (e.g., config.json). See [config.json](config.json) for an example. The only optional key is `backend_type` for servers. It defaults to `AutoDetect` if missing. See the section on server backend types for more information.

Note that `limit_scraping_to_repositories` controls how the scraper determines which repositories to scrape from each server. If set to `true`, only the repositories explicitly listed as `repositories` in the configuration will be scraped (and `ignored_repositories` will have no meaning). If set to `false`, the scraper will also consider repositories detected from the server itself (if applicable), filtered by `ignored_repositories`. The default is `false`.

## Usage

Run the binary with the desired options:
Expand Down Expand Up @@ -136,8 +138,17 @@ In this example, as the rules are applied in order, the engine will check, in or
## Prometheus Metrics

Prometheus metrics can be enabled with the `--prometheus-metrics` option. The metrics are exposed as the file `metrics` in the
output directory and are generated with the timestamp being the start of the application. A typical metrics file will look
like this:
output directory and are generated with the timestamp being the start of the application.

The status codes used in the metrics are as follows:

- `0`: OK
- `1`: Degraded
- `2`: Warning
- `3`: Failed
- `9`: Maintenance

A typical metrics file might look like this:

```prometheus
# HELP eessi_status EESSI status
Expand All @@ -155,12 +166,80 @@ syncservers_status 0 1720525887957
# HELP repositories_status Repositories status
# TYPE repositories_status gauge
repositories_status 0 1720525887957
```

The status codes are:
# HELP status_overview Status overview
# TYPE status_overview gauge
status_overview{category="overall"} 0 1761206997670
status_overview{category="stratum0"} 0 1761206997670
status_overview{category="stratum1"} 0 1761206997670
status_overview{category="syncservers"} 0 1761206997670
status_overview{category="repositories"} 0 1761206997670
# HELP repo_catalogue_size Repository catalogue size
# TYPE repo_catalogue_size gauge
repo_catalogue_size{type="stratum0",server="rug-nl-s0.eessi.science",repository="dev.eessi.io"} 9526272 1761206997670
repo_catalogue_size{type="stratum0",server="rug-nl-s0.eessi.science",repository="riscv.eessi.io"} 26624 1761206997670
repo_catalogue_size{type="stratum0",server="rug-nl-s0.eessi.science",repository="software.eessi.io"} 133120 1761206997670
repo_catalogue_size{type="stratum1",server="aws-eu-central-s1.eessi.science",repository="dev.eessi.io"} 9526272 1761206997670
repo_catalogue_size{type="stratum1",server="aws-eu-central-s1.eessi.science",repository="riscv.eessi.io"} 26624 1761206997670
repo_catalogue_size{type="stratum1",server="aws-eu-central-s1.eessi.science",repository="software.eessi.io"} 133120 1761206997670
repo_catalogue_size{type="stratum1",server="azure-us-east-s1.eessi.science",repository="dev.eessi.io"} 9526272 1761206997670
repo_catalogue_size{type="stratum1",server="azure-us-east-s1.eessi.science",repository="riscv.eessi.io"} 26624 1761206997670
repo_catalogue_size{type="stratum1",server="azure-us-east-s1.eessi.science",repository="software.eessi.io"} 133120 1761206997670
repo_catalogue_size{type="stratum1",server="cvmfs-ext.gridpp.rl.ac.uk:8000",repository="dev.eessi.io"} 9526272 1761206997670
repo_catalogue_size{type="stratum1",server="cvmfs-ext.gridpp.rl.ac.uk:8000",repository="riscv.eessi.io"} 26624 1761206997670
repo_catalogue_size{type="stratum1",server="cvmfs-ext.gridpp.rl.ac.uk:8000",repository="software.eessi.io"} 133120 1761206997670
repo_catalogue_size{type="syncserver",server="aws-eu-west-s1-sync.eessi.science",repository="dev.eessi.io"} 9526272 1761206997670
repo_catalogue_size{type="syncserver",server="aws-eu-west-s1-sync.eessi.science",repository="riscv.eessi.io"} 26624 1761206997670
repo_catalogue_size{type="syncserver",server="aws-eu-west-s1-sync.eessi.science",repository="software.eessi.io"} 133120 1761206997670
# HELP repo_revision Repository revision
# TYPE repo_revision gauge
repo_revision{type="stratum0",server="rug-nl-s0.eessi.science",repository="dev.eessi.io"} 415 1761206997670
repo_revision{type="stratum0",server="rug-nl-s0.eessi.science",repository="riscv.eessi.io"} 522 1761206997670
repo_revision{type="stratum0",server="rug-nl-s0.eessi.science",repository="software.eessi.io"} 9744 1761206997670
repo_revision{type="stratum1",server="aws-eu-central-s1.eessi.science",repository="dev.eessi.io"} 415 1761206997670
repo_revision{type="stratum1",server="aws-eu-central-s1.eessi.science",repository="riscv.eessi.io"} 522 1761206997670
repo_revision{type="stratum1",server="aws-eu-central-s1.eessi.science",repository="software.eessi.io"} 9744 1761206997670
repo_revision{type="stratum1",server="azure-us-east-s1.eessi.science",repository="dev.eessi.io"} 415 1761206997670
repo_revision{type="stratum1",server="azure-us-east-s1.eessi.science",repository="riscv.eessi.io"} 522 1761206997670
repo_revision{type="stratum1",server="azure-us-east-s1.eessi.science",repository="software.eessi.io"} 9744 1761206997670
repo_revision{type="stratum1",server="cvmfs-ext.gridpp.rl.ac.uk:8000",repository="dev.eessi.io"} 415 1761206997670
repo_revision{type="stratum1",server="cvmfs-ext.gridpp.rl.ac.uk:8000",repository="riscv.eessi.io"} 522 1761206997670
repo_revision{type="stratum1",server="cvmfs-ext.gridpp.rl.ac.uk:8000",repository="software.eessi.io"} 9744 1761206997670
repo_revision{type="syncserver",server="aws-eu-west-s1-sync.eessi.science",repository="dev.eessi.io"} 415 1761206997670
repo_revision{type="syncserver",server="aws-eu-west-s1-sync.eessi.science",repository="riscv.eessi.io"} 522 1761206997670
repo_revision{type="syncserver",server="aws-eu-west-s1-sync.eessi.science",repository="software.eessi.io"} 9744 1761206997670
# HELP repo_timestamp Repository timestamp
# TYPE repo_timestamp gauge
repo_timestamp{type="stratum0",server="rug-nl-s0.eessi.science",repository="dev.eessi.io"} 1760706941 1761206997670
repo_timestamp{type="stratum0",server="rug-nl-s0.eessi.science",repository="riscv.eessi.io"} 1750670430 1761206997670
repo_timestamp{type="stratum0",server="rug-nl-s0.eessi.science",repository="software.eessi.io"} 1761150935 1761206997670
repo_timestamp{type="stratum1",server="aws-eu-central-s1.eessi.science",repository="dev.eessi.io"} 1760706941 1761206997670
repo_timestamp{type="stratum1",server="aws-eu-central-s1.eessi.science",repository="riscv.eessi.io"} 1750670430 1761206997670
repo_timestamp{type="stratum1",server="aws-eu-central-s1.eessi.science",repository="software.eessi.io"} 1761150935 1761206997670
repo_timestamp{type="stratum1",server="azure-us-east-s1.eessi.science",repository="dev.eessi.io"} 1760706941 1761206997670
repo_timestamp{type="stratum1",server="azure-us-east-s1.eessi.science",repository="riscv.eessi.io"} 1750670430 1761206997670
repo_timestamp{type="stratum1",server="azure-us-east-s1.eessi.science",repository="software.eessi.io"} 1761150935 1761206997670
repo_timestamp{type="stratum1",server="cvmfs-ext.gridpp.rl.ac.uk:8000",repository="dev.eessi.io"} 1760706941 1761206997670
repo_timestamp{type="stratum1",server="cvmfs-ext.gridpp.rl.ac.uk:8000",repository="riscv.eessi.io"} 1750670430 1761206997670
repo_timestamp{type="stratum1",server="cvmfs-ext.gridpp.rl.ac.uk:8000",repository="software.eessi.io"} 1761150935 1761206997670
repo_timestamp{type="syncserver",server="aws-eu-west-s1-sync.eessi.science",repository="dev.eessi.io"} 1760706941 1761206997670
repo_timestamp{type="syncserver",server="aws-eu-west-s1-sync.eessi.science",repository="riscv.eessi.io"} 1750670430 1761206997670
repo_timestamp{type="syncserver",server="aws-eu-west-s1-sync.eessi.science",repository="software.eessi.io"} 1761150935 1761206997670
# HELP repo_ttl Repository TTL
# TYPE repo_ttl gauge
repo_ttl{type="stratum0",server="rug-nl-s0.eessi.science",repository="dev.eessi.io"} 240 1761206997670
repo_ttl{type="stratum0",server="rug-nl-s0.eessi.science",repository="riscv.eessi.io"} 240 1761206997670
repo_ttl{type="stratum0",server="rug-nl-s0.eessi.science",repository="software.eessi.io"} 240 1761206997670
repo_ttl{type="stratum1",server="aws-eu-central-s1.eessi.science",repository="dev.eessi.io"} 240 1761206997670
repo_ttl{type="stratum1",server="aws-eu-central-s1.eessi.science",repository="riscv.eessi.io"} 240 1761206997670
repo_ttl{type="stratum1",server="aws-eu-central-s1.eessi.science",repository="software.eessi.io"} 240 1761206997670
repo_ttl{type="stratum1",server="azure-us-east-s1.eessi.science",repository="dev.eessi.io"} 240 1761206997670
repo_ttl{type="stratum1",server="azure-us-east-s1.eessi.science",repository="riscv.eessi.io"} 240 1761206997670
repo_ttl{type="stratum1",server="azure-us-east-s1.eessi.science",repository="software.eessi.io"} 240 1761206997670
repo_ttl{type="stratum1",server="cvmfs-ext.gridpp.rl.ac.uk:8000",repository="dev.eessi.io"} 240 1761206997670
repo_ttl{type="stratum1",server="cvmfs-ext.gridpp.rl.ac.uk:8000",repository="riscv.eessi.io"} 240 1761206997670
repo_ttl{type="stratum1",server="cvmfs-ext.gridpp.rl.ac.uk:8000",repository="software.eessi.io"} 240 1761206997670
repo_ttl{type="syncserver",server="aws-eu-west-s1-sync.eessi.science",repository="dev.eessi.io"} 240 1761206997670
repo_ttl{type="syncserver",server="aws-eu-west-s1-sync.eessi.science",repository="riscv.eessi.io"} 240 1761206997670
repo_ttl{type="syncserver",server="aws-eu-west-s1-sync.eessi.science",repository="software.eessi.io"} 240 1761206997670

- `0`: OK
- `1`: Degraded
- `2`: Warning
- `3`: Failed
- `9`: Maintenance
```
1 change: 1 addition & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"dev.eessi.io",
"riscv.eessi.io"
],
"limit_scraping_to_repositories": false,
"ignored_repositories": [
"test.eessi.io"
],
Expand Down
143 changes: 122 additions & 21 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@ use std::path::{Path, PathBuf};
mod config;
mod dependencies;
mod models;
mod prometheus;
mod templating;

use config::{get_config_manager, init_config};
use cvmfs_server_scraper::{Scraper, ScraperCommon, ServerType};
use dependencies::{atomic_write, populate};
use models::{EESSIStatus, Status, StatusManager, StatusPageData, StratumStatus};
use models::{EESSIStatus, Status, StatusManager, StatusPageData, StratumStatus, ToEESSILabel};
use prometheus::MetricsBuilder;
use templating::{render_template_to_file, RepoStatus, StatusInfo};

#[derive(Parser, Debug)]
Expand Down Expand Up @@ -91,7 +93,7 @@ async fn main() -> Result<()> {
render_output(&args, &status_page_data)?;

if args.prometheus_metrics {
generate_prometheus_metrics(&args, &status_page_data, &run_start_time)?;
generate_prometheus_metrics(&args, &status_page_data, &status_manager, &run_start_time)?;
}

Ok(())
Expand Down Expand Up @@ -180,33 +182,132 @@ fn generate_status_page_data(
/// Writes the Prometheus metrics file named `metrics` into `args.destination`.
///
/// * `args` - supplies the `destination` directory the file is joined onto.
/// * `status_page_data` - source of the five aggregate status levels
///   (`eessi_status`, `stratum0`, `stratum1`, `syncservers`, `repositories_status`).
/// * `status_manager` - iterated through `get_all_servers()` to emit the
///   per-server, per-repository gauges.
/// * `timestamp` - converted with `timestamp_millis()` and attached to every sample.
///
/// # Errors
///
/// Propagates any error returned by `atomic_write` for the metrics file.
fn generate_prometheus_metrics(
args: &Opt,
status_page_data: &StatusPageData,
status_manager: &StatusManager,
timestamp: &DateTime<Utc>,
) -> Result<()> {
use crate::models::StatusLevel;

let filename = args.destination.join("metrics");
trace!("Generating Prometheus metrics file: {:?}", filename);

let ms_since_epoch = timestamp.timestamp_millis();

let metrics = format!(
"# HELP eessi_status EESSI status\n# TYPE eessi_status gauge\n\
eessi_status {} {ms_since_epoch}\n\
# HELP stratum0_status Stratum0 status\n# TYPE stratum0_status gauge\n\
stratum0_status {} {ms_since_epoch}\n\
# HELP stratum1_status Stratum1 status\n# TYPE stratum1_status gauge\n\
stratum1_status {} {ms_since_epoch}\n\
# HELP syncservers_status SyncServers status\n# TYPE syncservers_status gauge\n\
syncservers_status {} {ms_since_epoch}\n\
# HELP repositories_status Repositories status\n# TYPE repositories_status gauge\n\
repositories_status {} {ms_since_epoch}\n",
status_page_data.eessi_status.level(),
status_page_data.stratum0.level(),
status_page_data.stratum1.level(),
status_page_data.syncservers.level(),
status_page_data.repositories_status.level()
let ts = timestamp.timestamp_millis();

// One unlabelled gauge per aggregate status; each sample carries `ts` as its timestamp.
let mut b = MetricsBuilder::new();
b.add_gauge(
"eessi_status",
"EESSI status",
status_page_data.eessi_status.level() as f64,
&[],
Some(ts),
)
.add_gauge(
"stratum0_status",
"Stratum0 status",
status_page_data.stratum0.level() as f64,
&[],
Some(ts),
)
.add_gauge(
"stratum1_status",
"Stratum1 status",
status_page_data.stratum1.level() as f64,
&[],
Some(ts),
)
.add_gauge(
"syncservers_status",
"SyncServers status",
status_page_data.syncservers.level() as f64,
&[],
Some(ts),
)
.add_gauge(
"repositories_status",
"Repositories status",
status_page_data.repositories_status.level() as f64,
&[],
Some(ts),
);

atomic_write(&filename, metrics.as_bytes())?;
// The same five levels again, this time as (category, level) pairs that are
// exported under the single metric name `status_overview` with a `category` label.
let maps = vec![
("overall", status_page_data.eessi_status.level() as f64),
("stratum0", status_page_data.stratum0.level() as f64),
("stratum1", status_page_data.stratum1.level() as f64),
("syncservers", status_page_data.syncservers.level() as f64),
(
"repositories",
status_page_data.repositories_status.level() as f64,
),
];

for (category, level) in maps {
b.add_gauge(
"status_overview",
"Status overview",
level,
&[("category", category)],
Some(ts),
);
}

// Per-repository gauges: four metrics for every repository of every server,
// all sharing the same `type` / `server` / `repository` label set.
for server in status_manager.get_all_servers() {
let ts_ms = Some(ts);

for repo in server.repositories.iter() {
let repo_labels: [(&str, &str); 3] = [
("type", server.server_type.to_label()),
("server", server.hostname.to_str()),
("repository", repo.name.as_str()),
];

// The fields are:
// - c: Cryptographic hash of the repository’s current root catalog
// - b: Size of the root file catalog in bytes
// - a: true if the catalog should be fetched under its alternative name
// - r: MD5 hash of the repository’s current root path (usually always d41d8cd98f00b204e9800998ecf8427e)
// - x: Cryptographic hash of the signing certificate
// - g: true if the repository is garbage-collectable
// - h: Cryptographic hash of the repository’s named tag history database
// - t: Unix timestamp of this particular revision
// - d: Time To Live (TTL) of the root catalog
// - s: Revision number of this published revision
// - n: The full name of the manifested repository
// - m: Cryptographic hash of the repository JSON metadata
// - y: Cryptographic hash of the reflog checksum
// - l: currently unused (reserved for micro catalogs)
// Of these, only `t`, `d` and `b` are exported below, next to `repo.revision`.
b.add_gauge(
"repo_revision",
"Repository revision",
repo.revision as f64,
&repo_labels,
ts_ms,
)
.add_gauge(
"repo_timestamp",
"Repository timestamp",
repo.manifest.t as f64,
&repo_labels,
ts_ms,
)
.add_gauge(
"repo_ttl",
"Repository TTL",
repo.manifest.d as f64,
&repo_labels,
ts_ms,
)
.add_gauge(
"repo_catalogue_size",
"Repository catalogue size",
repo.manifest.b as f64,
&repo_labels,
ts_ms,
);
}
}

// `build()` yields the text whose bytes are handed to `atomic_write` for `filename`.
let text = b.build();
atomic_write(&filename, text.as_bytes())?;
info!("Prometheus metrics file written to: {:?}", filename);
Ok(())
}
Expand Down
24 changes: 22 additions & 2 deletions src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use strum::IntoEnumIterator;
use strum_macros::{AsRefStr, EnumIter};

use cvmfs_server_scraper::{
Hostname, PopulatedRepositoryOrReplica, PopulatedServer, ScrapedServer, ServerBackendType,
ServerMetadata, ServerType,
Hostname, Manifest, PopulatedRepositoryOrReplica, PopulatedServer, ScrapedServer,
ServerBackendType, ServerMetadata, ServerType,
};

use crate::config::{Condition, ConfigFile};
Expand Down Expand Up @@ -204,6 +204,7 @@ impl StatusLevel for RepoStatus {}
pub struct Repositories {
pub name: String,
pub revision: i32,
pub manifest: Manifest,
pub status: Status,
/// Is the revision in sync with either the stratum0 or the stratum1s?
pub status_revision: Status,
Expand Down Expand Up @@ -232,6 +233,20 @@ impl Server {
}
}

/// Conversion of a value into a short label string.
pub trait ToEESSILabel {
/// Returns the label for `self` as a string slice.
fn to_label(&self) -> &str;
}

/// Label strings for the server types: each variant maps to its lowercase name.
impl ToEESSILabel for ServerType {
    /// Returns `"stratum0"`, `"stratum1"` or `"syncserver"` depending on the variant.
    fn to_label(&self) -> &str {
        match *self {
            Self::Stratum0 => "stratum0",
            Self::Stratum1 => "stratum1",
            Self::SyncServer => "syncserver",
        }
    }
}

pub struct StatusManager {
pub servers: Vec<Server>,
}
Expand All @@ -251,6 +266,7 @@ impl StatusManager {
Repositories {
name: repo.name.clone(),
revision: repo.revision(),
manifest: repo.manifest.clone(),
status: status_revision,
status_revision,
}
Expand Down Expand Up @@ -292,6 +308,10 @@ impl StatusManager {
self.servers.iter().map(Server::to_server_status).collect()
}

/// Returns a reference to every server held in `self.servers`, in stored order.
pub fn get_all_servers(&self) -> Vec<&Server> {
    let mut all_servers = Vec::with_capacity(self.servers.len());
    all_servers.extend(self.servers.iter());
    all_servers
}

pub fn get_by_type(&self, server_type: ServerType) -> Vec<&Server> {
self.servers
.iter()
Expand Down
Loading