## Prometheus setup

In [26]:

import subprocess
import os
import json
from IPython.display import display, Markdown

# Utility to run shell commands

def run(cmd):
    """Echo a shell command, execute it, and print whatever it produced.

    The command is passed to the shell as a single string. Standard output
    is printed verbatim when non-empty; standard error is printed with a
    ``[stderr]`` prefix when non-empty. A non-zero exit status does not
    raise.

    Args:
        cmd: The shell command line to execute.

    Returns:
        The ``subprocess.CompletedProcess`` for the finished command.
    """
    print(f"\n$ {cmd}")
    completed = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    out, err = completed.stdout, completed.stderr
    if out:
        print(out)
    if err:
        print("[stderr]", err)
    return completed

# Detect current namespace

In [None]:
# Ask kubectl for the namespace of the active kubeconfig context. Stray
# single quotes and newlines are trimmed from the output, and an empty
# answer (nothing printed) falls back to "default".
ns_result = subprocess.run(
    "kubectl config view --minify -o jsonpath='{..namespace}'",
    shell=True,
    capture_output=True,
    text=True,
)
NAMESPACE = ns_result.stdout.strip("'\n")
if not NAMESPACE:
    NAMESPACE = "default"
print(f"[INFO] Detected namespace: {NAMESPACE}")

# Install Prometheus Operator via Subscription

In [None]:
# Subscription manifest for the "prometheus" operator package, rendered
# into the detected namespace.
prom_operator_yaml = f"""
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
  name: prometheus
  namespace: {NAMESPACE}
spec:
  channel: beta
  name: prometheus
  source: community-operators
  sourceNamespace: openshift-marketplace
"""

# The manifest is staged in a scratch file in the working directory,
# applied with kubectl, and the scratch file is removed afterwards.
subscription_manifest = "prometheus-subscription.yaml"
with open(subscription_manifest, "w") as manifest_file:
    manifest_file.write(prom_operator_yaml)

run(f"kubectl apply -f {subscription_manifest}")
os.remove(subscription_manifest)


# Detect OpenShift cluster domain

In [None]:
def get_cluster_domain():
    """Derive the cluster's domain from the OpenShift console route.

    Reads the ``console`` route in ``openshift-console`` as JSON, takes the
    ``routerCanonicalHostname`` of its first ingress entry and drops the
    ``router-default.`` portion from it.

    Returns:
        The derived domain string, or the placeholder
        ``"REPLACE_ME_DOMAIN"`` when anything in the lookup fails (the
        failure is printed rather than raised).
    """
    route_query = "oc get routes console -n openshift-console -o json"
    try:
        completed = subprocess.run(
            route_query, shell=True, capture_output=True, text=True, check=True
        )
        console_route = json.loads(completed.stdout)
        first_ingress = console_route["status"]["ingress"][0]
        canonical_host = first_ingress["routerCanonicalHostname"]
        return canonical_host.replace("router-default.", "")
    except Exception as e:
        # Best-effort lookup: report the problem and hand back a
        # placeholder so the caller still gets a string.
        print(f"❌ Could not determine cluster domain: {e}")
        return "REPLACE_ME_DOMAIN"


CLUSTER_DOMAIN = get_cluster_domain()
print(f"[INFO] Detected cluster domain: {CLUSTER_DOMAIN}")

# Create ServiceAccount

In [None]:
# Create the ServiceAccount "prometheus-monitoring" in the detected
# namespace.
create_sa_cmd = f"kubectl create serviceaccount prometheus-monitoring -n {NAMESPACE}"
run(create_sa_cmd)

# Create ClusterRoleBinding

In [None]:
# ClusterRoleBinding that binds the "prometheus-monitoring" ServiceAccount
# in the detected namespace to the "cluster-monitoring-view" ClusterRole.
cluster_role_binding_yaml = f"""
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: prometheus-monitoring
subjects:
  - kind: ServiceAccount
    name: prometheus-monitoring
    namespace: {NAMESPACE}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-monitoring-view
"""

# Stage the manifest in a scratch file, apply it, then delete the file.
binding_manifest = "prometheus-cluster-role.yaml"
with open(binding_manifest, "w") as manifest_file:
    manifest_file.write(cluster_role_binding_yaml)

run(f"kubectl apply -f {binding_manifest}")
os.remove(binding_manifest)

# Create token for the ServiceAccount

In [None]:
# Request a 24h bearer token for the "prometheus-monitoring"
# ServiceAccount. The token is later embedded in the scrape config, so a
# failed or empty result must stop the notebook here instead of silently
# producing a config with an empty bearer token.
print("\nCreating token valid for 24h...")
token_result = subprocess.run(
    f"kubectl create token prometheus-monitoring --duration=24h -n {NAMESPACE}",
    shell=True,
    capture_output=True,
    text=True,
)
SERVICE_ACCOUNT_TOKEN = token_result.stdout.strip()
if token_result.returncode != 0 or not SERVICE_ACCOUNT_TOKEN:
    # Surface kubectl's own error text; the original code ignored it and
    # reported success.
    raise RuntimeError(
        "Could not create a token for ServiceAccount prometheus-monitoring: "
        f"{token_result.stderr.strip() or 'kubectl returned no token'}"
    )
# Only a short prefix is printed so the full credential never lands in the
# notebook output.
print(f"[INFO] Token retrieved: {SERVICE_ACCOUNT_TOKEN[:10]}... (truncated)")

# Write scrape config

In [None]:
# Additional scrape job that federates selected series from the
# "prometheus-k8s.openshift-monitoring.svc:9091" target, authenticating
# with the ServiceAccount bearer token. Doubled braces are f-string
# escapes and render as single braces in the written file.
scrape_config = f"""
- job_name: custom-monitoring
  honor_labels: true
  static_configs:
    - targets:
      - 'prometheus-k8s.openshift-monitoring.svc:9091'
  scrape_interval: 30s
  metrics_path: /federate
  params:
    match[]:
    - '{{endpoint="https-metrics"}}'
    - '{{service="kube-state-metrics"}}'
    - '{{service="node-exporter"}}'
    - '{{__name__=~"namespace_pod_name_container_name:.*"}}'
    - '{{__name__=~"node_namespace_pod_container:.*"}}'
    - '{{__name__=~"node:.*"}}'
    - '{{__name__=~"instance:.*"}}'
    - '{{__name__=~"container_memory_.*"}}'
    - '{{__name__=~":node_memory_.*"}}'
  scheme: https
  tls_config:
    insecure_skip_verify: true
  bearer_token: "{SERVICE_ACCOUNT_TOKEN}"
  metric_relabel_configs:
  - action: labeldrop
    regex: prometheus_replica
"""

try:
    with open("scrape-config.yaml", "w") as f:
        f.write(scrape_config)

    # A plain `kubectl create secret` fails with AlreadyExists when the
    # notebook is re-run, which would leave the Secret holding the previous
    # (24h, possibly expired) token. Rendering the Secret client-side and
    # piping it to `kubectl apply` creates it the first time and updates it
    # on every later run.
    run(
        "kubectl create secret generic additional-scrape-configs "
        "--from-file=custom-scrape-configs.yaml=scrape-config.yaml "
        f"-n {NAMESPACE} --dry-run=client -o yaml "
        f"| kubectl apply -n {NAMESPACE} -f -"
    )
finally:
    # The scratch file contains the bearer token in plaintext; remove it
    # even if writing or applying failed.
    if os.path.exists("scrape-config.yaml"):
        os.remove("scrape-config.yaml")

# Apply Prometheus CR

In [None]:
# Prometheus custom resource named "example" in the detected namespace. It
# runs under the "prometheus-monitoring" ServiceAccount and takes its
# additional scrape configs from the "custom-scrape-configs.yaml" key of
# the "additional-scrape-configs" Secret. Doubled braces are f-string
# escapes for empty YAML mappings.
prom_cr = f"""
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  labels:
    prometheus: k8s
  name: example
  namespace: {NAMESPACE}
spec:
  externalUrl: https://prometheus.{NAMESPACE}.{CLUSTER_DOMAIN}
  podMonitorSelector: {{}}
  replicas: 1
  ruleSelector: {{}}
  securityContext: {{}}
  serviceAccountName: prometheus-monitoring
  serviceMonitorSelector: {{}}
  additionalScrapeConfigs:
    key: custom-scrape-configs.yaml
    name: additional-scrape-configs
"""

# Stage the manifest in a scratch file, apply it, then delete the file.
cr_manifest = "prometheus-cr.yaml"
with open(cr_manifest, "w") as manifest_file:
    manifest_file.write(prom_cr)

run(f"kubectl apply -f {cr_manifest}")
os.remove(cr_manifest)

# Expose Prometheus

In [None]:
# Expose the "prometheus-operated" service under the hostname used in the
# Prometheus CR's externalUrl. The namespace is passed explicitly, like
# every other cluster command in this notebook, so the route is created
# next to the Prometheus instance even if the active project has changed
# since NAMESPACE was detected.
# NOTE(review): `oc expose` presumably creates a plain-HTTP route, while
# externalUrl uses https:// — confirm whether a TLS (edge) route is wanted.
run(f"oc expose service prometheus-operated --hostname prometheus.{NAMESPACE}.{CLUSTER_DOMAIN} -n {NAMESPACE}")