diff --git a/libs/domains/observability/data-access/src/lib/domains-observability-data-access.ts b/libs/domains/observability/data-access/src/lib/domains-observability-data-access.ts index 711c6ed4a54..ef80bafcc73 100644 --- a/libs/domains/observability/data-access/src/lib/domains-observability-data-access.ts +++ b/libs/domains/observability/data-access/src/lib/domains-observability-data-access.ts @@ -9,13 +9,11 @@ import { type AlertRuleEditRequest, AlertRulesApi, ClustersApi, - OrganizationMainCallsApi, } from 'qovery-typescript-axios' const clusterApi = new ClustersApi() const alertRulesApi = new AlertRulesApi() const alertReceiversApi = new AlertReceiversApi() -const organizationApi = new OrganizationMainCallsApi() export const observability = createQueryKeys('observability', { containerName: ({ @@ -167,6 +165,40 @@ export const observability = createQueryKeys('observability', { return response.data.metrics && (JSON.parse(response.data.metrics).data[0] as string) }, }), + podNames: ({ + clusterId, + statefulsetName, + startDate, + endDate, + }: { + clusterId: string + statefulsetName: string + startDate: string + endDate: string + }) => ({ + queryKey: ['podNames', clusterId, statefulsetName], + async queryFn() { + const endpoint = `api/v1/label/pod/values?match[]=kube_pod_owner{owner_kind="StatefulSet",owner_name="${statefulsetName}"}` + const response = await clusterApi.getClusterMetrics( + clusterId, + endpoint, + '', + startDate, + endDate, + undefined, + undefined, + undefined, + 'True', + 'True', + undefined, + 'prometheus', + 'false', + 'service_overview', + 'pod_names' + ) + return response.data.metrics && (JSON.parse(response.data.metrics).data as string[]) + }, + }), metrics: ({ clusterId, query, diff --git a/libs/domains/observability/feature/src/index.ts b/libs/domains/observability/feature/src/index.ts index 38ed77972df..310d50b36b9 100644 --- a/libs/domains/observability/feature/src/index.ts +++ b/libs/domains/observability/feature/src/index.ts @@ -17,6 +17,7 @@ export * from './lib/hooks/use-edit-alert-rule/use-edit-alert-rule' export * from './lib/hooks/use-alert-rules/use-alert-rules' export * from './lib/hooks/use-environment/use-environment' export * from './lib/hooks/use-container-name/use-container-name' +export * from './lib/hooks/use-pod-names/use-pod-names' export * from './lib/hooks/use-ingress-name/use-ingress-name' export * from './lib/hooks/use-hpa-name/use-hpa-name' export * from './lib/hooks/use-alerts/use-alerts' diff --git a/libs/domains/observability/feature/src/lib/hooks/use-pod-names/use-pod-names.ts b/libs/domains/observability/feature/src/lib/hooks/use-pod-names/use-pod-names.ts new file mode 100644 index 00000000000..babdda6af25 --- /dev/null +++ b/libs/domains/observability/feature/src/lib/hooks/use-pod-names/use-pod-names.ts @@ -0,0 +1,18 @@ +import { useQuery } from '@tanstack/react-query' +import { observability } from '@qovery/domains/observability/data-access' + +export interface UsePodNamesProps { + clusterId: string + statefulsetName: string + startDate: string + endDate: string + enabled?: boolean +} + +// Retrieves pod names for a StatefulSet using kube_pod_owner +export function usePodNames({ clusterId, statefulsetName, startDate, endDate, enabled = true }: UsePodNamesProps) { + return useQuery({ + ...observability.podNames({ clusterId, statefulsetName, startDate, endDate }), + enabled: enabled && Boolean(clusterId && statefulsetName), + }) +} diff --git a/libs/domains/observability/feature/src/lib/service/service-dashboard/card-instance-status/card-instance-status.tsx b/libs/domains/observability/feature/src/lib/service/service-dashboard/card-instance-status/card-instance-status.tsx index 88a87d1042e..9a40495e8be 100644 --- a/libs/domains/observability/feature/src/lib/service/service-dashboard/card-instance-status/card-instance-status.tsx +++ b/libs/domains/observability/feature/src/lib/service/service-dashboard/card-instance-status/card-instance-status.tsx @@ -58,11 +58,13 @@ export function CardInstanceStatus({ clusterId, containerName, namespace, + podNames, }: { serviceId: string clusterId: string containerName: string namespace: string + podNames?: string[] }) { const { queryTimeRange, subQueryTimeRange, startTimestamp, endTimestamp } = useDashboardContext() const [isModalOpen, setIsModalOpen] = useState(false) @@ -134,6 +136,7 @@ export function CardInstanceStatus({ serviceId={serviceId} containerName={containerName} namespace={namespace} + podNames={podNames} /> @@ -146,6 +149,7 @@ export function CardInstanceStatus({ containerName={containerName} isFullscreen namespace={namespace} + podNames={podNames} /> diff --git a/libs/domains/observability/feature/src/lib/service/service-dashboard/card-storage/card-storage.tsx b/libs/domains/observability/feature/src/lib/service/service-dashboard/card-storage/card-storage.tsx index 3eaa0b0d6f0..73352f2c6c6 100644 --- a/libs/domains/observability/feature/src/lib/service/service-dashboard/card-storage/card-storage.tsx +++ b/libs/domains/observability/feature/src/lib/service/service-dashboard/card-storage/card-storage.tsx @@ -62,7 +62,7 @@ export function CardStorage({ serviceId, clusterId }: { serviceId: string; clust }) const rawValue = Number(metricsPercentage?.data?.result[0]?.value[1]) - const value = Number.isFinite(rawValue) ? Math.round(rawValue) : 0 + const value = Number.isFinite(rawValue) ? rawValue : 0 const maxUsageBytes = Number(metricsMaxStorage?.data?.result[0]?.value[1]) @@ -89,8 +89,14 @@ export function CardStorage({ serviceId, clusterId }: { serviceId: string; clust const totalStorageGiB = value > 0 ? maxUsageGiB / (value / 100) : 0 const title = `${maxUsageDisplay} ${maxUsageUnit} max storage usage` - const description = - value > 0 ? `${value}% of your ${totalStorageGiB.toFixed(1)} GiB storage allowance` : `No storage usage data` + + let description + if (value > 0) { + const displayValue = value < 0.01 ? '< 0.01' : value < 1 ? value.toFixed(2) : Math.round(value).toString() + description = `${displayValue}% of your ${totalStorageGiB.toFixed(1)} GiB storage allowance` + } else { + description = 'No storage usage data' + } return ( <> diff --git a/libs/domains/observability/feature/src/lib/service/service-dashboard/cpu-chart/cpu-chart.tsx b/libs/domains/observability/feature/src/lib/service/service-dashboard/cpu-chart/cpu-chart.tsx index 5dfd1395422..1164168e6e2 100644 --- a/libs/domains/observability/feature/src/lib/service/service-dashboard/cpu-chart/cpu-chart.tsx +++ b/libs/domains/observability/feature/src/lib/service/service-dashboard/cpu-chart/cpu-chart.tsx @@ -5,30 +5,30 @@ import { usePodColor } from '@qovery/shared/util-hooks' import { calculateRateInterval, useMetrics } from '../../../hooks/use-metrics/use-metrics' import { LocalChart } from '../../../local-chart/local-chart' import { addTimeRangePadding } from '../../../util-chart/add-time-range-padding' +import { buildPromSelector } from '../../../util-chart/build-selector' import { convertPodName } from '../../../util-chart/convert-pod-name' import { processMetricsData } from '../../../util-chart/process-metrics-data' import { useDashboardContext } from '../../../util-filter/dashboard-context' -const queryCpuUsage = (rateInterval: string, containerName: string) => ` - sum by (pod) (rate(container_cpu_usage_seconds_total{container="${containerName}"}[${rateInterval}])) -` +const queryCpuUsage = (rateInterval: string, selector: string) => + `sum by (pod) (rate(container_cpu_usage_seconds_total{${selector}}[${rateInterval}]))` -const queryCpuLimit = (containerName: string) => ` - sum (bottomk(1, kube_pod_container_resource_limits{resource="cpu",container="${containerName}"})) -` +const queryCpuLimit = (selector: string) => + `sum (bottomk(1, kube_pod_container_resource_limits{resource="cpu",${selector}}))` -const queryCpuRequest = (containerName: string) => ` - sum (bottomk(1, kube_pod_container_resource_requests{resource="cpu",container="${containerName}"})) -` +const queryCpuRequest = (selector: string) => + `sum (bottomk(1, kube_pod_container_resource_requests{resource="cpu",${selector}}))` export function CpuChart({ clusterId, serviceId, containerName, + podNames, }: { clusterId: string serviceId: string containerName: string + podNames?: string[] }) { const { startTimestamp, endTimestamp, useLocalTime, timeRange } = useDashboardContext() const getColorByPod = usePodColor() @@ -56,9 +56,11 @@ export function CpuChart({ [startTimestamp, endTimestamp] ) + const selector = useMemo(() => buildPromSelector(containerName, podNames), [containerName, podNames]) + const { data: metrics, isLoading: isLoadingMetrics } = useMetrics({ clusterId, - query: queryCpuUsage(rateInterval, containerName), + query: queryCpuUsage(rateInterval, selector), startTimestamp, endTimestamp, timeRange, @@ -68,7 +70,7 @@ export function CpuChart({ const { data: limitMetrics, isLoading: isLoadingLimit } = useMetrics({ clusterId, - query: queryCpuLimit(containerName), + query: queryCpuLimit(selector), startTimestamp, endTimestamp, timeRange, @@ -78,7 +80,7 @@ export function CpuChart({ const { data: requestMetrics, isLoading: isLoadingRequest } = useMetrics({ clusterId, - query: queryCpuRequest(containerName), + query: queryCpuRequest(selector), startTimestamp, endTimestamp, timeRange, diff --git a/libs/domains/observability/feature/src/lib/service/service-dashboard/disk-chart/disk-chart.tsx b/libs/domains/observability/feature/src/lib/service/service-dashboard/disk-chart/disk-chart.tsx index 95b21f8b9f5..5fe8e7e836d 100644 --- a/libs/domains/observability/feature/src/lib/service/service-dashboard/disk-chart/disk-chart.tsx +++ b/libs/domains/observability/feature/src/lib/service/service-dashboard/disk-chart/disk-chart.tsx @@ -3,41 +3,42 @@ import { Line } from 'recharts' import { useMetrics } from '../../../hooks/use-metrics/use-metrics' import { LocalChart } from '../../../local-chart/local-chart' import { addTimeRangePadding } from '../../../util-chart/add-time-range-padding' +import { buildPromSelector } from '../../../util-chart/build-selector' import { processMetricsData } from '../../../util-chart/process-metrics-data' import { useDashboardContext } from '../../../util-filter/dashboard-context' -const queryDiskReadNvme = (containerName: string) => ` - sum by (device) (rate(container_fs_reads_bytes_total{container="${containerName}", device=~"/dev/nvme0.*"}[1m])) -` +const queryDiskReadNvme = (selector: string) => + `sum (rate(container_fs_reads_bytes_total{${selector}, device=~"/dev/nvme.*"}[1m]))` -const queryDiskReadNonNvme = (containerName: string) => ` - sum by (device) (rate(container_fs_reads_bytes_total{container="${containerName}", device!~"/dev/nvme0.*", device!=""}[1m])) -` +const queryDiskReadNonNvme = (selector: string) => + `sum by (device) (rate(container_fs_reads_bytes_total{${selector}, device!~"/dev/nvme.*", device!=""}[1m]))` -const queryDiskWriteNvme = (containerName: string) => ` - sum by (device) (rate(container_fs_writes_bytes_total{container="${containerName}", device=~"/dev/nvme0.*"}[1m])) -` +const queryDiskWriteNvme = (selector: string) => + `sum by (device) (rate(container_fs_writes_bytes_total{${selector}, device=~"/dev/nvme.*"}[1m]))` -const queryDiskWriteNonNvme = (containerName: string) => ` - sum by (device) (rate(container_fs_writes_bytes_total{container=${containerName}"", device!~"/dev/nvme0.*", device!=""}[1m])) -` +const queryDiskWriteNonNvme = (selector: string) => + `sum by (device) (rate(container_fs_writes_bytes_total{${selector}, device!~"/dev/nvme.*", device!=""}[1m]))` export function DiskChart({ clusterId, serviceId, containerName, + podNames, }: { clusterId: string serviceId: string containerName: string + podNames?: string[] }) { const { startTimestamp, endTimestamp, useLocalTime, timeRange } = useDashboardContext() + const selector = useMemo(() => buildPromSelector(containerName, podNames), [containerName, podNames]) + const { data: metricsReadEphemeralStorage, isLoading: isLoadingMetricsReadEphemeralStorage } = useMetrics({ clusterId, startTimestamp, endTimestamp, - query: queryDiskReadNvme(containerName), + query: queryDiskReadNvme(selector), timeRange, boardShortName: 'service_overview', metricShortName: 'disk_chart_read_ephemeral', @@ -47,7 +48,7 @@ export function DiskChart({ clusterId, startTimestamp, endTimestamp, - query: queryDiskReadNonNvme(containerName), + query: queryDiskReadNonNvme(selector), timeRange, boardShortName: 'service_overview', metricShortName: 'disk_chart_read_persistent', @@ -57,7 +58,7 @@ export function DiskChart({ clusterId, startTimestamp, endTimestamp, - query: queryDiskWriteNvme(containerName), + query: queryDiskWriteNvme(selector), timeRange, boardShortName: 'service_overview', metricShortName: 'disk_chart_read_ephemeral', @@ -67,7 +68,7 @@ export function DiskChart({ clusterId, startTimestamp, endTimestamp, - query: queryDiskWriteNonNvme(containerName), + query: queryDiskWriteNonNvme(selector), timeRange, boardShortName: 'service_overview', metricShortName: 'disk_chart_write_persistent', diff --git a/libs/domains/observability/feature/src/lib/service/service-dashboard/instance-status-chart/instance-status-chart.tsx b/libs/domains/observability/feature/src/lib/service/service-dashboard/instance-status-chart/instance-status-chart.tsx index 0662dba7471..8323d800f8f 100644 --- a/libs/domains/observability/feature/src/lib/service/service-dashboard/instance-status-chart/instance-status-chart.tsx +++ b/libs/domains/observability/feature/src/lib/service/service-dashboard/instance-status-chart/instance-status-chart.tsx @@ -3,6 +3,7 @@ import { Area, Line, ReferenceLine } from 'recharts' import { calculateDynamicRange, calculateRateInterval, useMetrics } from '../../../hooks/use-metrics/use-metrics' import { LocalChart, type ReferenceLineEvent } from '../../../local-chart/local-chart' import { addTimeRangePadding } from '../../../util-chart/add-time-range-padding' +import { buildPromSelector } from '../../../util-chart/build-selector' import { formatTimestamp } from '../../../util-chart/format-timestamp' import { processMetricsData } from '../../../util-chart/process-metrics-data' import { useDashboardContext } from '../../../util-filter/dashboard-context' @@ -15,20 +16,20 @@ sum by (condition) ( ) ` -const queryRestartWithReason = (containerName: string, timeRange: string) => ` -sum by (reason) ( - sum by (pod) ( - increase(kube_pod_container_status_restarts_total{container="${containerName}"}[${timeRange}]) - ) - * - on(pod) group_left(reason) - sum by (pod, reason) ( - max without(instance, job, endpoint, service, prometheus, uid) ( - kube_pod_container_status_last_terminated_reason{container="${containerName}"} - ) - ) -) -` +const queryRestartWithReason = (selector: string, timeRange: string) => ` + sum by (reason) ( + sum by (pod) ( + increase(kube_pod_container_status_restarts_total{${selector}}[${timeRange}]) + ) + * + on(pod) group_left(reason) + sum by (pod, reason) ( + max without(instance, job, endpoint, service, prometheus, uid) ( + kube_pod_container_status_last_terminated_reason{${selector}} + ) + ) + ) + ` const queryK8sEvent = (serviceId: string, dynamicRange: string) => ` sum by (pod,reason)( @@ -199,12 +200,14 @@ export function InstanceStatusChart({ containerName, isFullscreen, namespace, + podNames, }: { clusterId: string serviceId: string containerName: string namespace: string isFullscreen?: boolean + podNames?: string[] }) { const { startTimestamp, endTimestamp, useLocalTime, hideEvents, timeRange } = useDashboardContext() @@ -230,6 +233,8 @@ export function InstanceStatusChart({ metricShortName: 'instance_status_health', }) + const selector = useMemo(() => buildPromSelector(containerName, podNames), [containerName, podNames]) + const { data: metricsRestartsWithReason, isLoading: isLoadingMetricsRestartsWithReason, @@ -239,7 +244,7 @@ export function InstanceStatusChart({ startTimestamp, endTimestamp, timeRange, - query: queryRestartWithReason(containerName, timeRange), + query: queryRestartWithReason(selector, timeRange), boardShortName: 'service_overview', metricShortName: 'instance_status_restart', }) diff --git a/libs/domains/observability/feature/src/lib/service/service-dashboard/memory-chart/memory-chart.tsx b/libs/domains/observability/feature/src/lib/service/service-dashboard/memory-chart/memory-chart.tsx index 26c95ba4f80..75f63c30933 100644 --- a/libs/domains/observability/feature/src/lib/service/service-dashboard/memory-chart/memory-chart.tsx +++ b/libs/domains/observability/feature/src/lib/service/service-dashboard/memory-chart/memory-chart.tsx @@ -5,30 +5,29 @@ import { usePodColor } from '@qovery/shared/util-hooks' import { useMetrics } from '../../../hooks/use-metrics/use-metrics' import { LocalChart } from '../../../local-chart/local-chart' import { addTimeRangePadding } from '../../../util-chart/add-time-range-padding' +import { buildPromSelector } from '../../../util-chart/build-selector' import { convertPodName } from '../../../util-chart/convert-pod-name' import { processMetricsData } from '../../../util-chart/process-metrics-data' import { useDashboardContext } from '../../../util-filter/dashboard-context' -const queryMemoryUsage = (containerName: string) => ` - sum by (pod) (container_memory_working_set_bytes{container="${containerName}"}) -` +const queryMemoryUsage = (selector: string) => `sum by (pod) (container_memory_working_set_bytes{${selector}})` -const queryMemoryLimit = (containerName: string) => ` - sum (bottomk(1, kube_pod_container_resource_limits{resource="memory", container="${containerName}"})) -` +const queryMemoryLimit = (selector: string) => + `sum (bottomk(1, kube_pod_container_resource_limits{resource="memory", ${selector}}))` -const queryMemoryRequest = (containerName: string) => ` - sum (bottomk(1, kube_pod_container_resource_requests{resource="memory", container="${containerName}"})) -` +const queryMemoryRequest = (selector: string) => + `sum (bottomk(1, kube_pod_container_resource_requests{resource="memory", ${selector}}))` export function MemoryChart({ clusterId, serviceId, containerName, + podNames, }: { clusterId: string serviceId: string containerName: string + podNames?: string[] }) { const { startTimestamp, endTimestamp, useLocalTime, timeRange } = useDashboardContext() const getColorByPod = usePodColor() @@ -51,11 +50,13 @@ export function MemoryChart({ setLegendSelectedKeys(new Set()) } + const selector = useMemo(() => buildPromSelector(containerName, podNames), [containerName, podNames]) + const { data: metrics, isLoading: isLoadingMetrics } = useMetrics({ clusterId, startTimestamp, endTimestamp, - query: queryMemoryUsage(containerName), + query: queryMemoryUsage(selector), timeRange, boardShortName: 'service_overview', metricShortName: 'memory', @@ -65,7 +66,7 @@ export function MemoryChart({ clusterId, startTimestamp, endTimestamp, - query: queryMemoryLimit(containerName), + query: queryMemoryLimit(selector), timeRange, boardShortName: 'service_overview', metricShortName: 'memory_limit', @@ -75,7 +76,7 @@ export function MemoryChart({ clusterId, startTimestamp, endTimestamp, - query: queryMemoryRequest(containerName), + query: queryMemoryRequest(selector), timeRange, boardShortName: 'service_overview', metricShortName: 'memory_request', diff --git a/libs/domains/observability/feature/src/lib/service/service-dashboard/service-dashboard.tsx b/libs/domains/observability/feature/src/lib/service/service-dashboard/service-dashboard.tsx index b6630f7f206..93ab304933a 100644 --- a/libs/domains/observability/feature/src/lib/service/service-dashboard/service-dashboard.tsx +++ b/libs/domains/observability/feature/src/lib/service/service-dashboard/service-dashboard.tsx @@ -1,12 +1,15 @@ import clsx from 'clsx' import { subHours } from 'date-fns' +import { DatabaseModeEnum } from 'qovery-typescript-axios' import { useParams } from 'react-router-dom' +import { type Database } from '@qovery/domains/services/data-access' import { useService } from '@qovery/domains/services/feature' import { Button, Callout, Chart, Heading, Icon, InputSelectSmall, Section, Tooltip } from '@qovery/shared/ui' import { useContainerName } from '../../hooks/use-container-name/use-container-name' import { useEnvironment } from '../../hooks/use-environment/use-environment' import { useIngressName } from '../../hooks/use-ingress-name/use-ingress-name' import { useNamespace } from '../../hooks/use-namespace/use-namespace' +import { usePodNames } from '../../hooks/use-pod-names/use-pod-names' import { DashboardProvider, useDashboardContext } from '../../util-filter/dashboard-context' import { CardHTTPErrors } from './card-http-errors/card-http-errors' import { CardInstanceStatus } from './card-instance-status/card-instance-status' @@ -27,9 +30,10 @@ import { PrivateNetworkRequestStatusChart } from './private-network-request-stat import { SelectTimeRange } from './select-time-range/select-time-range' function ServiceDashboardContent() { - const { environmentId = '', applicationId = '' } = useParams() + const { environmentId = '', applicationId = '', databaseId = '' } = useParams() - const { data: service } = useService({ serviceId: applicationId }) + const serviceId = applicationId || databaseId + const { data: service } = useService({ serviceId }) const { data: environment } = useEnvironment({ environmentId }) const { expandCharts, @@ -55,23 +59,39 @@ function ServiceDashboardContent() { (service?.serviceType === 'CONTAINER' && (service?.ports || []).some((port) => !port.publicly_accessible))) const hasStorage = - (service?.serviceType === 'CONTAINER' || service?.serviceType === 'APPLICATION') && - (service.storage || []).length > 0 + service?.serviceType === 'DATABASE' || + ((service?.serviceType === 'CONTAINER' || service?.serviceType === 'APPLICATION') && + Array.isArray(service.storage) && + service.storage.length > 0) const now = new Date() const oneHourAgo = subHours(now, 1) const { data: containerName, isFetched: isFetchedContainerName } = useContainerName({ clusterId: environment?.cluster_id ?? '', - serviceId: applicationId, + serviceId: serviceId, resourceType: hasStorage ? 'statefulset' : 'deployment', startDate: oneHourAgo.toISOString(), endDate: now.toISOString(), }) + // For container databases, retrieve pod names via kube_pod_owner + const isContainerDatabase = + service?.serviceType === 'DATABASE' && (service as Database)?.mode === DatabaseModeEnum.CONTAINER + + const { data: podNamesData, isFetched: isFetchedPodNames } = usePodNames({ + clusterId: environment?.cluster_id ?? '', + statefulsetName: containerName ?? '', + startDate: oneHourAgo.toISOString(), + endDate: now.toISOString(), + enabled: isContainerDatabase, + }) + + const podNames = isContainerDatabase && Array.isArray(podNamesData) ? podNamesData : [] + const { data: namespace, isFetched: isFetchedNamespace } = useNamespace({ clusterId: environment?.cluster_id ?? '', - serviceId: applicationId, + serviceId: serviceId, resourceType: hasStorage ? 'statefulset' : 'deployment', startDate: oneHourAgo.toISOString(), endDate: now.toISOString(), @@ -79,7 +99,7 @@ function ServiceDashboardContent() { const { data: ingressName = '' } = useIngressName({ clusterId: environment?.cluster_id ?? '', - serviceId: applicationId, + serviceId: serviceId, enabled: hasPublicPort, startDate: oneHourAgo.toISOString(), endDate: now.toISOString(), @@ -113,7 +133,8 @@ function ServiceDashboardContent() { ) } - if (!environment || !service || !containerName || !namespace) + // For container databases, wait for podNames fetch to settle (even if empty) + if (!environment || !service || !containerName || !namespace || (isContainerDatabase && !isFetchedPodNames)) return (