diff --git a/CBS-Azure-Solutions/vm-insights-disk-performance/README.md b/CBS-Azure-Solutions/vm-insights-disk-performance/README.md new file mode 100644 index 0000000..ca4cdbf --- /dev/null +++ b/CBS-Azure-Solutions/vm-insights-disk-performance/README.md @@ -0,0 +1,66 @@ +# VM Disk Capacity & Performance Report + +A KQL query for Azure Monitor Logs that produces a per-drive, per-VM report of disk capacity and peak performance metrics — using data collected by [VM Insights](https://learn.microsoft.com/en-us/azure/azure-monitor/vm/vminsights-overview). + +## What it does + +The query correlates `InsightsMetrics` data to produce a single row per drive per VM showing: + +- **Capacity** — disk size, used/free space, percent used +- **Peak Write snapshot** — the moment of highest write throughput, with all other metrics (read MB/s, read/write IOPS, read/write latency) captured at that same timestamp +- **Peak Read snapshot** — same idea, anchored to the moment of highest read throughput + +The `PW_` and `PR_` column prefixes indicate which peak the correlated values belong to. Null values mean a metric sample didn't land at that exact timestamp. + +Drives are classified as `OS`, `Temp`, or `Data` based on mount point and size. Ephemeral/system mounts (`/mnt`, `/mnt/resource`, `/snap/*`, `/boot`, `/sys/*`) are excluded automatically. + +## Prerequisites + +- **VM Insights** must be enabled on target VMs — this is where the `InsightsMetrics` table comes from +- **Log Analytics workspace(s)** receiving the VM Insights data +- Permissions to query the workspace(s) via Azure Monitor Logs + +## How to run + +1. In the Azure portal, navigate to **Monitor → Logs** +2. Switch the editor to **KQL mode** (drop-down in the query toolbar) +3. Paste the contents of [`vm-disk-performance-capacity.kql`](vm-disk-performance-capacity.kql) +<!-- Screenshot 2026-03-23 at 3 42 51 pm: image markup was lost; re-attach the screenshot here --> + +4. 
**Set the scope** — click the kebab menu (⋯) on the query tab and select **Change scope** + - To query across all subscriptions: select each subscription + - To narrow results: filter **Resource types** to `Log Analytics workspace` and select only the relevant workspace(s) +5. Set the **Time range** (e.g. Last 24 hours) and click **Run** + +## Exporting results + +Click **Share → Export to CSV (all columns)** to download the full result set for offline analysis or import into a TCO model. + +## Output columns + +| Column | Description | +|--------|-------------| +| `SubscriptionId` | Azure subscription GUID | +| `ResourceGroup` | VM resource group | +| `Computer` | VM hostname | +| `Drive` | Mount point / drive letter | +| `DriveType` | `OS`, `Temp`, or `Data` | +| `DiskSizeGB` | Total disk capacity | +| `UsedSpaceGB` / `FreeSpaceGB` | Average used and free space over the time range | +| `PctUsed` | Percent used | +| `PeakWriteTime` | Timestamp of maximum write throughput | +| `MaxWriteMBps` | Peak write throughput (MB/s) | +| `PW_ReadMBps` | Read throughput at peak write time | +| `PW_WriteIOPS` / `PW_ReadIOPS` | IOPS at peak write time | +| `PW_WriteLatMs` / `PW_ReadLatMs` | Latency at peak write time | +| `PeakReadTime` | Timestamp of maximum read throughput | +| `MaxReadMBps` | Peak read throughput (MB/s) | +| `PR_WriteMBps` | Write throughput at peak read time | +| `PR_ReadIOPS` / `PR_WriteIOPS` | IOPS at peak read time | +| `PR_ReadLatMs` / `PR_WriteLatMs` | Latency at peak read time | + +## Files + +| File | Description | +|------|-------------| +| [`vm-disk-performance-capacity.kql`](vm-disk-performance-capacity.kql) | The KQL query — paste directly into Azure Monitor Logs | diff --git a/CBS-Azure-Solutions/vm-insights-disk-performance/vm-disk-performance-capacity.kql b/CBS-Azure-Solutions/vm-insights-disk-performance/vm-disk-performance-capacity.kql new file mode 100644 index 0000000..ef720e2 --- /dev/null +++ 
b/CBS-Azure-Solutions/vm-insights-disk-performance/vm-disk-performance-capacity.kql
@@ -0,0 +1,111 @@
+//======================================================================
+// VM Disk Performance & Capacity Report
+// Scope: every subscription / workspace selected as the query scope
+// Shows correlated performance metrics at the moment of peak read
+// and peak write throughput for each drive on each VM
+//
+// COLUMNS:
+//   PW_ prefix  = correlated value at peak WRITE time
+//   PR_ prefix  = correlated value at peak READ time
+//   Null values = metric sample didn't align at that exact timestamp
+//======================================================================
+// Shared source: LogicalDisk samples for real drives only. The perf and
+// capacity blocks both derive from it, so the drive filter lives in one place.
+let diskMetrics = InsightsMetrics
+| where Namespace == "LogicalDisk"
+| where Name in ("ReadBytesPerSecond", "WriteBytesPerSecond", "ReadsPerSecond", "WritesPerSecond", "ReadLatencyMs", "WriteLatencyMs", "FreeSpaceMB")
+| extend DiskDetails = parse_json(Tags)
+| extend Drive = tostring(DiskDetails["vm.azm.ms/mountId"])
+| where Drive !in ("", "/mnt", "/mnt/resource")      // no mount id, or excluded mounts
+| where Drive !startswith "/snap/"
+| where Drive !startswith "/boot"
+| where Drive !startswith "/sys/";
+// All perf metrics (throughput, IOPS, latency) for the filtered drives
+let baseData = diskMetrics
+| where Name != "FreeSpaceMB";
+// Capacity data (size, used, free, pct used): average free space, largest reported size
+let driveInfo = diskMetrics
+| where Name == "FreeSpaceMB"
+| extend DiskSizeMB = todecimal(DiskDetails["vm.azm.ms/diskSizeMB"])
+| summarize FreeSpaceMB = avg(Val), DiskSizeMB = max(DiskSizeMB) by Computer, Drive, _ResourceId
+| extend UsedSpaceMB = DiskSizeMB - FreeSpaceMB
+| extend FreeSpaceGB = round(FreeSpaceMB / 1024, 2)
+| extend UsedSpaceGB = round(UsedSpaceMB / 1024, 2)
+| extend DiskSizeGB = round(DiskSizeMB / 1024, 2)
+| extend PctUsed = round((UsedSpaceMB / DiskSizeMB) * 100, 1);
+// Timestamp and value of max write throughput (bytes/s converted to MB/s)
+let peakWrite = baseData
+| where Name == "WriteBytesPerSecond"
+| summarize arg_max(Val, TimeGenerated) by Computer, Drive, _ResourceId
+| project Computer, Drive, _ResourceId, PeakWriteTime = TimeGenerated, MaxWriteMBps = round(Val / 1048576, 2);
+// Timestamp and value of max read throughput (bytes/s converted to MB/s)
+let peakRead = baseData
+| where Name == "ReadBytesPerSecond"
+| summarize arg_max(Val, TimeGenerated) by Computer, Drive, _ResourceId
+| project Computer, Drive, _ResourceId, PeakReadTime = TimeGenerated, MaxReadMBps = round(Val / 1048576, 2);
+// Flattened lookup table for timestamp correlation.
+// Collapsed to one row per drive / metric / timestamp: a sample that is present
+// more than once (e.g. the same VM reporting to two workspaces in scope) would
+// otherwise multiply the output rows in every correlated join below.
+let allMetrics = baseData
+| summarize RawVal = max(Val) by Computer, Drive, _ResourceId, TimeGenerated, Name
+| extend MBps = round(RawVal / 1048576, 2);
+// Assembly: join all blocks and correlate metrics at peak times
+peakWrite
+| join kind=leftouter peakRead on Computer, Drive, _ResourceId
+| join kind=leftouter driveInfo on Computer, Drive, _ResourceId
+| extend SubscriptionId = tostring(split(_ResourceId, "/")[2])
+| extend ResourceGroup = tostring(split(_ResourceId, "/")[4])
+| extend DriveType = case(
+    Drive == "/" or Drive == "C:", "OS",
+    Drive == "D:" and DiskSizeGB <= 16, "Temp",
+    "Data"
+)
+// Correlated metrics at PEAK WRITE time (PW_ prefix)
+| join kind=leftouter (
+    allMetrics | where Name == "ReadBytesPerSecond"
+    | project Computer, Drive, _ResourceId, TimeGenerated, PW_ReadMBps = MBps
+) on Computer, Drive, _ResourceId, $left.PeakWriteTime == $right.TimeGenerated
+| join kind=leftouter (
+    allMetrics | where Name == "ReadsPerSecond"
+    | project Computer, Drive, _ResourceId, TimeGenerated, PW_ReadIOPS = round(RawVal, 0)
+) on Computer, Drive, _ResourceId, $left.PeakWriteTime == $right.TimeGenerated
+| join kind=leftouter (
+    allMetrics | where Name == "WritesPerSecond"
+    | project Computer, Drive, _ResourceId, TimeGenerated, PW_WriteIOPS = round(RawVal, 0)
+) on Computer, Drive, _ResourceId, $left.PeakWriteTime == $right.TimeGenerated
+| join kind=leftouter (
+    allMetrics | where Name == "ReadLatencyMs"
+    | project Computer, Drive, _ResourceId, TimeGenerated, PW_ReadLatMs = round(RawVal, 2)
+) on Computer, Drive, _ResourceId, $left.PeakWriteTime == $right.TimeGenerated
+| join kind=leftouter (
+    allMetrics | where Name == "WriteLatencyMs"
+    | project Computer, Drive, _ResourceId, TimeGenerated, PW_WriteLatMs = round(RawVal, 2)
+) on Computer, Drive, _ResourceId, $left.PeakWriteTime == $right.TimeGenerated
+// Correlated metrics at PEAK READ time (PR_ prefix)
+| join kind=leftouter (
+    allMetrics | where Name == "WriteBytesPerSecond"
+    | project Computer, Drive, _ResourceId, TimeGenerated, PR_WriteMBps = MBps
+) on Computer, Drive, _ResourceId, $left.PeakReadTime == $right.TimeGenerated
+| join kind=leftouter (
+    allMetrics | where Name == "ReadsPerSecond"
+    | project Computer, Drive, _ResourceId, TimeGenerated, PR_ReadIOPS = round(RawVal, 0)
+) on Computer, Drive, _ResourceId, $left.PeakReadTime == $right.TimeGenerated
+| join kind=leftouter (
+    allMetrics | where Name == "WritesPerSecond"
+    | project Computer, Drive, _ResourceId, TimeGenerated, PR_WriteIOPS = round(RawVal, 0)
+) on Computer, Drive, _ResourceId, $left.PeakReadTime == $right.TimeGenerated
+| join kind=leftouter (
+    allMetrics | where Name == "ReadLatencyMs"
+    | project Computer, Drive, _ResourceId, TimeGenerated, PR_ReadLatMs = round(RawVal, 2)
+) on Computer, Drive, _ResourceId, $left.PeakReadTime == $right.TimeGenerated
+| join kind=leftouter (
+    allMetrics | where Name == "WriteLatencyMs"
+    | project Computer, Drive, _ResourceId, TimeGenerated, PR_WriteLatMs = round(RawVal, 2)
+) on Computer, Drive, _ResourceId, $left.PeakReadTime == $right.TimeGenerated
+// Output: Identity > Capacity > Peak Write snapshot > Peak Read snapshot
+| project SubscriptionId, ResourceGroup, Computer, Drive, DriveType, DiskSizeGB, UsedSpaceGB, FreeSpaceGB, PctUsed,
+    PeakWriteTime, MaxWriteMBps, PW_ReadMBps, PW_WriteIOPS, PW_ReadIOPS, PW_WriteLatMs, PW_ReadLatMs,
+    PeakReadTime, MaxReadMBps, PR_WriteMBps, PR_ReadIOPS, PR_WriteIOPS, PR_ReadLatMs, PR_WriteLatMs,
+    _ResourceId
+| order by SubscriptionId asc, Computer asc, Drive asc