This repository was archived by the owner on Oct 24, 2023. It is now read-only.

Updating containermonitoring addon to use the latest agent #325

Merged · 2 commits · Jan 17, 2019
186 changes: 143 additions & 43 deletions parts/k8s/containeraddons/kubernetesmasteraddons-omsagent-daemonset.yaml
@@ -50,43 +50,143 @@ roleRef:
---
kind: ConfigMap
apiVersion: v1
data:
kube.conf: "# Fluentd config file for OMS Docker - cluster components (kubeAPI)\r\n\r\n#Kubernetes
pod inventory\r\n<source>\r\n\ttype kubepodinventory\r\n\ttag oms.containerinsights.KubePodInventory\r\n\trun_interval
60s\r\n log_level debug\r\n</source>\r\n\r\n#Kubernetes events\r\n<source>\r\n\ttype
kubeevents\r\n\ttag oms.api.KubeEvents.CollectionTime\r\n\trun_interval 60s\r\n
\ log_level debug\r\n</source>\r\n\r\n#Kubernetes logs\r\n<source>\r\n\ttype kubelogs\r\n\ttag
oms.api.KubeLogs\r\n\trun_interval 60s\r\n</source>\r\n\r\n#Kubernetes services\r\n<source>\r\n\ttype
kubeservices\r\n\ttag oms.api.KubeServices.CollectionTime\r\n\trun_interval 60s\r\n
\ log_level debug\r\n</source>\r\n\r\n#Kubernetes Nodes\r\n<source>\r\n\ttype
kubenodeinventory\r\n\ttag oms.containerinsights.KubeNodeInventory\r\n\trun_interval
60s\r\n log_level debug\r\n</source>\r\n\r\n#Kubernetes perf\r\n<source>\r\n\ttype
kubeperf\r\n\ttag oms.api.KubePerf\r\n\trun_interval 60s\r\n log_level debug\r\n</source>\r\n\r\n<match
oms.containerinsights.KubePodInventory**>\r\n type out_oms\r\n log_level debug\r\n
\ num_threads 5\r\n buffer_chunk_limit 20m\r\n buffer_type file\r\n buffer_path
%STATE_DIR_WS%/out_oms_kubepods*.buffer\r\n buffer_queue_limit 20\r\n buffer_queue_full_action
drop_oldest_chunk\r\n flush_interval 20s\r\n retry_limit 10\r\n retry_wait
30s\r\n max_retry_wait 9m\r\n</match>\r\n\r\n<match oms.api.KubeEvents**>\r\n\ttype
out_oms_api\r\n\tlog_level debug\r\n num_threads 5\r\n\tbuffer_chunk_limit 5m\r\n\tbuffer_type
file\r\n\tbuffer_path %STATE_DIR_WS%/out_oms_api_kubeevents*.buffer\r\n\tbuffer_queue_limit
10\r\n buffer_queue_full_action drop_oldest_chunk\r\n\tflush_interval 20s\r\n\tretry_limit
10\r\n\tretry_wait 30s\r\n</match>\r\n\r\n<match oms.api.KubeLogs**>\r\n\ttype
out_oms_api\r\n\tlog_level debug\r\n buffer_chunk_limit 10m\r\n\tbuffer_type
file\r\n\tbuffer_path %STATE_DIR_WS%/out_oms_api_kubernetes_logs*.buffer\r\n\tbuffer_queue_limit
10\r\n\tflush_interval 20s\r\n\tretry_limit 10\r\n\tretry_wait 30s\r\n</match>\r\n\r\n<match
oms.api.KubeServices**>\t \r\n type out_oms_api\r\n log_level debug\r\n num_threads
5\r\n buffer_chunk_limit 20m\r\n buffer_type file\r\n buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer\r\n
\ buffer_queue_limit 20\r\n buffer_queue_full_action drop_oldest_chunk\r\n flush_interval
20s\r\n retry_limit 10\r\n retry_wait 30s\r\n max_retry_wait 9m\r\n</match>\r\n\r\n<match
oms.containerinsights.KubeNodeInventory**>\r\n type out_oms\r\n log_level debug\r\n
\ num_threads 5\r\n buffer_chunk_limit 20m\r\n buffer_type file\r\n buffer_path
%STATE_DIR_WS%/state/out_oms_kubenodes*.buffer\r\n buffer_queue_limit 20\r\n
\ buffer_queue_full_action drop_oldest_chunk\r\n flush_interval 20s\r\n retry_limit
10\r\n retry_wait 30s\r\n max_retry_wait 9m\r\n</match>\r\n\r\n<match oms.api.KubePerf**>\t\r\n
\ type out_oms\r\n log_level debug\r\n num_threads 5\r\n buffer_chunk_limit
20m\r\n buffer_type file\r\n buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer\r\n
\ buffer_queue_limit 20\r\n buffer_queue_full_action drop_oldest_chunk\r\n flush_interval
20s\r\n retry_limit 10\r\n retry_wait 30s\r\n max_retry_wait 9m\r\n</match>\r\n"
data:
  kube.conf: |-
    # Fluentd config file for OMS Docker - cluster components (kubeAPI)

    #Kubernetes pod inventory
    <source>
      type kubepodinventory
      tag oms.containerinsights.KubePodInventory
      run_interval 60s
      log_level debug
    </source>

    #Kubernetes events
    <source>
      type kubeevents
      tag oms.containerinsights.KubeEvents
      run_interval 60s
      log_level debug
    </source>

    #Kubernetes logs
    <source>
      type kubelogs
      tag oms.api.KubeLogs
      run_interval 60s
    </source>

    #Kubernetes services
    <source>
      type kubeservices
      tag oms.containerinsights.KubeServices
      run_interval 60s
      log_level debug
    </source>

    #Kubernetes Nodes
    <source>
      type kubenodeinventory
      tag oms.containerinsights.KubeNodeInventory
      run_interval 60s
      log_level debug
    </source>

    #Kubernetes perf
    <source>
      type kubeperf
      tag oms.api.KubePerf
      run_interval 60s
      log_level debug
    </source>

    <match oms.containerinsights.KubePodInventory**>
      type out_oms
      log_level debug
      num_threads 5
      buffer_chunk_limit 20m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_oms_kubepods*.buffer
      buffer_queue_limit 20
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
      retry_wait 30s
      max_retry_wait 9m
    </match>

    <match oms.containerinsights.KubeEvents**>
      type out_oms
      log_level debug
      num_threads 5
      buffer_chunk_limit 5m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_oms_kubeevents*.buffer
      buffer_queue_limit 10
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
      retry_wait 30s
      max_retry_wait 9m
    </match>

    <match oms.api.KubeLogs**>
      type out_oms_api
      log_level debug
      buffer_chunk_limit 10m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_oms_api_kubernetes_logs*.buffer
      buffer_queue_limit 10
      flush_interval 20s
      retry_limit 10
      retry_wait 30s
    </match>

    <match oms.containerinsights.KubeServices**>
      type out_oms
      log_level debug
      num_threads 5
      buffer_chunk_limit 20m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_oms_kubeservices*.buffer
      buffer_queue_limit 20
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
      retry_wait 30s
      max_retry_wait 9m
    </match>

    <match oms.containerinsights.KubeNodeInventory**>
      type out_oms
      log_level debug
      num_threads 5
      buffer_chunk_limit 20m
      buffer_type file
      buffer_path %STATE_DIR_WS%/state/out_oms_kubenodes*.buffer
      buffer_queue_limit 20
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
      retry_wait 30s
      max_retry_wait 9m
    </match>

    <match oms.api.ContainerNodeInventory**>
      type out_oms_api
      log_level debug
      buffer_chunk_limit 20m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_oms_containernodeinventory*.buffer
      buffer_queue_limit 20
      flush_interval 20s
      retry_limit 10
      retry_wait 15s
      max_retry_wait 9m
    </match>

    <match oms.api.KubePerf**>
      type out_oms
      log_level debug
      num_threads 5
      buffer_chunk_limit 20m
      buffer_type file
      buffer_path %STATE_DIR_WS%/out_oms_kubeperf*.buffer
      buffer_queue_limit 20
      buffer_queue_full_action drop_oldest_chunk
      flush_interval 20s
      retry_limit 10
      retry_wait 30s
      max_retry_wait 9m
    </match>
metadata:
  name: omsagent-rs-config
  namespace: kube-system
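For reviewers unfamiliar with these Fluentd buffer settings: with retry_wait 30s, retry_limit 10, and max_retry_wait 9m, a failing output plugin backs off by roughly doubling between attempts. (%STATE_DIR_WS% appears to be a placeholder the agent substitutes with its state directory at startup.) Below is a minimal sketch of that retry schedule, assuming Fluentd's default exponential backoff; exact jitter and semantics vary by Fluentd version, so treat it as illustrative only.

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	wait := 30 * time.Second   // retry_wait
	maxWait := 9 * time.Minute // max_retry_wait
	for attempt := 1; attempt <= 10; attempt++ { // retry_limit 10
		fmt.Printf("retry %2d after %v\n", attempt, wait)
		wait *= 2 // assumed doubling backoff
		if wait > maxWait {
			wait = maxWait
		}
	}
}
```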
@@ -136,7 +236,7 @@ spec:
            fieldRef:
              fieldPath: status.hostIP
        - name: ACS_RESOURCE_NAME
          value: "aks-engine-cluster" # was: "my_acs_cluster_name"
        - name: DISABLE_KUBE_SYSTEM_LOG_COLLECTION
          value: "true"
        - name: ISTEST
@@ -157,7 +257,7 @@ spec:
        securityContext:
          privileged: true
        volumeMounts:
        - mountPath: /var/run/host # was: /var/run/docker.sock
          name: docker-sock
        - mountPath: /var/log
          name: host-log
@@ -176,7 +276,7 @@ spec:
      volumes:
      - name: docker-sock
        hostPath:
          path: /var/run # was: /var/run/docker.sock
      - name: container-hostname
        hostPath:
          path: /etc/hostname
@@ -235,7 +335,7 @@ spec:
            fieldRef:
              fieldPath: status.hostIP
        - name: ACS_RESOURCE_NAME
          value: "aks-engine-cluster" # was: my_acs_cluster_name
        - name: DISABLE_KUBE_SYSTEM_LOG_COLLECTION
          value: "true"
        - name: ISTEST
@@ -248,7 +348,7 @@ spec:
        - containerPort: 25224
          protocol: UDP
        volumeMounts:
        - mountPath: /var/run/host # was: /var/run/docker.sock
          name: docker-sock
        - mountPath: /var/log
          name: host-log
@@ -273,7 +373,7 @@ spec:
      volumes:
      - name: docker-sock
        hostPath:
          path: /var/run # was: /var/run/docker.sock
      - name: container-hostname
        hostPath:
          path: /etc/hostname
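A note on the volume change above: mounting the host's entire /var/run at /var/run/host (instead of just docker.sock) presumably lets the updated agent discover whichever container-runtime socket is present rather than hard-coding Docker. The following Go sketch illustrates that kind of probe; the paths and logic are assumptions for illustration, not the agent's actual code.

```go
package main

import (
	"fmt"
	"os"
)

func main() {
	// Candidate runtime sockets under the host's /var/run, as seen from
	// inside the container via the /var/run/host mount. Illustrative only.
	candidates := []string{
		"/var/run/host/docker.sock",
		"/var/run/host/containerd/containerd.sock",
		"/var/run/host/crio/crio.sock",
	}
	for _, sock := range candidates {
		if _, err := os.Stat(sock); err == nil {
			fmt.Println("found runtime socket:", sock)
			return
		}
	}
	fmt.Println("no known runtime socket found")
}
```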
6 changes: 3 additions & 3 deletions pkg/api/addons.go
@@ -196,8 +196,8 @@ func (cs *ContainerService) setAddonsConfig(isUpdate bool) {
		Name:    ContainerMonitoringAddonName,
		Enabled: to.BoolPtr(DefaultContainerMonitoringAddonEnabled),
		Config: map[string]string{
			"omsAgentVersion":       "1.8.1.256", // was "1.6.0-42"
			"dockerProviderVersion": "3.0.0-3",   // was "2.0.0-3"
		},
		Containers: []KubernetesContainerSpec{
			{
@@ -206,7 +206,7 @@ func (cs *ContainerService) setAddonsConfig(isUpdate bool) {
MemoryRequests: "200Mi",
CPULimits: "150m",
MemoryLimits: "750Mi",
Image: "microsoft/oms:ciprod11292018",
Image: "microsoft/oms:ciprod01092019",
},
},
}
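The values above are defaults: setAddonsConfig only fills in addon config keys the user has not already set in the API model. A minimal sketch of that fill-the-gaps pattern follows; it is illustrative, not aks-engine's exact code.

```go
package main

import "fmt"

// mergeDefaults returns user-supplied addon config with defaults filling
// whatever keys are missing; user values always win.
func mergeDefaults(user, defaults map[string]string) map[string]string {
	merged := map[string]string{}
	for k, v := range defaults {
		merged[k] = v
	}
	for k, v := range user { // user values override defaults
		merged[k] = v
	}
	return merged
}

func main() {
	defaults := map[string]string{
		"omsAgentVersion":       "1.8.1.256",
		"dockerProviderVersion": "3.0.0-3",
	}
	user := map[string]string{"omsAgentVersion": "1.8.1.300"} // hypothetical user pin
	fmt.Println(mergeDefaults(user, defaults))
}
```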
2 changes: 1 addition & 1 deletion pkg/api/defaults_test.go
@@ -196,7 +196,7 @@ func TestAssignDefaultAddonImages(t *testing.T) {
		DefaultReschedulerAddonName:        "k8s.gcr.io/rescheduler:v0.3.1",
		DefaultMetricsServerAddonName:      "k8s.gcr.io/metrics-server-amd64:v0.2.1",
		NVIDIADevicePluginAddonName:        "nvidia/k8s-device-plugin:1.10",
		ContainerMonitoringAddonName:       "microsoft/oms:ciprod01092019", // was "microsoft/oms:ciprod11292018"
		IPMASQAgentAddonName:               "k8s.gcr.io/ip-masq-agent-amd64:v2.0.0",
		AzureCNINetworkMonitoringAddonName: "containernetworking/networkmonitor:v0.0.4",
		DefaultDNSAutoscalerAddonName:      "k8s.gcr.io/cluster-proportional-autoscaler-amd64:1.1.1",
16 changes: 8 additions & 8 deletions pkg/api/k8s_versions.go
@@ -28,7 +28,7 @@ var k8sComponentVersions = map[string]map[string]string{
"tiller": "tiller:v2.11.0",
"rescheduler": "rescheduler:v0.4.0",
"aci-connector": "virtual-kubelet:latest",
ContainerMonitoringAddonName: "oms:ciprod10162018-2",
ContainerMonitoringAddonName: "oms:ciprod01092019",
AzureCNINetworkMonitoringAddonName: "networkmonitor:v0.0.4",
"cluster-autoscaler": "cluster-autoscaler:v1.13.1",
NVIDIADevicePluginAddonName: "k8s-device-plugin:1.11",
@@ -61,7 +61,7 @@ var k8sComponentVersions = map[string]map[string]string{
"tiller": "tiller:v2.11.0",
"rescheduler": "rescheduler:v0.4.0",
"aci-connector": "virtual-kubelet:latest",
ContainerMonitoringAddonName: "oms:ciprod10162018-2",
ContainerMonitoringAddonName: "oms:ciprod01092019",
AzureCNINetworkMonitoringAddonName: "networkmonitor:v0.0.4",
"cluster-autoscaler": "cluster-autoscaler:v1.12.1",
NVIDIADevicePluginAddonName: "k8s-device-plugin:1.11",
@@ -93,7 +93,7 @@ var k8sComponentVersions = map[string]map[string]string{
"tiller": "tiller:v2.11.0",
"rescheduler": "rescheduler:v0.4.0",
"aci-connector": "virtual-kubelet:latest",
ContainerMonitoringAddonName: "oms:ciprod10162018-2",
ContainerMonitoringAddonName: "oms:ciprod01092019",
AzureCNINetworkMonitoringAddonName: "networkmonitor:v0.0.4",
"cluster-autoscaler": "cluster-autoscaler:v1.3.4",
NVIDIADevicePluginAddonName: "k8s-device-plugin:1.11",
@@ -125,7 +125,7 @@ var k8sComponentVersions = map[string]map[string]string{
"tiller": "tiller:v2.11.0",
"rescheduler": "rescheduler:v0.3.1",
"aci-connector": "virtual-kubelet:latest",
ContainerMonitoringAddonName: "oms:ciprod10162018-2",
ContainerMonitoringAddonName: "oms:ciprod01092019",
AzureCNINetworkMonitoringAddonName: "networkmonitor:v0.0.4",
"cluster-autoscaler": "cluster-autoscaler:v1.2.2",
NVIDIADevicePluginAddonName: "k8s-device-plugin:1.10",
@@ -157,7 +157,7 @@ var k8sComponentVersions = map[string]map[string]string{
"tiller": "tiller:v2.11.0",
"rescheduler": "rescheduler:v0.3.1",
"aci-connector": "virtual-kubelet:latest",
ContainerMonitoringAddonName: "oms:ciprod10162018-2",
ContainerMonitoringAddonName: "oms:ciprod01092019",
AzureCNINetworkMonitoringAddonName: "networkmonitor:v0.0.4",
"cluster-autoscaler": "cluster-autoscaler:v1.1.2",
"k8s-dns-sidecar": "k8s-dns-sidecar-amd64:1.14.7",
@@ -188,7 +188,7 @@ var k8sComponentVersions = map[string]map[string]string{
"tiller": "tiller:v2.11.0",
"rescheduler": "rescheduler:v0.3.1",
"aci-connector": "virtual-kubelet:latest",
ContainerMonitoringAddonName: "oms:ciprod10162018-2",
ContainerMonitoringAddonName: "oms:ciprod01092019",
AzureCNINetworkMonitoringAddonName: "networkmonitor:v0.0.4",
"nodestatusfreq": DefaultKubernetesNodeStatusUpdateFrequency,
"nodegraceperiod": DefaultKubernetesCtrlMgrNodeMonitorGracePeriod,
@@ -217,7 +217,7 @@ var k8sComponentVersions = map[string]map[string]string{
"tiller": "tiller:v2.11.0",
"rescheduler": "rescheduler:v0.3.1",
"aci-connector": "virtual-kubelet:latest",
ContainerMonitoringAddonName: "oms:ciprod10162018-2",
ContainerMonitoringAddonName: "oms:ciprod01092019",
AzureCNINetworkMonitoringAddonName: "networkmonitor:v0.0.4",
"nodestatusfreq": DefaultKubernetesNodeStatusUpdateFrequency,
"nodegraceperiod": DefaultKubernetesCtrlMgrNodeMonitorGracePeriod,
@@ -246,7 +246,7 @@ var k8sComponentVersions = map[string]map[string]string{
"tiller": "tiller:v2.11.0",
"rescheduler": "rescheduler:v0.3.1",
"aci-connector": "virtual-kubelet:latest",
ContainerMonitoringAddonName: "oms:ciprod10162018-2",
ContainerMonitoringAddonName: "oms:ciprod01092019",
AzureCNINetworkMonitoringAddonName: "networkmonitor:v0.0.4",
"nodestatusfreq": DefaultKubernetesNodeStatusUpdateFrequency,
"nodegraceperiod": DefaultKubernetesCtrlMgrNodeMonitorGracePeriod,
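k8sComponentVersions keys component image tags by Kubernetes minor version, which is why the same oms:ciprod01092019 bump repeats once per supported release. The sketch below shows how such a version-keyed map can be queried to build a full image reference; the helper and names are illustrative assumptions, not the project's exact code.

```go
package main

import "fmt"

// Illustrative stand-in for the per-release component map above.
var k8sComponentVersions = map[string]map[string]string{
	"1.13": {"container-monitoring": "oms:ciprod01092019"},
	"1.12": {"container-monitoring": "oms:ciprod01092019"},
}

// componentImage looks up a component's image tag for a given Kubernetes
// release and joins it with a registry prefix. Hypothetical helper.
func componentImage(release, component, registry string) (string, bool) {
	components, ok := k8sComponentVersions[release]
	if !ok {
		return "", false
	}
	tag, ok := components[component]
	if !ok {
		return "", false
	}
	return registry + "/" + tag, true
}

func main() {
	if img, ok := componentImage("1.13", "container-monitoring", "microsoft"); ok {
		fmt.Println(img) // microsoft/oms:ciprod01092019
	}
}
```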