Skip to content

Commit

Permalink
Azure Arc Diagnostic (#5025)
Browse files Browse the repository at this point in the history
  • Loading branch information
svagadia committed Jul 15, 2022
1 parent 69f8325 commit 15e5e89
Show file tree
Hide file tree
Showing 11 changed files with 1,430 additions and 6 deletions.
5 changes: 5 additions & 0 deletions src/connectedk8s/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
Release History
===============
1.2.10
++++++

* Added the troubleshoot command, which can be used to diagnose Arc-enabled Kubernetes clusters.

1.2.9
++++++

Expand Down
58 changes: 58 additions & 0 deletions src/connectedk8s/azext_connectedk8s/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
Azure_ChinaCloudName = 'AZURECHINACLOUD'
Azure_DogfoodCloudName = 'AZUREDOGFOOD'
PublicCloud_OriginalName = 'AZURECLOUD'
# Secret names for the MSI (azure-identity) and KAP (kube-aad-proxy) certificates.
# NOTE(review): presumably Kubernetes secrets looked up by the certificate presence checks — confirm against the caller.
MSI_Certificate_Secret_Name = 'azure-identity-certificate'
KAP_Certificate_Secret_Name = 'kube-aad-proxy-certificate'
USGovCloud_OriginalName = 'AZUREUSGOVERNMENT'
Dogfood_RMEndpoint = 'https://api-dogfood.resources.windows-int.net/'
Client_Request_Id_Header = 'x-ms-client-request-id'
Expand Down Expand Up @@ -120,6 +122,61 @@
Get_PublicKey_Info_Fault_Type = 'Error while fetching the PoP publickey information from client proxy'
PoP_Public_Key_Expried_Fault_Type = 'The PoP public key used to generate the at has expired'
Post_AT_To_ClientProxy_Failed_Fault_Type = 'Failed to post access token to client proxy'
# Fault-type messages for the diagnostic operations: kubectl events / cluster-info,
# agent and deployment log collection, the diagnoser job, DNS and outbound
# connectivity checks, and certificate checks.
# NOTE(review): several messages below are misspelled ("occured", "ceritificate").
# They are runtime strings, so they are left untouched here; confirm nothing matches
# on the exact text (e.g. telemetry queries) before correcting them.
Kubectl_Get_Events_Failed_Fault_Type = "Error while doing kubectl get events"
Fetch_Arc_Agent_Logs_Failed_Fault_Type = "Error occured in arc agents logger"
Fetch_Arc_Agents_Events_Logs_Failed_Fault_Type = "Error occured in arc agents events logger"
Fetch_Arc_Deployment_Logs_Failed_Fault_Type = "Error occured in deployments logger"
Agent_State_Check_Fault_Type = "Error occured while performing the agent state check"
Agent_Version_Check_Fault_Type = "Error occured while performing the agent version check"
Diagnoser_Job_Failed_Fault_Type = "Error while executing Diagnoser Job"
Diagnoser_Container_Check_Failed_Fault_Type = "Error occured while performing the diagnoser container checks"
Cluster_DNS_Check_Fault_Type = "Error occured while performing cluster DNS check"
Outbound_Connectivity_Check_Fault_Type = "Error occured while performing outbound connectivity check in the cluster"
MSI_Cert_Check_Fault_Type = "Error occurred while trying to perform MSI ceritificate presence check"
Cluster_Security_Policy_Check_Fault_Type = "Error occured while performing cluster security policy check"
KAP_Cert_Check_Fault_Type = "Error occurred while trying to perform KAP ceritificate presence check"
MSI_Cert_Expiry_Check_Fault_Type = "Error occured while trying to perform the MSI cert expiry check"
Diagnostics_Folder_Creation_Failed_Fault_Type = "Error while trying to create diagnostic logs folder"
Describe_Stuck_Agents_Fault_Type = "Error occured while storing the description of non running agents"
No_Storage_Space_Available_Fault_Type = "No space left on device"
Connected_Cluster_Resource_Fetch_Fault_Type = "Error occured while fetching the Get output of connected cluster"
Diagnoser_Result_Fault_Type = "Error while storing the diagnoser results"
Kubectl_Cluster_Info_Failed_Fault_Type = "Error while doing kubectl cluster-info"
Fetch_Kubectl_Cluster_Info_Fault_Type = "Error occured while fetching cluster-info"
# NOTE(review): unlike its neighbours this is an identifier-style value rather than a
# fault message; it looks like it belongs with the check/operation names below — confirm.
Fetch_Kubectl_Cluster_Info = "kubectl_cluster_info"
# Result states of a diagnostic check.
Diagnostic_Check_Passed = "Passed"
Diagnostic_Check_Failed = "Failed"
Diagnostic_Check_Incomplete = "Incomplete"
# Names of the checks and operations
Retrieve_Arc_Agents_Event_Logs = "retrieved_arc_agents_event_logs"
Retrieve_Arc_Agents_Logs = "retrieved_arc_agents_logs"
Retrieve_Deployments_Logs = "retrieved_deployments_logs"
Fetch_Connected_Cluster_Resource = "fetch_connected_cluster_resource"
Storing_Diagnoser_Results_Logs = "storing_diagnoser_results_logs"
MSI_Cert_Expiry_Check = "msi_cert_expiry_check"
KAP_Security_Policy_Check = "kap_security_policy_check"
KAP_Cert_Check = "kap_cert_check"
Diagnoser_Check = "diagnoser_check"
MSI_Cert_Check = "msi_cert_check"
Agent_Version_Check = "agent_version_check"
Arc_Agent_State_Check = "arc_agent_state_check"
# Diagnoser file names (entries without a .txt extension are presumably folder names — TODO confirm)
Arc_Agents_Logs = "arc_agents_logs"
Arc_Deployment_Logs = "arc_deployment_logs"
Arc_Diagnostic_Logs = "arc_diagnostic_logs"
Describe_Non_Ready_Arc_Agents = "describe_non_ready_arc_agents"
Agent_State = "agent_state.txt"
Arc_Agents_Events = "arc_agent_events.txt"
Diagnoser_Results = "diagnoser_output.txt"
Connected_Cluster_Resource = "connected_cluster_resource_snapshot.txt"
DNS_Check = "dns_check.txt"
K8s_Cluster_Info = "k8s_cluster_info.txt"
Outbound_Network_Connectivity_Check = "outbound_network_connectivity_check.txt"
Events_of_Incomplete_Diagnoser_Job = "diagnoser_failure_events.txt"

# Diagnostic result labels
# NOTE(review): presumably the prefixes searched for in the diagnoser output — confirm against the parser.
Outbound_Connectivity_Check_Result_String = "Outbound Network Connectivity Result:"
DNS_Check_Result_String = "DNS Result:"
AZ_CLI_ADAL_TO_MSAL_MIGRATE_VERSION = '2.30.0'
CLIENT_PROXY_VERSION = '1.3.019103'
API_SERVER_PORT = 47011
Expand All @@ -135,3 +192,4 @@
CSP_Storage_Url_Mooncake = "https://k8sconnectcsp.blob.core.chinacloudapi.cn"
HELM_STORAGE_URL = "https://k8connecthelm.azureedge.net"
HELM_VERSION = 'v3.6.3'
# Fault type for a failed kubectl download/installation.
Download_And_Install_Kubectl_Fault_Type = "Failed to download and install kubectl"
8 changes: 8 additions & 0 deletions src/connectedk8s/azext_connectedk8s/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,11 @@
- name: Disable multiple features.
text: az connectedk8s disable-features -n clusterName -g resourceGroupName --features custom-locations azure-rbac
"""

# Help entry for the `az connectedk8s troubleshoot` command.
# NOTE(review): the YAML below shows no nested indentation in this view; confirm that
# in the actual file `text:` is nested under its `- name:` item inside `examples:`.
helps['connectedk8s troubleshoot'] = """
type: command
short-summary: Perform diagnostic checks on an Arc enabled Kubernetes cluster.
examples:
- name: Perform diagnostic checks on an Arc enabled Kubernetes cluster.
text: az connectedk8s troubleshoot -n clusterName -g resourceGroupName
"""
6 changes: 6 additions & 0 deletions src/connectedk8s/azext_connectedk8s/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,9 @@ def load_arguments(self, _):
c.argument('context_name', options_list=['--kube-context'], help='If specified, overwrite the default context name.')
c.argument('path', options_list=['--file', '-f'], type=file_type, completer=FilesCompleter(), default=os.path.join(os.path.expanduser('~'), '.kube', 'config'), help="Kubernetes configuration file to update. If not provided, updates the file '~/.kube/config'. Use '-' to print YAML to stdout instead.")
c.argument('api_server_port', options_list=['--port'], help='Port used for accessing connected cluster.')

# Arguments accepted by `az connectedk8s troubleshoot`.
with self.argument_context('connectedk8s troubleshoot') as c:
    c.argument('tags', tags_type)
    c.argument('cluster_name', options_list=['--name', '-n'], help='The name of the connected cluster.')
    c.argument('kube_config', options_list=['--kube-config'], help='Path to the kube config file.')
    # Help text corrected: "Kubconfig" -> "Kubeconfig".
    c.argument('kube_context', options_list=['--kube-context'], help='Kubeconfig context from current machine.')
Loading

0 comments on commit 15e5e89

Please sign in to comment.