Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 54 additions & 33 deletions playbooks/cns.yaml

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions playbooks/cns_values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ runc_version: "1.4.0"
cni_plugins_version: "1.7.1"
containerd_max_concurrent_downloads: "5"
nvidia_container_toolkit_version: "1.18.1"
docker_ce_version: "29.4.3"
docker_ce_apt_version: "5:{{ docker_ce_version }}-1~ubuntu.{{ ansible_distribution_version }}~{{ ansible_distribution_release }}"
docker_apt_arch: "{{ 'arm64' if ansible_architecture == 'aarch64' else 'amd64' }}"
crio_version: "1.33.6"
cri_dockerd_version: "0.4.0"
k8s_version: "1.33.6"
Expand Down
3 changes: 3 additions & 0 deletions playbooks/cns_values_16.0.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ runc_version: "1.3.0"
cni_plugins_version: "1.7.1"
containerd_max_concurrent_downloads: "5"
nvidia_container_toolkit_version: "1.17.8"
docker_ce_version: "29.4.3"
docker_ce_apt_version: "5:{{ docker_ce_version }}-1~ubuntu.{{ ansible_distribution_version }}~{{ ansible_distribution_release }}"
docker_apt_arch: "{{ 'arm64' if ansible_architecture == 'aarch64' else 'amd64' }}"
crio_version: "1.33.2"
cri_dockerd_version: "0.3.18"
k8s_version: "1.33.2"
Expand Down
3 changes: 3 additions & 0 deletions playbooks/cns_values_16.1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ runc_version: "1.4.0"
cni_plugins_version: "1.7.1"
containerd_max_concurrent_downloads: "5"
nvidia_container_toolkit_version: "1.18.1"
docker_ce_version: "29.4.3"
docker_ce_apt_version: "5:{{ docker_ce_version }}-1~ubuntu.{{ ansible_distribution_version }}~{{ ansible_distribution_release }}"
docker_apt_arch: "{{ 'arm64' if ansible_architecture == 'aarch64' else 'amd64' }}"
crio_version: "1.33.6"
cri_dockerd_version: "0.4.0"
k8s_version: "1.33.6"
Expand Down
3 changes: 3 additions & 0 deletions playbooks/cns_values_17.0.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ runc_version: "1.4.0"
cni_plugins_version: "1.7.1"
containerd_max_concurrent_downloads: "5"
nvidia_container_toolkit_version: "1.18.1"
docker_ce_version: "29.4.3"
docker_ce_apt_version: "5:{{ docker_ce_version }}-1~ubuntu.{{ ansible_distribution_version }}~{{ ansible_distribution_release }}"
docker_apt_arch: "{{ 'arm64' if ansible_architecture == 'aarch64' else 'amd64' }}"
crio_version: "1.34.2"
cri_dockerd_version: "0.4.0"
k8s_version: "1.34.2"
Expand Down
2 changes: 1 addition & 1 deletion playbooks/k8s-install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@

- name: Update Network plugin for Calico on NVIDIA Cloud Native Stack
when: "cns_version >= 3.1 and release != 'tegra'"
shell: "sleep 25; kubectl set env daemonset/calico-node -n kube-system IP_AUTODETECTION_METHOD=interface=ens*,eth*,enc*,bond*,enp*,eno*"
shell: "sleep 25; kubectl set env daemonset/calico-node -n kube-system IP_AUTODETECTION_METHOD=interface=ens*,eth*,enc*,bond*,enp*,eno*,enP*"
retries: 5
delay: 5
register: update_network_plugin_calico
Expand Down
80 changes: 60 additions & 20 deletions playbooks/nvidia-docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,24 +61,25 @@
register: install_docker_deps_rhel
until: install_docker_deps_rhel is succeeded

- name: create docker.asc file
- name: Create Docker APT KeyRing directory
when: ansible_distribution == 'Ubuntu'
become: true
file:
path: /etc/apt/keyrings/docker.asc
mode: '0644'
state: touch
path: /etc/apt/keyrings
mode: '0755'
state: directory
retries: 5
delay: 5
register: create_docker_asc
until: create_docker_asc is succeeded
register: create_docker_keyring_dir
until: create_docker_keyring_dir is succeeded

- name: Add Docker APT signing key
when: docker_exists.rc >= 1 and ansible_distribution == 'Ubuntu'
ansible.builtin.apt_key:
become: true
ansible.builtin.get_url:
url: "https://download.docker.com/linux/{{ ansible_distribution | lower }}/gpg"
keyring: /etc/apt/keyrings/docker.asc
state: present
dest: /etc/apt/keyrings/docker.asc
mode: '0644'
retries: 5
delay: 5
register: add_docker_key
Expand All @@ -87,7 +88,7 @@
- name: Add Docker repository into sources list
when: docker_exists.rc >= 1 and ansible_distribution == 'Ubuntu'
ansible.builtin.apt_repository:
repo: "deb [signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/{{ ansible_distribution | lower }} {{ ansible_distribution_release }} stable"
repo: "deb [arch={{ docker_apt_arch }} signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/{{ ansible_distribution | lower }} {{ ansible_distribution_release }} stable"
state: present
filename: docker
retries: 5
Expand Down Expand Up @@ -179,19 +180,31 @@

- name: Install Docker on Ubuntu
when: docker_exists.rc >= 1 and ansible_distribution == 'Ubuntu' and microk8s == false
package:
name: ['docker-ce', 'docker-ce-cli', 'docker-buildx-plugin', 'docker-compose-plugin']
state: latest
ansible.builtin.apt:
name:
- "docker-ce={{ docker_ce_apt_version }}"
- "docker-ce-cli={{ docker_ce_apt_version }}"
- docker-buildx-plugin
- docker-compose-plugin
state: present
update_cache: true
allow_downgrades: true
retries: 5
delay: 5
register: install_docker_ubuntu
until: install_docker_ubuntu is succeeded

- name: Install Docker on Ubuntu
when: docker_exists.rc >= 1 and ansible_distribution == 'Ubuntu' and microk8s == true
package:
name: ['docker-ce', 'docker-ce-cli', 'docker-buildx-plugin', 'docker-compose-plugin']
state: latest
ansible.builtin.apt:
name:
- "docker-ce={{ docker_ce_apt_version }}"
- "docker-ce-cli={{ docker_ce_apt_version }}"
- docker-buildx-plugin
- docker-compose-plugin
state: present
update_cache: true
allow_downgrades: true
retries: 5
delay: 5
register: install_docker_ubuntu_microk8s
Expand Down Expand Up @@ -235,15 +248,42 @@

- name: Add NVIDIA Docker apt signing key for Ubuntu
when: nvidia_docker_exists.rc >= 1 and ansible_distribution == 'Ubuntu'
apt_key:
become: true
get_url:
url: https://nvidia.github.io/libnvidia-container/gpgkey
keyring: /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
state: present
dest: /tmp/nvidia-container-toolkit-keyring.asc
mode: '0644'
retries: 5
delay: 5
register: download_nvidia_docker_key
until: download_nvidia_docker_key is succeeded

- name: Check NVIDIA Container Toolkit apt signing keyring for Ubuntu
when: nvidia_docker_exists.rc >= 1 and ansible_distribution == 'Ubuntu'
become: true
stat:
path: /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
register: nvidia_docker_keyring

- name: Add NVIDIA Container Toolkit apt signing key for Ubuntu
when:
- nvidia_docker_exists.rc >= 1
- ansible_distribution == 'Ubuntu'
- download_nvidia_docker_key.changed or not nvidia_docker_keyring.stat.exists
become: true
command: "gpg --dearmor --yes -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg /tmp/nvidia-container-toolkit-keyring.asc"
retries: 5
delay: 5
register: add_nvidia_docker_key
until: add_nvidia_docker_key is succeeded

- name: Set NVIDIA Container Toolkit apt signing key permissions for Ubuntu
when: nvidia_docker_exists.rc >= 1 and ansible_distribution == 'Ubuntu'
become: true
file:
path: /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
mode: '0644'

- name: Get NVIDIA Container Toolkit Apt list
when: nvidia_docker_exists.rc >= 1 and ansible_distribution == 'Ubuntu'
apt_repository:
Expand Down Expand Up @@ -458,4 +498,4 @@
retries: 5
delay: 5
register: add_path_to_bashrc
until: add_path_to_bashrc is succeeded
until: add_path_to_bashrc is succeeded
4 changes: 3 additions & 1 deletion playbooks/nvidia-driver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@
- name: Remove old keyring
shell:
cmd: "apt-key del 7fa2af80"
failed_when: false
changed_when: remove_keyring.rc == 0
retries: 5
delay: 5
register: remove_keyring
Expand Down Expand Up @@ -379,4 +381,4 @@
retries: 5
delay: 5
register: enable_fabric_manager
until: enable_fabric_manager is succeeded
until: enable_fabric_manager is succeeded
Loading