//! Initial cleanup
//!
//! On startup the systemd units in the `system-stackable` slice are compared to the pods assigned
//! to this node. If a systemd unit is as expected then it is kept and the Stackable Agent will
//! take ownership again in the `Starting` stage. If there is no corresponding pod or the systemd
//! unit differs from the pod specification then it is removed and the Stackable Agent will create
//! a new systemd unit in the `CreatingService` stage.
//!
//! The cleanup stage is implemented as part of the [`StackableProvider`] because the expected
//! content of a systemd unit file can only be determined with the directories configured in the
//! provider.
//!
//! The cleanup code resides in a separate module because the amount of code justifies it and
//! because it makes the log output more meaningful: it is immediately clear whether a systemd
//! unit was removed in the cleanup stage or in the normal process.
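//!
//! A minimal sketch of the intended call site (the surrounding startup code is hypothetical and
//! not part of this module):
//!
//! ```ignore
//! // `provider` and `node_name` are assumed to be set up by the agent's startup code.
//! // The cleanup stage runs once, before any pods are processed.
//! provider.cleanup(&node_name).await;
//! ```
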
use std::collections::HashMap;

use anyhow::Context;
use k8s_openapi::api::core::v1::Pod as KubePod;
use kube::api::{ListParams, Meta, ObjectList};
use kube::Api;
use kubelet::pod::Pod;
use kubelet::provider::Provider;
use log::{debug, error, info, warn};
use tokio::fs::{read_to_string, remove_file};

use super::systemdmanager::systemdunit::{SystemDUnit, STACKABLE_SLICE};
use super::StackableProvider;

impl StackableProvider {
    /// Removes systemd units without corresponding pods.
    ///
    /// The systemd units in the `system-stackable` slice are compared with the pods assigned to
    /// this node and all units without corresponding pods or which differ from the pod
    /// specifications are removed.
    pub async fn cleanup(&self, node_name: &str) {
        let systemd_manager = &self.shared.systemd_manager;

        if let Err(error) = systemd_manager.reload().await {
            error!(
                "Skipping the cleanup stage because the systemd daemon reload failed. {}",
                error
            );
            return;
        }
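
        // Query the units currently placed in the Stackable slice; if none are found, there is
        // nothing to clean up.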
        let units_in_slice = match systemd_manager.slice_content(STACKABLE_SLICE).await {
            Ok(units_in_slice) => units_in_slice,
            Err(error) => {
                debug!(
                    "Skipping the cleanup stage because no systemd units were found in the slice \
                    [{}]. {}",
                    STACKABLE_SLICE, error
                );
                return;
            }
        };
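
        // Fetch the pods assigned to this node. If the list cannot be retrieved, an empty list
        // is used instead, so every unit in the slice will be removed.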
        let pods = match self.assigned_pods(node_name).await {
            Ok(pods) => pods.items,
            Err(error) => {
                error!(
                    "The assigned pods could not be retrieved. All systemd units in the slice [{}] \
                    will be removed. {}",
                    STACKABLE_SLICE, error
                );
                Vec::new()
            }
        };
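
        // Map each expected systemd unit name to the expected unit file content and a flag
        // indicating whether the owning pod is already terminating.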
        let mut units_from_pods = HashMap::new();
        for pod in pods {
            let pod_terminating = pod.metadata.deletion_timestamp.is_some();

            match self.units_from_pod(&pod).await {
                Ok(units) => {
                    for (unit_name, content) in units {
                        units_from_pods.insert(unit_name, (content, pod_terminating));
                    }
                }
                Err(error) => warn!(
                    "Systemd units could not be generated for pod [{}/{}]. {}",
                    pod.namespace().unwrap_or_else(|| String::from("default")),
                    pod.name(),
                    error
                ),
            }
        }
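
        // Compare every unit in the slice with the expected units: a unit is kept only if its
        // content matches the pod specification and the pod is not terminating; in every other
        // case it is removed.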
        let mut unit_removed = false;

        for unit_name in &units_in_slice {
            let remove_unit = match units_from_pods.get(unit_name) {
                Some((expected_content, pod_terminating)) => {
                    match self.unit_file_content(unit_name).await {
                        Ok(Some(content)) if &content == expected_content && !pod_terminating => {
                            info!(
                                "The systemd unit [{}] will be kept because a corresponding pod \
                                exists.",
                                unit_name
                            );
                            false
                        }
                        Ok(Some(_)) if *pod_terminating => {
                            info!(
                                "The systemd unit [{}] will be removed because the corresponding \
                                pod is terminating.",
                                unit_name
                            );
                            true
                        }
                        Ok(Some(content)) => {
                            info!(
                                "The systemd unit [{}] will be removed because it differs from the \
                                corresponding pod specification.\n\
                                expected content:\n\
                                {}\n\n\
                                actual content:\n\
                                {}",
                                unit_name, expected_content, content
                            );
                            true
                        }
                        Ok(None) => {
                            info!(
                                "The systemd unit [{}] will be removed because its file path could \
                                not be determined.",
                                unit_name
                            );
                            true
                        }
                        Err(error) => {
                            warn!(
                                "The systemd unit [{}] will be removed because the file content \
                                could not be retrieved. {}",
                                unit_name, error
                            );
                            true
                        }
                    }
                }
                None => {
                    info!(
                        "The systemd unit [{}] will be removed because no corresponding pod \
                        exists.",
                        unit_name
                    );
                    true
                }
            };

            if remove_unit {
                self.remove_unit(unit_name).await;
                unit_removed = true;
            }
        }
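
        // Unit files were deleted, so reload the systemd daemon once to make it forget the
        // removed units.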
        if unit_removed {
            let _ = systemd_manager.reload().await;
        }
    }

    /// Returns a list of all pods assigned to the given node.
    async fn assigned_pods(&self, node_name: &str) -> anyhow::Result<ObjectList<KubePod>> {
        let client = &self.shared.client;
        let api: Api<KubePod> = Api::all(client.to_owned());
        let lp = ListParams::default().fields(&format!("spec.nodeName={}", node_name));
        api.list(&lp).await.with_context(|| {
            format!(
                "The pods assigned to this node (nodeName = [{}]) could not be retrieved.",
                node_name
            )
        })
    }

    /// Creates the systemd unit files for the given pod in memory.
    ///
    /// A mapping from systemd unit file names to the file content is returned.
    async fn units_from_pod(&self, kubepod: &KubePod) -> anyhow::Result<HashMap<String, String>> {
        let systemd_manager = &self.shared.systemd_manager;
        let mut units = HashMap::new();
        let pod = Pod::from(kubepod.to_owned());
        let pod_state = self.initialize_pod_state(&pod).await?;

        for container in pod.containers() {
            let unit = SystemDUnit::new(
                systemd_manager.is_user_mode(),
                &pod_state,
                &self.shared.kubeconfig_path,
                &pod,
                &container,
            )?;
            units.insert(unit.get_name(), unit.get_unit_file_content());
        }

        Ok(units)
    }

    /// Returns the content of the given systemd unit file.
    async fn unit_file_content(&self, unit_name: &str) -> anyhow::Result<Option<String>> {
        let systemd_manager = &self.shared.systemd_manager;
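
        // Look up the path of the unit file on disk; `Ok(None)` means that systemd does not
        // know a file path for this unit.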
        let file_path_result = systemd_manager
            .fragment_path(unit_name)
            .await
            .with_context(|| {
                format!(
                    "The file path of the unit [{}] could not be determined.",
                    unit_name
                )
            });

        match file_path_result {
            Ok(Some(file_path)) => {
                let file_content = read_to_string(&file_path)
                    .await
                    .with_context(|| format!("The file [{}] could not be read.", file_path))?;
                Ok(Some(file_content))
            }
            Ok(None) => Ok(None),
            Err(error) => Err(error),
        }
    }

    /// Stops, disables and removes the given systemd unit.
    async fn remove_unit(&self, unit_name: &str) {
        let systemd_manager = &self.shared.systemd_manager;
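
        // Stop and disable the unit before deleting its file; failures are only logged so that
        // the remaining steps are still carried out.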
        if let Err(error) = systemd_manager.stop(unit_name).await {
            warn!("{}", error);
        }

        if let Err(error) = systemd_manager.disable(unit_name).await {
            warn!("{}", error);
        }

        if let Ok(Some(file_path)) = systemd_manager.fragment_path(unit_name).await {
            debug!("Removing file [{}].", file_path);

            if let Err(error) = remove_file(file_path).await {
                warn!("{}", error);
            }
        }
    }
}