Skip to content

Commit

Permalink
csi: cleanup csi driver resources when zero cephclusters exist
Browse files Browse the repository at this point in the history
This commit modifies the ceph-csi controller so that it can clean up
the ceph-csi deployments, daemonsets, services, and csidriver objects
when no cephcluster exists.

It makes the following changes:
- The redundant check for tp.DriverNamePrefix is removed.
- Retrying to start the drivers within the reconcile loop is removed;
  the controller itself now retries on error.
- stopDrivers() now returns an error on failure.
- The predicate delete func now responds to cephcluster deletion.
- CSI resources are cleaned up when no cephcluster exists.

Fixes: rook#9697

Signed-off-by: Rakshith R <rar@redhat.com>
(cherry picked from commit 1e9920f)
  • Loading branch information
Rakshith-R authored and mergify-bot committed Feb 14, 2022
1 parent 44a324f commit 2027e7f
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 45 deletions.
26 changes: 19 additions & 7 deletions pkg/operator/ceph/csi/controller.go
Expand Up @@ -106,12 +106,23 @@ func (r *ReconcileCSI) Reconcile(context context.Context, request reconcile.Requ
}

func (r *ReconcileCSI) reconcile(request reconcile.Request) (reconcile.Result, error) {
serverVersion, err := r.context.Clientset.Discovery().ServerVersion()
if err != nil {
return opcontroller.ImmediateRetryResult, errors.Wrap(err, "failed to get server version")
}

// See if there is a CephCluster
cephClusters := &cephv1.CephClusterList{}
err := r.client.List(r.opManagerContext, cephClusters, &client.ListOptions{})
err = r.client.List(r.opManagerContext, cephClusters, &client.ListOptions{})
if err != nil {
if kerrors.IsNotFound(err) {
logger.Debug("no ceph cluster found not deploying ceph csi driver")
EnableRBD, EnableCephFS = false, false
err = r.stopDrivers(serverVersion)
if err != nil {
return opcontroller.ImmediateRetryResult, errors.Wrap(err, "failed to stop Drivers")
}

return reconcile.Result{}, nil
}
// Error reading the object - requeue the request.
Expand All @@ -121,6 +132,12 @@ func (r *ReconcileCSI) reconcile(request reconcile.Request) (reconcile.Result, e
// If no ceph cluster is present, stop the CSI drivers instead of deploying them
if len(cephClusters.Items) == 0 {
logger.Debug("no ceph cluster found not deploying ceph csi driver")
EnableRBD, EnableCephFS = false, false
err = r.stopDrivers(serverVersion)
if err != nil {
return opcontroller.ImmediateRetryResult, errors.Wrap(err, "failed to stop Drivers")
}

return reconcile.Result{}, nil
} else {
for _, cluster := range cephClusters.Items {
Expand Down Expand Up @@ -155,11 +172,6 @@ func (r *ReconcileCSI) reconcile(request reconcile.Request) (reconcile.Result, e
r.opConfig.Parameters = opConfig.Data
}

serverVersion, err := r.context.Clientset.Discovery().ServerVersion()
if err != nil {
return opcontroller.ImmediateRetryResult, errors.Wrap(err, "failed to get server version")
}

ownerRef, err := k8sutil.GetDeploymentOwnerReference(r.opManagerContext, r.context.Clientset, os.Getenv(k8sutil.PodNameEnvVar), r.opConfig.OperatorNamespace)
if err != nil {
logger.Warningf("could not find deployment owner reference to assign to csi drivers. %v", err)
Expand All @@ -184,7 +196,7 @@ func (r *ReconcileCSI) reconcile(request reconcile.Request) (reconcile.Result, e

err = r.validateAndConfigureDrivers(serverVersion, ownerInfo)
if err != nil {
return opcontroller.ImmediateRetryResult, errors.Wrap(err, "failed configure ceph csi")
return opcontroller.ImmediateRetryResult, errors.Wrap(err, "failed to configure ceph csi")
}

return reconcile.Result{}, nil
Expand Down
14 changes: 3 additions & 11 deletions pkg/operator/ceph/csi/csi.go
Expand Up @@ -47,21 +47,13 @@ func (r *ReconcileCSI) validateAndConfigureDrivers(serverVersion *version.Info,
}

if CSIEnabled() {
maxRetries := 3
for i := 0; i < maxRetries; i++ {
if err = r.startDrivers(serverVersion, ownerInfo, v); err != nil {
logger.Errorf("failed to start Ceph csi drivers, will retry starting csi drivers %d more times. %v", maxRetries-i-1, err)
} else {
break
}
if err = r.startDrivers(serverVersion, ownerInfo, v); err != nil {
return errors.Wrap(err, "failed to start ceph csi drivers")
}
return errors.Wrap(err, "failed to start ceph csi drivers")
}

// Check whether RBD or CephFS needs to be disabled
r.stopDrivers(serverVersion)

return nil
return r.stopDrivers(serverVersion)
}

func (r *ReconcileCSI) setParams() error {
Expand Down
7 changes: 7 additions & 0 deletions pkg/operator/ceph/csi/predicate.go
Expand Up @@ -94,6 +94,13 @@ func predicateController(ctx context.Context, c client.Client, opNamespace strin
if cm, ok := e.Object.(*v1.ConfigMap); ok {
return cm.Name == opcontroller.OperatorSettingConfigMapName
}

// If a CephCluster is deleted, trigger a reconcile so that the CSI driver
// resources are cleaned up when zero CephClusters remain.
if _, ok := e.Object.(*cephv1.CephCluster); ok {
return true
}

return false
},

Expand Down
49 changes: 22 additions & 27 deletions pkg/operator/ceph/csi/spec.go
Expand Up @@ -221,11 +221,8 @@ func (r *ReconcileCSI) startDrivers(ver *version.Info, ownerInfo *k8sutil.OwnerI
Param: CSIParam,
Namespace: r.opConfig.OperatorNamespace,
}
// if the user didn't specify a custom DriverNamePrefix use
// the namespace (and a dot).
if tp.DriverNamePrefix == "" {
tp.DriverNamePrefix = fmt.Sprintf("%s.", r.opConfig.OperatorNamespace)
}

tp.DriverNamePrefix = fmt.Sprintf("%s.", r.opConfig.OperatorNamespace)

CephFSDriverName = tp.DriverNamePrefix + "cephfs.csi.ceph.com"
RBDDriverName = tp.DriverNamePrefix + "rbd.csi.ceph.com"
Expand Down Expand Up @@ -561,58 +558,56 @@ func (r *ReconcileCSI) startDrivers(ver *version.Info, ownerInfo *k8sutil.OwnerI
return nil
}

func (r *ReconcileCSI) stopDrivers(ver *version.Info) {
func (r *ReconcileCSI) stopDrivers(ver *version.Info) error {
RBDDriverName = fmt.Sprintf("%s.rbd.csi.ceph.com", r.opConfig.OperatorNamespace)
CephFSDriverName = fmt.Sprintf("%s.cephfs.csi.ceph.com", r.opConfig.OperatorNamespace)

if !EnableRBD {
logger.Info("CSI Ceph RBD driver disabled")
succeeded := r.deleteCSIDriverResources(ver, csiRBDPlugin, csiRBDProvisioner, "csi-rbdplugin-metrics", RBDDriverName)
if succeeded {
logger.Info("successfully removed CSI Ceph RBD driver")
} else {
logger.Error("failed to remove CSI Ceph RBD driver")
err := r.deleteCSIDriverResources(ver, csiRBDPlugin, csiRBDProvisioner, "csi-rbdplugin-metrics", RBDDriverName)
if err != nil {
return errors.Wrap(err, "failed to remove CSI Ceph RBD driver")
}
logger.Info("successfully removed CSI Ceph RBD driver")
}

if !EnableCephFS {
logger.Info("CSI CephFS driver disabled")
succeeded := r.deleteCSIDriverResources(ver, csiCephFSPlugin, csiCephFSProvisioner, "csi-cephfsplugin-metrics", CephFSDriverName)
if succeeded {
logger.Info("successfully removed CSI CephFS driver")
} else {
logger.Error("failed to remove CSI CephFS driver")
err := r.deleteCSIDriverResources(ver, csiCephFSPlugin, csiCephFSProvisioner, "csi-cephfsplugin-metrics", CephFSDriverName)
if err != nil {
return errors.Wrap(err, "failed to remove CSI CephFS driver")
}
logger.Info("successfully removed CSI CephFS driver")
}

return nil
}

func (r *ReconcileCSI) deleteCSIDriverResources(ver *version.Info, daemonset, deployment, service, driverName string) bool {
succeeded := true
func (r *ReconcileCSI) deleteCSIDriverResources(ver *version.Info, daemonset, deployment, service, driverName string) error {
csiDriverobj = beta1CsiDriver{}
if ver.Major > KubeMinMajor || ver.Major == KubeMinMajor && ver.Minor >= kubeMinVerForV1csiDriver {
csiDriverobj = v1CsiDriver{}
}
err := k8sutil.DeleteDaemonset(r.opManagerContext, r.context.Clientset, r.opConfig.OperatorNamespace, daemonset)
if err != nil {
logger.Errorf("failed to delete the %q. %v", daemonset, err)
succeeded = false
return errors.Wrapf(err, "failed to delete the %q", daemonset)
}

err = k8sutil.DeleteDeployment(r.opManagerContext, r.context.Clientset, r.opConfig.OperatorNamespace, deployment)
if err != nil {
logger.Errorf("failed to delete the %q. %v", deployment, err)
succeeded = false
return errors.Wrapf(err, "failed to delete the %q", deployment)
}

err = k8sutil.DeleteService(r.opManagerContext, r.context.Clientset, r.opConfig.OperatorNamespace, service)
if err != nil {
logger.Errorf("failed to delete the %q. %v", service, err)
succeeded = false
return errors.Wrapf(err, "failed to delete the %q", service)
}

err = csiDriverobj.deleteCSIDriverInfo(r.opManagerContext, r.context.Clientset, driverName)
if err != nil {
logger.Errorf("failed to delete %q Driver Info. %v", driverName, err)
succeeded = false
return errors.Wrapf(err, "failed to delete %q Driver Info", driverName)
}
return succeeded
return nil
}

func (r *ReconcileCSI) applyCephClusterNetworkConfig(ctx context.Context, objectMeta *metav1.ObjectMeta) (bool, error) {
Expand Down

0 comments on commit 2027e7f

Please sign in to comment.