fix(cvr, delete): add finalizer to reclaim space on volume delete
This commit reverts a previous commit, i.e.
openebs#955

There were a few (expected) observations due to the above commit.
CVR used to have finalizers so that cleanup operations could run on
the delete event of the corresponding volume. Handling cleanup this
way, via the reconciliation process, is standard practice in the
kubernetes community.
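
For context, a rough sketch of that finalizer pattern is shown below.
The helper and its names are illustrative only and are not part of
this change: cleanup runs while the finalizer keeps the object pinned
in the API server, and the finalizer is removed only after cleanup
succeeds, which lets kubernetes finish the delete.

import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

// reconcileDelete is a hypothetical helper: it is a no-op until the
// object is marked for deletion, retries cleanup on failure (the
// finalizer keeps the object around), and clears the finalizers only
// once cleanup succeeds.
func reconcileDelete(obj metav1.Object, cleanup func() error) error {
    if obj.GetDeletionTimestamp() == nil {
        return nil // object is not being deleted
    }
    if err := cleanup(); err != nil {
        return err // requeue; the finalizer keeps the object alive
    }
    obj.SetFinalizers(nil) // caller must persist this to finish deletion
    return nil
}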

However, we had to remove those finalizers, as seen in openebs#955,
for the following reasons:

[1] Inability to delete the openebs system namespace and expect
the entire openebs install to get uninstalled

[2] Deletion of a cstor volume did not delete its CVRs due to the
presence of finalizers. This was seen most of the time across
various users and platforms, yet was not easily reproducible
in development environments.

With the current release, i.e. 0.9.0, some users have started
to face issues like space not getting reclaimed even after
cstor volumes are deleted successfully. On further analysis,
it was found that the user had tried the following operation:

`kubectl delete pvc --all -n some-namespace`

which led to deletion of the pvc, pv, cv and cvr resources from the
k8s cluster, but the corresponding events, especially the cvr events,
were received much later by the controller code. By then the lookup
failed with `cvr resource not found`, and hence these resources were
never cleaned up, leading to space issues.
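
Roughly, the failing path looks like the sketch below. This is an
illustration only (a hypothetical handler over a client-go cache),
not the actual controller code: without a finalizer the CVR is
already gone by the time its delete event is processed, so nothing
is left to drive the zfs cleanup.

import (
    apis "github.com/openebs/maya/pkg/apis/openebs.io/v1alpha1"
    "k8s.io/client-go/tools/cache"
)

// handleDelete is a hypothetical delete handler keyed by namespace/name
func handleDelete(indexer cache.Indexer, key string) error {
    obj, exists, err := indexer.GetByKey(key)
    if err != nil {
        return err
    }
    if !exists {
        // "cvr resource not found": cleanup is silently skipped and
        // the space backing the replica is never reclaimed
        return nil
    }
    cvr := obj.(*apis.CStorVolumeReplica)
    _ = cvr // the backing zfs volume would be destroyed here
    return nil
}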

With this fix we expect the original symptoms to recur. However, we
plan to provide another fix once the automated test suite is able
to reproduce these issues at will. That next fix will deal
only with symptom [2].

NOTE: The ability to uninstall openebs by deleting the openebs system
namespace needs to be handled by a higher-order logic/process. It is
now considered out of scope for the cvr controller.

Signed-off-by: AmitKumarDas <amit.das@mayadata.io>
AmitKumarDas committed Jun 7, 2019
1 parent 2e1c5e2 commit 07e2414
Showing 3 changed files with 167 additions and 48 deletions.
10 changes: 9 additions & 1 deletion cmd/cstor-pool-mgmt/controller/common/common.go
@@ -75,10 +75,18 @@ const (
MessageResourceSyncSuccess EventReason = "Resource successfully synced"
// MessageResourceSyncFailure holds message for corresponding failed sync of resource.
MessageResourceSyncFailure EventReason = "Resource sync failed:"

// FailureDestroy holds status for corresponding failed destroy resource.
FailureDestroy EventReason = "FailDestroy"

// FailureRemoveFinalizer holds the status when
// the resource's finalizers could not be removed
FailureRemoveFinalizer EventReason = "FailRemoveFinalizer"

// MessageResourceFailDestroy holds the descriptive message for a
// resource that could not be destroyed
MessageResourceFailDestroy EventReason = "Resource Destroy failed"

// FailureValidate holds status for corresponding failed validate resource.
FailureValidate EventReason = "FailValidate"
// MessageResourceFailValidate holds message for corresponding failed validate resource.
204 changes: 157 additions & 47 deletions cmd/cstor-pool-mgmt/controller/replica-controller/handler.go
@@ -17,6 +17,7 @@ limitations under the License.
package replicacontroller

import (
"encoding/json"
"fmt"
"os"
"reflect"
@@ -26,9 +27,11 @@ import (
"github.com/openebs/maya/cmd/cstor-pool-mgmt/pool"
"github.com/openebs/maya/cmd/cstor-pool-mgmt/volumereplica"
apis "github.com/openebs/maya/pkg/apis/openebs.io/v1alpha1"
merrors "github.com/openebs/maya/pkg/errors/v1alpha1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/client-go/tools/cache"
)
Expand All @@ -53,99 +56,206 @@ type CVRPatch struct {
Value string `json:"value"`
}

// syncHandler handles CVR changes based on the provided
// operation. It reconciles desired state of CVR with the
// actual state.
//
// Finally, it updates CVR Status
func (c *CStorVolumeReplicaController) syncHandler(
key string,
operation common.QueueOperation,
) error {
cvrGot, err := c.getVolumeReplicaResource(key)
if err != nil {
return err
}
if cvrGot == nil {
return merrors.Errorf("failed to reconcile cvr {%s}: object not found", key)
}

status, err := c.cVREventHandler(operation, cvrGot)
if status == "" {
// TODO
// need to rethink on this logic !!
// status holds more importance than error
return nil
}

// set phase based on received status
cvrGot.Status.Phase = apis.CStorVolumeReplicaPhase(status)

// need to update cvr before returning this error
if err != nil {
glog.Errorf(err.Error())
glog.Infof("cVR:%v, %v; Status: %v", cVRGot.Name,
string(cVRGot.GetUID()), cVRGot.Status.Phase)
_, err := c.clientset.OpenebsV1alpha1().CStorVolumeReplicas(cVRGot.Namespace).Update(cVRGot)
if err != nil {
return err
_, err1 := c.clientset.
OpenebsV1alpha1().
CStorVolumeReplicas(cvrGot.Namespace).
Update(cvrGot)
if err1 != nil {
return merrors.Wrapf(
err,
"failed to sync reconcile {%s}: failed to update cvr with phase {%s}: {%s}",
key,
cvrGot.Status.Phase,
err1.Error(),
)
}
return merrors.Wrapf(err, "failed to reconcile cvr {%s}", key)
}

// Synchronize cstor volume total allocated and
// used capacity fields on CVR object.
// Any kind of sync activity should be done from here.
// ToDo: Move status sync (of cvr) here from cVREventHandler function.
// ToDo: Instead of having statusSync, capacitySync we can make it generic resource sync which syncs all the
// ToDo: required fields on CVR ( Some code re-organization will be required)
c.syncCvr(cvrGot)

_, err = c.clientset.
OpenebsV1alpha1().
CStorVolumeReplicas(cvrGot.Namespace).
Update(cvrGot)
if err != nil {
return merrors.Wrapf(
err,
"failed to reconcile cvr {%s}: failed to update cvr with phase {%s}",
key,
cvrGot.Status.Phase,
)
}
glog.Infof("cVR:%v, %v; Status: %v", cVRGot.Name,
string(cVRGot.GetUID()), cVRGot.Status.Phase)
return nil

glog.Infof(
"cvr {%s} reconciled successfully with current phase being {%s}",
key,
cvrGot.Status.Phase,
)
return nil
}

func (c *CStorVolumeReplicaController) cVREventHandler(
operation common.QueueOperation,
cvrObj *apis.CStorVolumeReplica,
) (string, error) {

err := volumereplica.CheckValidVolumeReplica(cvrObj)
if err != nil {
c.recorder.Event(
cvrObj,
corev1.EventTypeWarning,
string(common.FailureValidate),
string(common.MessageResourceFailValidate),
)
return string(apis.CVRStatusOffline), err
}

// PoolNameHandler tries to get pool name and blocks for
// particular number of attempts.
var noOfAttempts = 2
if !common.PoolNameHandler(cvrObj, noOfAttempts) {
return string(apis.CVRStatusOffline), merrors.New("pool not found")
}

// cvr is created at zfs in the form poolname/volname
fullVolName :=
string(pool.PoolPrefix) +
cvrObj.Labels["cstorpool.openebs.io/uid"] + "/" +
cvrObj.Labels["cstorvolume.openebs.io/name"]

switch operation {
case common.QOpAdd:
glog.Infof("Processing cvr added event: %v, %v", cVR.ObjectMeta.Name, string(cVR.GetUID()))
glog.Infof(
"will process add event for cvr {%s} as volume {%s}",
cvrObj.Name,
fullVolName,
)

status, err := c.cVRAddEventHandler(cvrObj, fullVolName)
return status, err

case common.QOpDestroy:
glog.Infof("Processing cvr deleted event %v, %v", cVR.ObjectMeta.Name, string(cVR.GetUID()))
glog.Infof(
"will process delete event for cvr {%s} as volume {%s}",
cvrObj.Name,
fullVolName,
)

err := volumereplica.DeleteVolume(fullVolName)
if err != nil {
glog.Errorf("Error in deleting volume %q: %s", cVR.ObjectMeta.Name, err)
c.recorder.Event(cVR, corev1.EventTypeWarning, string(common.FailureDestroy), string(common.MessageResourceFailDestroy))
c.recorder.Event(
cvrObj,
corev1.EventTypeWarning,
string(common.FailureDestroy),
string(common.MessageResourceFailDestroy),
)
return string(apis.CVRStatusDeletionFailed), err
}

err = c.removeFinalizer(cvrObj)
if err != nil {
c.recorder.Event(
cvrObj,
corev1.EventTypeWarning,
string(common.FailureRemoveFinalizer),
string(common.MessageResourceFailDestroy),
)
return string(apis.CVRStatusDeletionFailed), err
}

return "", nil

case common.QOpModify:
fallthrough

case common.QOpSync:
glog.Infof("CstorVolumeReplica: '%s' got '%v' event", cVR.ObjectMeta.Name, operation)
status, err := c.getCVRStatus(cVR)
if err != nil {
glog.Errorf("Unable to get volume status:%s", err.Error())
}
return status, err
glog.Infof(
"will process sync event for cvr {%s} as volume {%s}",
cvrObj.Name,
operation,
)
return c.getCVRStatus(cvrObj)
}
glog.Errorf("ignored event '%s' for CVR '%s'", string(operation), cVR.ObjectMeta.Name)

glog.Errorf(
"failed to handle event for cvr {%s}: operation {%s} not supported",
cvrObj.Name,
string(operation),
)
return string(apis.CVRStatusInvalid), nil
}

// removeFinalizer removes finalizers present in
// CVR resource
func (c *CStorVolumeReplicaController) removeFinalizer(
cvrObj *apis.CStorVolumeReplica,
) error {
cvrPatch := []CVRPatch{
CVRPatch{
Op: "remove",
Path: "/metadata/finalizers",
},
}

cvrPatchBytes, err := json.Marshal(cvrPatch)
if err != nil {
return merrors.Wrapf(
err,
"failed to remove finalizers from cvr {%s}",
cvrObj.Name,
)
}

_, err = c.clientset.
OpenebsV1alpha1().
CStorVolumeReplicas(cvrObj.Namespace).
Patch(cvrObj.Name, types.JSONPatchType, cvrPatchBytes)
if err != nil {
return merrors.Wrapf(
err,
"failed to remove finalizers from cvr {%s}",
cvrObj.Name,
)
}

glog.Infof("finalizers removed successfully from cvr {%s}", cvrObj.Name)
return nil
}
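
For reference, the patch built above is a standard JSON Patch remove
operation. Below is a rough sketch of what the marshalling produces,
assuming the Op and Path fields of CVRPatch carry the usual `json:"op"`
and `json:"path"` tags (those fields sit outside this hunk):

// illustrative only: prints the JSON Patch payload sent to the API server
patch := []CVRPatch{{Op: "remove", Path: "/metadata/finalizers"}}
b, _ := json.Marshal(patch)
// roughly: [{"op":"remove","path":"/metadata/finalizers","value":""}]
// the empty "value" member is typically ignored for a remove operation
fmt.Println(string(b))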

func (c *CStorVolumeReplicaController) cVRAddEventHandler(cVR *apis.CStorVolumeReplica, fullVolName string) (string, error) {
// lock is to synchronize pool and volumereplica. Until certain pool related
// operations are over, the volumereplica threads will be held.
1 change: 1 addition & 0 deletions pkg/install/v1alpha1/cstor_volume.go
@@ -706,6 +706,7 @@ spec:
of the pools from resources list
*/}}
name: {{ .Volume.owner }}-{{ pluck .ListItems.currentRepeatResource .ListItems.cvolPoolList.pools | first }}
finalizers: ["cstorvolumereplica.openebs.io/finalizer"]
labels:
cstorpool.openebs.io/name: {{ pluck .ListItems.currentRepeatResource .ListItems.cvolPoolList.pools | first }}
cstorpool.openebs.io/uid: {{ .ListItems.currentRepeatResource }}
