Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cephfs: fix future rctimes #37938

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions qa/workunits/fs/misc/rstats.sh
Expand Up @@ -75,6 +75,11 @@ touch d1/d2/f3 # create new file
wait_until_changed rctime
check_rctime

old_value=`getfattr --only-value -n ceph.dir.rctime .`
setfattr -n "ceph.dir.rctime" -v `date -d 'now+1hour' +%s` .
wait_until_changed rctime
check_rctime

cd ..
rm -rf rstats_testdir
echo OK
8 changes: 7 additions & 1 deletion src/client/Client.cc
Expand Up @@ -12918,7 +12918,13 @@ const Client::VXattr Client::_dir_vxattrs[] = {
XATTR_NAME_CEPH(dir, rsubdirs, VXATTR_RSTAT),
XATTR_NAME_CEPH(dir, rsnaps, VXATTR_RSTAT),
XATTR_NAME_CEPH(dir, rbytes, VXATTR_RSTAT),
XATTR_NAME_CEPH(dir, rctime, VXATTR_RSTAT),
{
name: "ceph.dir.rctime",
getxattr_cb: &Client::_vxattrcb_dir_rctime,
readonly: false,
exists_cb: NULL,
flags: VXATTR_RSTAT,
},
{
name: "ceph.quota",
getxattr_cb: &Client::_vxattrcb_quota,
Expand Down
34 changes: 19 additions & 15 deletions src/mds/Locker.cc
Expand Up @@ -2852,11 +2852,9 @@ bool Locker::check_inode_max_size(CInode *in, bool force_wrlock,
pi.inode->rstat.rbytes = new_size;
dout(10) << "check_inode_max_size mtime " << pi.inode->mtime << " -> " << new_mtime << dendl;
pi.inode->mtime = new_mtime;
if (new_mtime > pi.inode->ctime) {
pi.inode->ctime = new_mtime;
if (new_mtime > pi.inode->rstat.rctime)
pi.inode->rstat.rctime = new_mtime;
}
pi.inode->ctime = ceph_clock_now();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

@lxbsz lxbsz Mar 17, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, and only the recovery will set the new_size and also the mtime will be readed from the data pool objects, I don't see any case will user modified mtime could affect the ctime here.

if (pi.inode->ctime > pi.inode->rstat.rctime)
pi.inode->rstat.rctime = pi.inode->ctime;
thmour marked this conversation as resolved.
Show resolved Hide resolved
}

// use EOpen if the file is still open; otherwise, use EUpdate.
Expand Down Expand Up @@ -3657,22 +3655,16 @@ void Locker::_update_cap_fields(CInode *in, int dirty, const cref_t<MClientCaps>
return;

/* m must be valid if there are dirty caps */
bool changed = false;
ceph_assert(m);
uint64_t features = m->get_connection()->get_features();

if (m->get_ctime() > pi->ctime) {
dout(7) << " ctime " << pi->ctime << " -> " << m->get_ctime()
<< " for " << *in << dendl;
pi->ctime = m->get_ctime();
if (m->get_ctime() > pi->rstat.rctime)
pi->rstat.rctime = m->get_ctime();
}

if ((features & CEPH_FEATURE_FS_CHANGE_ATTR) &&
m->get_change_attr() > pi->change_attr) {
dout(7) << " change_attr " << pi->change_attr << " -> " << m->get_change_attr()
<< " for " << *in << dendl;
pi->change_attr = m->get_change_attr();
changed = true;
}

// file
Expand All @@ -3687,20 +3679,21 @@ void Locker::_update_cap_fields(CInode *in, int dirty, const cref_t<MClientCaps>
dout(7) << " mtime " << pi->mtime << " -> " << mtime
<< " for " << *in << dendl;
pi->mtime = mtime;
if (mtime > pi->rstat.rctime)
pi->rstat.rctime = mtime;
thmour marked this conversation as resolved.
Show resolved Hide resolved
changed = true;
}
if (in->is_file() && // ONLY if regular file
size > pi->size) {
dout(7) << " size " << pi->size << " -> " << size
<< " for " << *in << dendl;
pi->size = size;
pi->rstat.rbytes = size;
changed = true;
}
if (in->is_file() &&
(dirty & CEPH_CAP_FILE_WR) &&
inline_version > pi->inline_data.version) {
pi->inline_data.version = inline_version;
changed = true;
if (inline_version != CEPH_INLINE_NONE && m->inline_data.length() > 0)
pi->inline_data.set_data(m->inline_data);
else
Expand All @@ -3710,12 +3703,14 @@ void Locker::_update_cap_fields(CInode *in, int dirty, const cref_t<MClientCaps>
dout(7) << " atime " << pi->atime << " -> " << atime
<< " for " << *in << dendl;
pi->atime = atime;
changed = true;
}
if ((dirty & CEPH_CAP_FILE_EXCL) &&
ceph_seq_cmp(pi->time_warp_seq, m->get_time_warp_seq()) < 0) {
dout(7) << " time_warp_seq " << pi->time_warp_seq << " -> " << m->get_time_warp_seq()
<< " for " << *in << dendl;
pi->time_warp_seq = m->get_time_warp_seq();
changed = true;
}
}
// auth
Expand All @@ -3725,26 +3720,35 @@ void Locker::_update_cap_fields(CInode *in, int dirty, const cref_t<MClientCaps>
<< " -> " << m->head.uid
<< " for " << *in << dendl;
pi->uid = m->head.uid;
changed = true;
}
if (m->head.gid != pi->gid) {
dout(7) << " gid " << pi->gid
<< " -> " << m->head.gid
<< " for " << *in << dendl;
pi->gid = m->head.gid;
changed = true;
}
if (m->head.mode != pi->mode) {
dout(7) << " mode " << oct << pi->mode
<< " -> " << m->head.mode << dec
<< " for " << *in << dendl;
pi->mode = m->head.mode;
changed = true;
}
if ((features & CEPH_FEATURE_FS_BTIME) && m->get_btime() != pi->btime) {
dout(7) << " btime " << oct << pi->btime
<< " -> " << m->get_btime() << dec
<< " for " << *in << dendl;
pi->btime = m->get_btime();
changed = true;
}
}

if (changed) {
pi->ctime = std::max(pi->ctime, ceph_clock_now());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the ctime change here seems buggy. The client could cache the ctime changing such as if the Fx or Ax is issued. What if the cached ctime is flushed to MDS 1 minute later ? The ctime here will always be increased by 1 miniute ?

pi->rstat.rctime = std::max(pi->rstat.rctime, pi->ctime);
}
}

/*
Expand Down
9 changes: 2 additions & 7 deletions src/mds/MDCache.cc
Expand Up @@ -2183,12 +2183,7 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob,
pf->fragstat.mtime = mut->get_op_stamp();
pf->fragstat.change_attr++;
dout(10) << "predirty_journal_parents bumping change_attr to " << pf->fragstat.change_attr << " on " << parent << dendl;
if (pf->fragstat.mtime > pf->rstat.rctime) {
dout(10) << "predirty_journal_parents updating mtime on " << *parent << dendl;
pf->rstat.rctime = pf->fragstat.mtime;
} else {
dout(10) << "predirty_journal_parents updating mtime UNDERWATER on " << *parent << dendl;
}
thmour marked this conversation as resolved.
Show resolved Hide resolved
pf->rstat.rctime = std::max(pf->rstat.rctime, ceph_clock_now());
}
if (linkunlink) {
dout(10) << "predirty_journal_parents updating size on " << *parent << dendl;
Expand Down Expand Up @@ -2306,7 +2301,7 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob,
pi.inode->dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, &touched_mtime, &touched_chattr);
pf->accounted_fragstat = pf->fragstat;
if (touched_mtime)
pi.inode->mtime = pi.inode->ctime = pi.inode->dirstat.mtime;
pi.inode->mtime = pi.inode->dirstat.mtime;
if (touched_chattr)
pi.inode->change_attr = pi.inode->dirstat.change_attr;
dout(20) << "predirty_journal_parents gives " << pi.inode->dirstat << " on " << *pin << dendl;
Expand Down
43 changes: 43 additions & 0 deletions src/mds/Server.cc
Expand Up @@ -5324,6 +5324,21 @@ bool Server::xlock_policylock(MDRequestRef& mdr, CInode *in, bool want_layout, b
return true;
}

bool Server::wrlock_nestlock(MDRequestRef& mdr, CInode *in)
{
if (mdr->locking_state & MutationImpl::ALL_LOCKED)
return true;

MutationImpl::LockOpVec lov;
lov.add_wrlock(&in->nestlock);

if (!mds->locker->acquire_locks(mdr, lov))
return false;

mdr->locking_state |= MutationImpl::ALL_LOCKED;
return true;
}

CInode* Server::try_get_auth_inode(MDRequestRef& mdr, inodeno_t ino)
{
CInode *in = mdcache->get_inode(ino);
Expand Down Expand Up @@ -5861,6 +5876,34 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur)
auto pi = cur->project_inode(mdr);
cur->setxattr_ephemeral_dist(val);
pip = pi.inode.get();
} else if (name == "ceph.dir.rctime") {
if (!cur->is_dir()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you update an inode's rctime? You could fix all of the rctimes but none of the inodes?

respond_to_request(mdr, -EINVAL);
return;
}

uint64_t sec, nsec = 0;
try {
if (auto pos = value.find('.', 0); pos != string::npos) {
sec = boost::lexical_cast<uint64_t>(value.substr(0, pos));
nsec = boost::lexical_cast<uint64_t>(value.substr(pos+1));
} else {
sec = boost::lexical_cast<uint64_t>(value);
}
} catch (boost::bad_lexical_cast const&) {
dout(10) << "bad vxattr value, unable to parse utime_t for " << name << dendl;
respond_to_request(mdr, -EINVAL);
return;
}
utime_t val(sec, nsec);

if (!wrlock_nestlock(mdr, cur))
return;

auto pi = cur->project_inode(mdr);
pi.inode->rstat.rctime = val;
mdr->no_early_reply = true;
pip = pi.inode.get();
} else {
dout(10) << " unknown vxattr " << name << dendl;
respond_to_request(mdr, -CEPHFS_EINVAL);
Expand Down
4 changes: 3 additions & 1 deletion src/mds/Server.h
Expand Up @@ -206,6 +206,7 @@ class Server {

bool xlock_policylock(MDRequestRef& mdr, CInode *in,
bool want_layout=false, bool xlock_snaplock=false);
bool wrlock_nestlock(MDRequestRef& mdr, CInode *in);
CInode* try_get_auth_inode(MDRequestRef& mdr, inodeno_t ino);
void handle_client_setattr(MDRequestRef& mdr);
void handle_client_setlayout(MDRequestRef& mdr);
Expand Down Expand Up @@ -418,7 +419,8 @@ class Server {
xattr_name == "ceph.dir.subvolume" ||
xattr_name == "ceph.dir.pin" ||
xattr_name == "ceph.dir.pin.random" ||
xattr_name == "ceph.dir.pin.distributed";
xattr_name == "ceph.dir.pin.distributed" ||
xattr_name == "ceph.dir.rctime";
}

static bool is_allowed_ceph_xattr(std::string_view xattr_name) {
Expand Down