Skip to content

Commit

Permalink
LU-376 Positive LL_DIR_END_OFF to indicate the tail of dir hash/offset
Browse files Browse the repository at this point in the history
1) Keep 'MDS_DIR_END_OFF' unchanged (0xfffffffffffffffeULL) to simplify
   dir hash/offset related interoperability issues.
2) Introduce positive "LL_DIR_END_OFF" (0x7fffffffffffffffULL) on client
   to indicate the tail of dir hash/offset for upper-layer callers, like
   llseek(), readdir(), and so on.
3) Allow a 1.8 client to talk to an old 2.0 server that uses a 32-bit hash.

Change-Id: I126ddb170b9ee24d1ae0610ac6343c9b0f5e4c70
Signed-off-by: nasf <yong.fan@whamcloud.com>
Reviewed-on: http://review.whamcloud.com/887
Tested-by: Hudson
Reviewed-by: Johann Lombardi <johann@whamcloud.com>
  • Loading branch information
nasf authored and morrone committed Sep 16, 2011
1 parent fb27897 commit fc5dd60
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 103 deletions.
1 change: 1 addition & 0 deletions lustre/include/lustre/lustre_idl.h
Expand Up @@ -768,6 +768,7 @@ enum lu_dirent_attrs {
LUDA_TYPE = 0x0002,
};

#define MDS_DIR_END_OFF 0xfffffffffffffffeULL

extern void lustre_swab_ll_fid (struct ll_fid *fid);

Expand Down
157 changes: 77 additions & 80 deletions lustre/llite/dir.c
Expand Up @@ -434,12 +434,11 @@ static inline void ll_dir_chain_fini(struct ll_dir_chain *chain)
{
}

static inline unsigned long hash_x_index(__u64 hash)
static inline unsigned long hash_x_index(__u64 hash, int hash64)
{
#ifdef __KERNEL__
# if BITS_PER_LONG == 32
hash >>= 32;
# endif
if (BITS_PER_LONG == 32 && hash64)
hash >>= 32;
#endif
return ~0UL - hash;
}
Expand Down Expand Up @@ -573,9 +572,6 @@ static inline int lu_dirent_size(struct lu_dirent *ent)
return le16_to_cpu(ent->lde_reclen);
}

#define DIR_END_OFF 0xfffffffffffffffeULL
#define DIR_END_OFF_32BIT 0xfffffffeUL

#ifdef HAVE_RW_TREE_LOCK
#define TREE_READ_LOCK_IRQ(mapping) read_lock_irq(&(mapping)->tree_lock)
#define TREE_READ_UNLOCK_IRQ(mapping) read_unlock_irq(&(mapping)->tree_lock)
Expand Down Expand Up @@ -640,13 +636,14 @@ static void ll_check_page(struct inode *dir, struct page *page)
static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
__u64 *start, __u64 *end)
{
int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
struct address_space *mapping = dir->i_mapping;
/*
* Complement of hash is used as an index so that
* radix_tree_gang_lookup() can be used to find a page with starting
* hash _smaller_ than one we are looking for.
*/
unsigned long offset = hash_x_index(*hash);
unsigned long offset = hash_x_index(*hash, hash64);
struct page *page;
int found;
ENTRY;
Expand All @@ -670,14 +667,14 @@ static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
wait_on_page(page);
if (PageUptodate(page)) {
dp = kmap(page);
#if BITS_PER_LONG == 32
*start = le64_to_cpu(dp->ldp_hash_start) >> 32;
*end = le64_to_cpu(dp->ldp_hash_end) >> 32;
*hash = *hash >> 32;
#else
*start = le64_to_cpu(dp->ldp_hash_start);
*end = le64_to_cpu(dp->ldp_hash_end);
#endif
if (BITS_PER_LONG == 32 && hash64) {
*start = le64_to_cpu(dp->ldp_hash_start) >> 32;
*end = le64_to_cpu(dp->ldp_hash_end) >> 32;
*hash = *hash >> 32;
} else {
*start = le64_to_cpu(dp->ldp_hash_start);
*end = le64_to_cpu(dp->ldp_hash_end);
}
LASSERTF(*start <= *hash, "start = "LPX64",end = "
LPX64",hash = "LPX64"\n", *start, *end, *hash);
if (*hash > *end || (*end != *start && *hash == *end)) {
Expand Down Expand Up @@ -716,6 +713,7 @@ static struct page *ll_get_dir_page_20(struct file *filp, struct inode *dir,
__u64 start = 0;
__u64 end = 0;
__u64 lhash = hash;
int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
ENTRY;

fid_build_reg_res_name(ll_inode_lu_fid(dir), &res_id);
Expand Down Expand Up @@ -781,7 +779,7 @@ static struct page *ll_get_dir_page_20(struct file *filp, struct inode *dir,
}
}

page = read_cache_page(mapping, hash_x_index(hash),
page = read_cache_page(mapping, hash_x_index(hash, hash64),
(filler_t*)ll_dir_readpage_20, filp);
if (IS_ERR(page))
GOTO(out_unlock, page);
Expand All @@ -797,23 +795,23 @@ static struct page *ll_get_dir_page_20(struct file *filp, struct inode *dir,
hash_collision:
dp = page_address(page);

#if BITS_PER_LONG == 32
start = le64_to_cpu(dp->ldp_hash_start) >> 32;
end = le64_to_cpu(dp->ldp_hash_end) >> 32;
lhash = hash >> 32;
#else
start = le64_to_cpu(dp->ldp_hash_start);
end = le64_to_cpu(dp->ldp_hash_end);
lhash = hash;
#endif
if (BITS_PER_LONG == 32 && hash64) {
start = le64_to_cpu(dp->ldp_hash_start) >> 32;
end = le64_to_cpu(dp->ldp_hash_end) >> 32;
lhash = hash >> 32;
} else {
start = le64_to_cpu(dp->ldp_hash_start);
end = le64_to_cpu(dp->ldp_hash_end);
lhash = hash;
}
if (end == start) {
LASSERT(start == lhash);
CWARN("Page-wide hash collision: "LPU64"\n", end);
#if BITS_PER_LONG == 32
CWARN("Real page-wide hash collision at ["LPU64" "LPU64"] with "
"hash "LPU64"\n", le64_to_cpu(dp->ldp_hash_start),
le64_to_cpu(dp->ldp_hash_end), hash);
#endif
if (BITS_PER_LONG == 32 && hash64)
CWARN("Real page-wide hash collision at ["LPU64" "LPU64
"] with hash "LPU64"\n",
le64_to_cpu(dp->ldp_hash_start),
le64_to_cpu(dp->ldp_hash_end), hash);
/*
* Fetch whole overflow chain...
*
Expand All @@ -837,21 +835,20 @@ static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir)
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
__u64 pos = fd->fd_dir.lfd_pos;
int api32 = ll_need_32bit_api(sbi);
int hash64= sbi->ll_flags & LL_SBI_64BIT_HASH;
struct page *page;
struct ll_dir_chain chain;
int rc;
int done;
int shift,need_32bit;
__u16 type;
int rc;
int done;
int shift;
ENTRY;

need_32bit = ll_need_32bit_api(sbi);

CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n",
inode->i_ino, inode->i_generation, inode,
(unsigned long)pos, i_size_read(inode), need_32bit);
(unsigned long)pos, i_size_read(inode), api32);

if (pos == DIR_END_OFF)
if (pos == MDS_DIR_END_OFF)
/*
* end-of-file.
*/
Expand All @@ -875,17 +872,17 @@ static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir)
* If page is empty (end of directory is reached),
* use this value.
*/
__u64 hash = DIR_END_OFF;
__u64 hash = MDS_DIR_END_OFF;
__u64 next;

dp = page_address(page);
for (ent = lu_dirent_start(dp); ent != NULL && !done;
ent = lu_dirent_next(ent)) {
char *name;
__u16 type;
int namelen;
struct lu_fid fid;
__u64 ino;
__u64 lhash;
__u64 ino;

hash = le64_to_cpu(ent->lde_hash);
if (hash < pos)
Expand All @@ -902,25 +899,22 @@ static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir)
*/
continue;

name = ent->lde_name;
fid_le_to_cpu(&fid, &ent->lde_fid);
if (need_32bit) {
ino = ll_fid_build_ino((struct ll_fid *)&fid,
api32);
if (api32 && hash64)
lhash = hash >> 32;
ino = ll_fid_build_ino32((struct ll_fid *)&fid);
} else {
else
lhash = hash;
ino = ll_fid_build_ino((struct ll_fid *)&fid);
}

type = ll_dirent_type_get(ent);
done = filldir(cookie, name, namelen,
done = filldir(cookie, ent->lde_name, namelen,
lhash, ino, type);
}
next = le64_to_cpu(dp->ldp_hash_end);
ll_put_page(page);
if (!done) {
pos = next;
if (pos == DIR_END_OFF) {
if (pos == MDS_DIR_END_OFF) {
/*
* End of directory reached.
*/
Expand Down Expand Up @@ -951,13 +945,16 @@ static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir)
}

fd->fd_dir.lfd_pos = pos;
if (need_32bit) {
if (pos == DIR_END_OFF)
filp->f_pos = DIR_END_OFF_32BIT;
if (pos == MDS_DIR_END_OFF) {
if (api32)
filp->f_pos = LL_DIR_END_OFF_32BIT;
else
filp->f_pos = pos >> 32;
filp->f_pos = LL_DIR_END_OFF;
} else {
filp->f_pos = pos;
if (api32 && hash64)
filp->f_pos = pos >> 32;
else
filp->f_pos = pos;
}
filp->f_version = inode->i_version;
touch_atime(filp->f_vfsmnt, filp->f_dentry);
Expand Down Expand Up @@ -1661,9 +1658,9 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
{
struct inode *inode = file->f_mapping->host;
struct ll_sb_info *sbi = ll_i2sbi(inode);
int need_32bit = ll_need_32bit_api(sbi);
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
struct ll_sb_info *sbi = ll_i2sbi(inode);
int api32 = ll_need_32bit_api(sbi);
loff_t ret = -EINVAL;
ENTRY;

Expand All @@ -1672,40 +1669,40 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)

mutex_lock(&inode->i_mutex);
switch (origin) {
case 2:
offset += inode->i_size;
case SEEK_SET:
break;
case 1:
if ((need_32bit && file->f_pos == DIR_END_OFF_32BIT) ||
(!need_32bit && file->f_pos == DIR_END_OFF)) {
if (offset == 0)
GOTO(out, ret = file->f_pos);
else if (offset > 0)
GOTO(out, ret);
}
case SEEK_CUR:
offset += file->f_pos;
break;
case SEEK_END:
if (offset > 0)
GOTO(out, ret);
if (api32)
offset += LL_DIR_END_OFF_32BIT;
else
offset += LL_DIR_END_OFF;
break;
default:
GOTO(out, ret);
}

if (need_32bit && offset >= 0 && offset <= DIR_END_OFF_32BIT) {
if (offset >= 0 &&
((api32 && offset <= LL_DIR_END_OFF_32BIT) ||
(!api32 && offset <= LL_DIR_END_OFF))) {
if (offset != file->f_pos) {
if (offset == DIR_END_OFF_32BIT)
fd->fd_dir.lfd_pos = DIR_END_OFF;
else
if ((api32 && offset == LL_DIR_END_OFF_32BIT) ||
(!api32 && offset == LL_DIR_END_OFF))
fd->fd_dir.lfd_pos = MDS_DIR_END_OFF;
else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH)
fd->fd_dir.lfd_pos = offset << 32;
file->f_pos = offset;
file->f_version = 0;
}
ret = offset;
} else if (!need_32bit && (offset >= 0 || offset == DIR_END_OFF)) {
if (offset != file->f_pos) {
fd->fd_dir.lfd_pos = offset;
else
fd->fd_dir.lfd_pos = offset;
file->f_pos = offset;
file->f_version = 0;
}
ret = offset;
}
EXIT;
GOTO(out, ret);

out:
mutex_unlock(&inode->i_mutex);
Expand Down
7 changes: 4 additions & 3 deletions lustre/llite/file.c
Expand Up @@ -3431,18 +3431,19 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
struct lookup_intent *it, struct kstat *stat)
{
struct inode *inode = de->d_inode;
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ll_inode_info *lli = ll_i2info(inode);
int res = 0;

res = ll_inode_revalidate_it(de, it);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);

if (res)
return res;

stat->dev = inode->i_sb->s_dev;
if (cfs_curproc_is_32bit())
stat->ino = ll_fid_build_ino32((struct ll_fid *)&lli->lli_fid);
if (ll_need_32bit_api(sbi))
stat->ino = ll_fid_build_ino((struct ll_fid *)&lli->lli_fid, 1);
else
stat->ino = inode->i_ino;
stat->mode = inode->i_mode;
Expand Down
10 changes: 7 additions & 3 deletions lustre/llite/llite_internal.h
Expand Up @@ -70,6 +70,10 @@ struct lustre_intent_data {
#define FMODE_EXEC 0
#endif

/**
 * Only used on client-side for indicating the tail of dir hash/offset.
 *
 * These are the end-of-directory values exposed through f_pos:
 * LL_DIR_END_OFF_32BIT when the 32-bit API is in use, LL_DIR_END_OFF
 * otherwise. Internally the directory position still uses
 * MDS_DIR_END_OFF as its end marker.
 */
#define LL_DIR_END_OFF 0x7fffffffffffffffULL
#define LL_DIR_END_OFF_32BIT 0x7fffffffUL

#ifndef DCACHE_LUSTRE_INVALID
#define DCACHE_LUSTRE_INVALID 0x100
#endif
Expand Down Expand Up @@ -304,7 +308,8 @@ enum stats_track_type {
#define LL_SBI_LLITE_CHECKSUM 0x100 /* checksum each page in memory */
#define LL_SBI_LAZYSTATFS 0x200 /* lazystatfs mount option */
#define LL_SBI_32BIT_API 0x400 /* generate 32 bit inodes. */
#define LL_SBI_VERBOSE 0x800 /* verbose mount/umount */
#define LL_SBI_64BIT_HASH 0x800 /* support 64-bits dir hash/offset */
#define LL_SBI_VERBOSE 0x1000 /* verbose mount/umount */

/* default value for ll_sb_info->contention_time */
#define SBI_DEFAULT_CONTENTION_SECONDS 60
Expand Down Expand Up @@ -1208,8 +1213,7 @@ enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
void ll_iocontrol_unregister(void *magic);

__u64 ll_fid_build_ino(const struct ll_fid *fid);
__u32 ll_fid_build_ino32(const struct ll_fid *fid);
__u64 ll_fid_build_ino(const struct ll_fid *fid, int api32);
__u32 ll_fid_build_gen(struct ll_sb_info *sbi,
struct ll_fid *fid);

Expand Down
11 changes: 8 additions & 3 deletions lustre/llite/llite_lib.c
Expand Up @@ -380,6 +380,11 @@ static int client_common_fill_super(struct super_block *sb,
if (data->ocd_connect_flags & OBD_CONNECT_JOIN)
sbi->ll_flags |= LL_SBI_JOIN;

if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH) {
LCONSOLE_INFO("client supports 64-bits dir hash/offset!\n");
sbi->ll_flags |= LL_SBI_64BIT_HASH;
}

obd = class_name2obd(osc);
if (!obd) {
CERROR("OSC %s: not setup or attached\n", osc);
Expand Down Expand Up @@ -500,7 +505,7 @@ static int client_common_fill_super(struct super_block *sb,
}

LASSERT(sbi->ll_rootino != 0);
root = ll_iget(sb, ll_fid_build_ino(&rootfid), &md);
root = ll_iget(sb, ll_fid_build_ino(&rootfid, 0), &md);

ptlrpc_req_finished(request);

Expand Down Expand Up @@ -1979,7 +1984,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
}
#endif

inode->i_ino = ll_fid_build_ino(&body->fid1);
inode->i_ino = ll_fid_build_ino(&body->fid1, 0);
inode->i_generation = ll_fid_build_gen(sbi, &body->fid1);
*ll_inode_lu_fid(inode) = *((struct lu_fid*)&md->body->fid1);

Expand Down Expand Up @@ -2321,7 +2326,7 @@ int ll_prep_inode(struct obd_export *exp, struct inode **inode,
/** hashing VFS inode by FIDs.
* IGIF will be used for for compatibility if needed.
*/
*inode =ll_iget(sb, ll_fid_build_ino(&md.body->fid1), &md);
*inode =ll_iget(sb, ll_fid_build_ino(&md.body->fid1, 0), &md);
if (*inode == NULL || is_bad_inode(*inode)) {
mdc_free_lustre_md(exp, &md);
rc = -ENOMEM;
Expand Down

0 comments on commit fc5dd60

Please sign in to comment.