# Patch summary (reviewer notes). Everything before the first "diff --git"
# line is leading text, not part of any hunk.
#
# Purpose, as far as the hunks below show: the llite client decides between
# 32-bit and 64-bit directory hash/offset handling at run time (per-mount
# flag LL_SBI_64BIT_HASH plus ll_need_32bit_api()) instead of at compile
# time via "#if BITS_PER_LONG == 32".
#
# lustre/include/lustre/lustre_idl.h
#   * Adds MDS_DIR_END_OFF (0xfffffffffffffffeULL). It takes over the value
#     of DIR_END_OFF, whose definition is removed from llite/dir.c.
#
# lustre/llite/llite_internal.h
#   * Adds LL_DIR_END_OFF (0x7fffffffffffffffULL) and LL_DIR_END_OFF_32BIT
#     (0x7fffffffUL).
#   * Adds LL_SBI_64BIT_HASH as 0x800 and moves LL_SBI_VERBOSE from 0x800
#     to 0x1000.
#   * Replaces the ll_fid_build_ino(fid) / ll_fid_build_ino32(fid) prototypes
#     with a single ll_fid_build_ino(fid, api32).
#
# lustre/llite/llite_lib.c
#   * client_common_fill_super() sets LL_SBI_64BIT_HASH (and prints an
#     LCONSOLE_INFO message) when ocd_connect_flags has OBD_CONNECT_64BITHASH.
#   * The three ll_fid_build_ino() callers shown pass api32 = 0.
#
# lustre/llite/dir.c
#   * hash_x_index() gains a hash64 argument; the ">> 32" is applied only
#     when BITS_PER_LONG == 32 && hash64.
#   * ll_dir_page_locate() and ll_get_dir_page_20() read hash64 from
#     ll_i2sbi(dir)->ll_flags and use the same run-time condition in place of
#     the removed preprocessor branches.
#   * ll_readdir_20(): compares against MDS_DIR_END_OFF; builds the inode
#     number with ll_fid_build_ino(fid, api32); passes "hash >> 32" to
#     filldir only when api32 && hash64; at end of directory stores
#     LL_DIR_END_OFF_32BIT or LL_DIR_END_OFF in filp->f_pos, otherwise
#     "pos >> 32" (api32 && hash64) or pos.
#   * ll_dir_seek(): switches on SEEK_SET / SEEK_CUR / SEEK_END; any other
#     origin leaves ret at -EINVAL. SEEK_END rejects positive offsets and is
#     relative to LL_DIR_END_OFF[_32BIT] rather than inode->i_size. An
#     accepted offset equal to the end marker sets lfd_pos to
#     MDS_DIR_END_OFF; with api32 and LL_SBI_64BIT_HASH lfd_pos is
#     "offset << 32"; otherwise lfd_pos is offset.
#
# lustre/llite/file.c
#   * ll_getattr_it() selects the 32-bit inode number with
#     ll_need_32bit_api(sbi) instead of cfs_curproc_is_32bit().
#
# lustre/llite/namei.c
#   * Removes ll_fid_build_ino32(); ll_fid_build_ino() returns
#     fid_flatten32() when BITS_PER_LONG == 32 || api32, else fid_flatten().
#
# NOTE(review): in this copy each hunk body is joined onto a few very long
#   lines although the "@@" headers announce multi-line hunks; the line
#   structure presumably has to be restored before the patch can be applied.
# NOTE(review): ll_dir_seek() SEEK_CUR no longer has the old "already at end
#   and offset > 0" guard. With f_pos == LL_DIR_END_OFF and offset > 0,
#   "offset += file->f_pos" exceeds the signed 64-bit range; reaching
#   -EINVAL then appears to rely on wrap-around -- confirm build flags.
# NOTE(review): "api32 && sbi->ll_flags & LL_SBI_64BIT_HASH" parses as
#   "api32 && (sbi->ll_flags & LL_SBI_64BIT_HASH)"; explicit parentheses
#   would make that intent obvious.
# NOTE(review): LL_SBI_VERBOSE changes value; users of the old 0x800 value
#   outside this patch (if any) are not visible here -- verify.
diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index ece8717878a..fc8984a0d2b 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -768,6 +768,7 @@ enum lu_dirent_attrs { LUDA_TYPE = 0x0002, }; +#define MDS_DIR_END_OFF 0xfffffffffffffffeULL extern void lustre_swab_ll_fid (struct ll_fid *fid); diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 0f32502fea0..e9c1e76b27a 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -434,12 +434,11 @@ static inline void ll_dir_chain_fini(struct ll_dir_chain *chain) { } -static inline unsigned long hash_x_index(__u64 hash) +static inline unsigned long hash_x_index(__u64 hash, int hash64) { #ifdef __KERNEL__ -# if BITS_PER_LONG == 32 - hash >>= 32; -# endif + if (BITS_PER_LONG == 32 && hash64) + hash >>= 32; #endif return ~0UL - hash; } @@ -573,9 +572,6 @@ static inline int lu_dirent_size(struct lu_dirent *ent) return le16_to_cpu(ent->lde_reclen); } -#define DIR_END_OFF 0xfffffffffffffffeULL -#define DIR_END_OFF_32BIT 0xfffffffeUL - #ifdef HAVE_RW_TREE_LOCK #define TREE_READ_LOCK_IRQ(mapping) read_lock_irq(&(mapping)->tree_lock) #define TREE_READ_UNLOCK_IRQ(mapping) read_unlock_irq(&(mapping)->tree_lock) @@ -640,13 +636,14 @@ static void ll_check_page(struct inode *dir, struct page *page) static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash, __u64 *start, __u64 *end) { + int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH; struct address_space *mapping = dir->i_mapping; /* * Complement of hash is used as an index so that * radix_tree_gang_lookup() can be used to find a page with starting * hash _smaller_ than one we are looking for. 
*/ - unsigned long offset = hash_x_index(*hash); + unsigned long offset = hash_x_index(*hash, hash64); struct page *page; int found; ENTRY; @@ -670,14 +667,14 @@ static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash, wait_on_page(page); if (PageUptodate(page)) { dp = kmap(page); -#if BITS_PER_LONG == 32 - *start = le64_to_cpu(dp->ldp_hash_start) >> 32; - *end = le64_to_cpu(dp->ldp_hash_end) >> 32; - *hash = *hash >> 32; -#else - *start = le64_to_cpu(dp->ldp_hash_start); - *end = le64_to_cpu(dp->ldp_hash_end); -#endif + if (BITS_PER_LONG == 32 && hash64) { + *start = le64_to_cpu(dp->ldp_hash_start) >> 32; + *end = le64_to_cpu(dp->ldp_hash_end) >> 32; + *hash = *hash >> 32; + } else { + *start = le64_to_cpu(dp->ldp_hash_start); + *end = le64_to_cpu(dp->ldp_hash_end); + } LASSERTF(*start <= *hash, "start = "LPX64",end = " LPX64",hash = "LPX64"\n", *start, *end, *hash); if (*hash > *end || (*end != *start && *hash == *end)) { @@ -716,6 +713,7 @@ static struct page *ll_get_dir_page_20(struct file *filp, struct inode *dir, __u64 start = 0; __u64 end = 0; __u64 lhash = hash; + int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH; ENTRY; fid_build_reg_res_name(ll_inode_lu_fid(dir), &res_id); @@ -781,7 +779,7 @@ static struct page *ll_get_dir_page_20(struct file *filp, struct inode *dir, } } - page = read_cache_page(mapping, hash_x_index(hash), + page = read_cache_page(mapping, hash_x_index(hash, hash64), (filler_t*)ll_dir_readpage_20, filp); if (IS_ERR(page)) GOTO(out_unlock, page); @@ -797,23 +795,23 @@ static struct page *ll_get_dir_page_20(struct file *filp, struct inode *dir, hash_collision: dp = page_address(page); -#if BITS_PER_LONG == 32 - start = le64_to_cpu(dp->ldp_hash_start) >> 32; - end = le64_to_cpu(dp->ldp_hash_end) >> 32; - lhash = hash >> 32; -#else - start = le64_to_cpu(dp->ldp_hash_start); - end = le64_to_cpu(dp->ldp_hash_end); - lhash = hash; -#endif + if (BITS_PER_LONG == 32 && hash64) { + start = le64_to_cpu(dp->ldp_hash_start) >> 
32; + end = le64_to_cpu(dp->ldp_hash_end) >> 32; + lhash = hash >> 32; + } else { + start = le64_to_cpu(dp->ldp_hash_start); + end = le64_to_cpu(dp->ldp_hash_end); + lhash = hash; + } if (end == start) { LASSERT(start == lhash); CWARN("Page-wide hash collision: "LPU64"\n", end); -#if BITS_PER_LONG == 32 - CWARN("Real page-wide hash collision at ["LPU64" "LPU64"] with " - "hash "LPU64"\n", le64_to_cpu(dp->ldp_hash_start), - le64_to_cpu(dp->ldp_hash_end), hash); -#endif + if (BITS_PER_LONG == 32 && hash64) + CWARN("Real page-wide hash collision at ["LPU64" "LPU64 + "] with hash "LPU64"\n", + le64_to_cpu(dp->ldp_hash_start), + le64_to_cpu(dp->ldp_hash_end), hash); /* * Fetch whole overflow chain... * @@ -837,21 +835,20 @@ static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir) struct ll_sb_info *sbi = ll_i2sbi(inode); struct ll_file_data *fd = LUSTRE_FPRIVATE(filp); __u64 pos = fd->fd_dir.lfd_pos; + int api32 = ll_need_32bit_api(sbi); + int hash64= sbi->ll_flags & LL_SBI_64BIT_HASH; struct page *page; struct ll_dir_chain chain; - int rc; - int done; - int shift,need_32bit; - __u16 type; + int rc; + int done; + int shift; ENTRY; - need_32bit = ll_need_32bit_api(sbi); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n", inode->i_ino, inode->i_generation, inode, - (unsigned long)pos, i_size_read(inode), need_32bit); + (unsigned long)pos, i_size_read(inode), api32); - if (pos == DIR_END_OFF) + if (pos == MDS_DIR_END_OFF) /* * end-of-file. */ @@ -875,17 +872,17 @@ static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir) * If page is empty (end of directoryis reached), * use this value. 
*/ - __u64 hash = DIR_END_OFF; + __u64 hash = MDS_DIR_END_OFF; __u64 next; dp = page_address(page); for (ent = lu_dirent_start(dp); ent != NULL && !done; ent = lu_dirent_next(ent)) { - char *name; + __u16 type; int namelen; struct lu_fid fid; - __u64 ino; __u64 lhash; + __u64 ino; hash = le64_to_cpu(ent->lde_hash); if (hash < pos) @@ -902,25 +899,22 @@ static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir) */ continue; - name = ent->lde_name; fid_le_to_cpu(&fid, &ent->lde_fid); - if (need_32bit) { + ino = ll_fid_build_ino((struct ll_fid *)&fid, + api32); + if (api32 && hash64) lhash = hash >> 32; - ino = ll_fid_build_ino32((struct ll_fid *)&fid); - } else { + else lhash = hash; - ino = ll_fid_build_ino((struct ll_fid *)&fid); - } - type = ll_dirent_type_get(ent); - done = filldir(cookie, name, namelen, + done = filldir(cookie, ent->lde_name, namelen, lhash, ino, type); } next = le64_to_cpu(dp->ldp_hash_end); ll_put_page(page); if (!done) { pos = next; - if (pos == DIR_END_OFF) { + if (pos == MDS_DIR_END_OFF) { /* * End of directory reached. 
*/ @@ -951,13 +945,16 @@ static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir) } fd->fd_dir.lfd_pos = pos; - if (need_32bit) { - if (pos == DIR_END_OFF) - filp->f_pos = DIR_END_OFF_32BIT; + if (pos == MDS_DIR_END_OFF) { + if (api32) + filp->f_pos = LL_DIR_END_OFF_32BIT; else - filp->f_pos = pos >> 32; + filp->f_pos = LL_DIR_END_OFF; } else { - filp->f_pos = pos; + if (api32 && hash64) + filp->f_pos = pos >> 32; + else + filp->f_pos = pos; } filp->f_version = inode->i_version; touch_atime(filp->f_vfsmnt, filp->f_dentry); @@ -1661,9 +1658,9 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) { struct inode *inode = file->f_mapping->host; - struct ll_sb_info *sbi = ll_i2sbi(inode); - int need_32bit = ll_need_32bit_api(sbi); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_sb_info *sbi = ll_i2sbi(inode); + int api32 = ll_need_32bit_api(sbi); loff_t ret = -EINVAL; ENTRY; @@ -1672,40 +1669,40 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) mutex_lock(&inode->i_mutex); switch (origin) { - case 2: - offset += inode->i_size; + case SEEK_SET: break; - case 1: - if ((need_32bit && file->f_pos == DIR_END_OFF_32BIT) || - (!need_32bit && file->f_pos == DIR_END_OFF)) { - if (offset == 0) - GOTO(out, ret = file->f_pos); - else if (offset > 0) - GOTO(out, ret); - } + case SEEK_CUR: offset += file->f_pos; break; + case SEEK_END: + if (offset > 0) + GOTO(out, ret); + if (api32) + offset += LL_DIR_END_OFF_32BIT; + else + offset += LL_DIR_END_OFF; + break; + default: + GOTO(out, ret); } - if (need_32bit && offset >= 0 && offset <= DIR_END_OFF_32BIT) { + if (offset >= 0 && + ((api32 && offset <= LL_DIR_END_OFF_32BIT) || + (!api32 && offset <= LL_DIR_END_OFF))) { if (offset != file->f_pos) { - if (offset == DIR_END_OFF_32BIT) - fd->fd_dir.lfd_pos = DIR_END_OFF; - else + if ((api32 && offset == LL_DIR_END_OFF_32BIT) || + (!api32 && offset == 
LL_DIR_END_OFF)) + fd->fd_dir.lfd_pos = MDS_DIR_END_OFF; + else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH) fd->fd_dir.lfd_pos = offset << 32; - file->f_pos = offset; - file->f_version = 0; - } - ret = offset; - } else if (!need_32bit && (offset >= 0 || offset == DIR_END_OFF)) { - if (offset != file->f_pos) { - fd->fd_dir.lfd_pos = offset; + else + fd->fd_dir.lfd_pos = offset; file->f_pos = offset; file->f_version = 0; } ret = offset; } - EXIT; + GOTO(out, ret); out: mutex_unlock(&inode->i_mutex); diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 79c0859bffe..24a914083f5 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -3431,18 +3431,19 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, struct lookup_intent *it, struct kstat *stat) { struct inode *inode = de->d_inode; + struct ll_sb_info *sbi = ll_i2sbi(inode); struct ll_inode_info *lli = ll_i2info(inode); int res = 0; res = ll_inode_revalidate_it(de, it); - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1); + ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1); if (res) return res; stat->dev = inode->i_sb->s_dev; - if (cfs_curproc_is_32bit()) - stat->ino = ll_fid_build_ino32((struct ll_fid *)&lli->lli_fid); + if (ll_need_32bit_api(sbi)) + stat->ino = ll_fid_build_ino((struct ll_fid *)&lli->lli_fid, 1); else stat->ino = inode->i_ino; stat->mode = inode->i_mode; diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index e7674436e54..de4834a02b9 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -70,6 +70,10 @@ struct lustre_intent_data { #define FMODE_EXEC 0 #endif +/** Only used on client-side for indicating the tail of dir hash/offset. 
*/ +#define LL_DIR_END_OFF 0x7fffffffffffffffULL +#define LL_DIR_END_OFF_32BIT 0x7fffffffUL + #ifndef DCACHE_LUSTRE_INVALID #define DCACHE_LUSTRE_INVALID 0x100 #endif @@ -304,7 +308,8 @@ enum stats_track_type { #define LL_SBI_LLITE_CHECKSUM 0x100 /* checksum each page in memory */ #define LL_SBI_LAZYSTATFS 0x200 /* lazystatfs mount option */ #define LL_SBI_32BIT_API 0x400 /* generate 32 bit inodes. */ -#define LL_SBI_VERBOSE 0x800 /* verbose mount/umount */ +#define LL_SBI_64BIT_HASH 0x800 /* support 64-bits dir hash/offset */ +#define LL_SBI_VERBOSE 0x1000 /* verbose mount/umount */ /* default value for ll_sb_info->contention_time */ #define SBI_DEFAULT_CONTENTION_SECONDS 60 @@ -1208,8 +1213,7 @@ enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file, void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd); void ll_iocontrol_unregister(void *magic); -__u64 ll_fid_build_ino(const struct ll_fid *fid); -__u32 ll_fid_build_ino32(const struct ll_fid *fid); +__u64 ll_fid_build_ino(const struct ll_fid *fid, int api32); __u32 ll_fid_build_gen(struct ll_sb_info *sbi, struct ll_fid *fid); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index f2efb9b1629..6b68a773ad1 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -380,6 +380,11 @@ static int client_common_fill_super(struct super_block *sb, if (data->ocd_connect_flags & OBD_CONNECT_JOIN) sbi->ll_flags |= LL_SBI_JOIN; + if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH) { + LCONSOLE_INFO("client supports 64-bits dir hash/offset!\n"); + sbi->ll_flags |= LL_SBI_64BIT_HASH; + } + obd = class_name2obd(osc); if (!obd) { CERROR("OSC %s: not setup or attached\n", osc); @@ -500,7 +505,7 @@ static int client_common_fill_super(struct super_block *sb, } LASSERT(sbi->ll_rootino != 0); - root = ll_iget(sb, ll_fid_build_ino(&rootfid), &md); + root = ll_iget(sb, ll_fid_build_ino(&rootfid, 0), &md); ptlrpc_req_finished(request); @@ -1979,7 +1984,7 @@ void 
ll_update_inode(struct inode *inode, struct lustre_md *md) } #endif - inode->i_ino = ll_fid_build_ino(&body->fid1); + inode->i_ino = ll_fid_build_ino(&body->fid1, 0); inode->i_generation = ll_fid_build_gen(sbi, &body->fid1); *ll_inode_lu_fid(inode) = *((struct lu_fid*)&md->body->fid1); @@ -2321,7 +2326,7 @@ int ll_prep_inode(struct obd_export *exp, struct inode **inode, /** hashing VFS inode by FIDs. * IGIF will be used for for compatibility if needed. */ - *inode =ll_iget(sb, ll_fid_build_ino(&md.body->fid1), &md); + *inode =ll_iget(sb, ll_fid_build_ino(&md.body->fid1, 0), &md); if (*inode == NULL || is_bad_inode(*inode)) { mdc_free_lustre_md(exp, &md); rc = -ENOMEM; diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 671b0ff4898..d48050bccc8 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -115,23 +115,14 @@ static inline __u32 fid_flatten32(const struct lu_fid *fid) RETURN(ino ? ino : fid_oid(fid)); } -/** - * for 32 bit inode numbers directly map seq+oid to 32bit number. - */ -__u32 ll_fid_build_ino32(const struct ll_fid *fid) -{ - RETURN(fid_flatten32((struct lu_fid *)fid)); -} - /** * build inode number from passed @fid */ -__u64 ll_fid_build_ino(const struct ll_fid *fid) +__u64 ll_fid_build_ino(const struct ll_fid *fid, int api32) { -#if BITS_PER_LONG == 32 - RETURN(fid_flatten32((struct lu_fid *)fid)); -#else - RETURN(fid_flatten((struct lu_fid *)fid)); -#endif + if (BITS_PER_LONG == 32 || api32) + RETURN(fid_flatten32((struct lu_fid *)fid)); + else + RETURN(fid_flatten((struct lu_fid *)fid)); } __u32 ll_fid_build_gen(struct ll_sb_info *sbi, struct ll_fid *fid)