Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

TRIM support

Signed-off-by: Samuel J. Greear <sjg@thesjg.com>
  • Loading branch information...
commit e0fb398bfbef1fb6d12dfb8308cdc83ce663cbc2 1 parent 4005878
@bissont bissont authored thesjg committed
View
2  etc/rc.d/fsck
@@ -6,7 +6,7 @@
#
# PROVIDE: fsck
-# REQUIRE: localswap
+# REQUIRE: disks
. /etc/rc.subr
View
3  etc/rc.d/swap1
@@ -6,7 +6,8 @@
#
# PROVIDE: localswap
-# REQUIRE: disks
+# REQUIRE: savecore
+# BEFORE: SERVERS
# KEYWORD: shutdown
. /etc/rc.subr
View
9 sbin/fdisk/fdisk.8
@@ -89,6 +89,15 @@ would otherwise wrap.
This typically causes BIOSes to properly detect
that the disk should be put in Large mode.
This option may be needed on very old PCs.
+.It Fl E
+Use TRIM to erase the device/partition before writing the partition table. The
+underlying device must have the Trim sysctl enabled. Only devices that
+support TRIM will have such a sysctl option (kern.cam.da.X.trim_enabled). For
+use with the
+.Fl I
+or
+.Fl u
+option.
.It Fl f Ar configfile
Set slice values using the file
.Ar configfile .
View
69 sbin/fdisk/fdisk.c
@@ -30,6 +30,8 @@
#include <sys/types.h>
#include <sys/diskslice.h>
#include <sys/diskmbr.h>
+#include <sys/ioctl_compat.h>
+#include <sys/sysctl.h>
#include <sys/stat.h>
#include <ctype.h>
#include <fcntl.h>
@@ -120,6 +122,7 @@ typedef struct cmd {
static int B_flag = 0; /* replace boot code */
static int C_flag = 0; /* use wrapped values for CHS */
+static int E_flag = 0; /* Erase through TRIM */
static int I_flag = 0; /* use entire disk for DragonFly */
static int a_flag = 0; /* set active partition */
static char *b_flag = NULL; /* path to boot code */
@@ -235,6 +238,7 @@ static void change_code();
static void get_params_to_use();
static void dos(struct dos_partition *partp);
static int open_disk(int u_flag);
+static void erase_partition(int i);
static ssize_t read_disk(off_t sector, void *buf);
static ssize_t write_disk(off_t sector, void *buf);
static int get_params();
@@ -258,7 +262,7 @@ main(int argc, char *argv[])
{
int c, i;
- while ((c = getopt(argc, argv, "BCIab:f:p:istuv1234")) != -1)
+ while ((c = getopt(argc, argv, "BCEIab:f:p:istuv1234")) != -1)
switch (c) {
case 'B':
B_flag = 1;
@@ -266,6 +270,9 @@ main(int argc, char *argv[])
case 'C':
C_flag = 1;
break;
+ case 'E':
+ E_flag = 1;
+ break;
case 'I':
I_flag = 1;
break;
@@ -384,6 +391,12 @@ main(int argc, char *argv[])
dos(partp);
if (v_flag)
print_s0(-1);
+
+ if (E_flag) {
+ /* Trim now if we're using the entire device */
+ erase_partition(0);
+ }
+
if (!t_flag)
write_s0();
exit(0);
@@ -444,8 +457,20 @@ main(int argc, char *argv[])
}
print_s0(-1);
if (!t_flag) {
- if (ok("Should we write new partition table?"))
+ if (ok("Should we write new partition table?")) {
+ if (E_flag && u_flag) {
+ /*
+ * Trim now because we've committed to
+ * updating the partition.
+ */
+ if (partition == -1)
+ for (i = 0; i < NDOSPART; i++)
+ erase_partition(i);
+ else
+ erase_partition(partition);
+ }
write_s0();
+ }
}
else
{
@@ -762,6 +787,46 @@ dos(struct dos_partition *partp)
int fd;
+/*
+ * Erase (TRIM) the sectors described by slice table entry 'i'.
+ *
+ * The unit number is parsed out of the global 'disk' path, which is expected
+ * to look like "/dev/da<unit>[s<slice>...]", and used to query the
+ * kern.cam.da.<unit>.trim_enabled sysctl.  If the sysctl does not exist or
+ * reads as zero, a message is printed and usage() is called.  Otherwise the
+ * entry's start and size (in sectors) are converted to bytes and passed to
+ * the IOCTLTRIM ioctl on the open disk descriptor 'fd'.
+ */
+static void
+erase_partition(int i)
+{
+	static const char da_prefix[] = "/dev/da";
+	struct dos_partition *partp;
+	const char *unit;
+	off_t ioarg[2];
+	char sysctl_name[64];
+	int trim_enabled = 0;
+	size_t olen = sizeof(trim_enabled);
+	size_t unit_len;
+
+	/* Only da(4) style paths carry the trim_enabled sysctl */
+	if (strncmp(disk, da_prefix, sizeof(da_prefix) - 1) != 0) {
+		printf("Device:%s does not support the TRIM command\n", disk);
+		usage();
+		return;
+	}
+
+	/* The unit is everything between the prefix and the first 's' */
+	unit = disk + sizeof(da_prefix) - 1;
+	unit_len = strcspn(unit, "s");
+	snprintf(sysctl_name, sizeof(sysctl_name),
+	    "kern.cam.da.%.*s.trim_enabled", (int)unit_len, unit);
+
+	/*
+	 * errno is only meaningful when the call itself reports failure;
+	 * testing it unconditionally can pick up a stale value.
+	 */
+	if (sysctlbyname(sysctl_name, &trim_enabled, &olen, NULL, 0) < 0) {
+		if (errno == ENOENT) {
+			printf("Device:%s does not support the TRIM command\n",
+			    disk);
+			usage();
+			return;
+		}
+		trim_enabled = 0;
+	}
+	if (!trim_enabled) {
+		printf("Erase device option selected, but sysctl (%s) "
+		    "is not enabled\n",sysctl_name);
+		usage();
+		return;
+	}
+
+	partp = ((struct dos_partition *) &mboot.parts) + i;
+	printf("erase sectors:%u %u\n",
+	    partp->dp_start,
+	    partp->dp_size);
+
+	/* Trim the Device: the ioctl takes a byte offset and a byte length */
+	ioarg[0] = partp->dp_start;
+	ioarg[0] *= secsize;
+	ioarg[1] = partp->dp_size;
+	ioarg[1] *= secsize;
+
+	if (ioctl(fd, IOCTLTRIM, ioarg) < 0) {
+		printf("Device trim failed\n");
+		usage ();
+	}
+}
+
/* Getting device status */
static int
View
1  sbin/hammer/hammer_util.h
@@ -80,6 +80,7 @@ struct volume_info {
char *name;
int fd;
off_t size;
+ off_t device_offset;
const char *type;
struct hammer_volume_ondisk *ondisk;
View
1  sbin/mount/mntopts.h
@@ -52,6 +52,7 @@ struct mntopt {
#define MOPT_NOEXEC { "exec", 1, MNT_NOEXEC, 0 }
#define MOPT_NOSUID { "suid", 1, MNT_NOSUID, 0 }
#define MOPT_NOSYMFOLLOW { "symfollow", 1, MNT_NOSYMFOLLOW, 0 }
+#define MOPT_TRIM { "trim", 0, MNT_TRIM, 0 }
#define MOPT_RDONLY { "rdonly", 0, MNT_RDONLY, 0 }
#define MOPT_SYNC { "sync", 0, MNT_SYNCHRONOUS, 0 }
#define MOPT_UNION { "union", 0, MNT_UNION, 0 }
View
4 sbin/mount/mount.8
@@ -193,6 +193,10 @@ mount the file system read-only (even the super-user may not write it).
All
.Tn I/O
to the file system should be done synchronously.
+.It Cm trim
+If the device supports trim (kern.cam.da.X.trim_enabled exists) and is set,
+the file system will perform online trim for corresponding block deletions.
+Currently only UFS supports this feature.
.It Cm suiddir
A directory on the mounted filesystem will respond to the SUID bit
being set, by setting the owner of any new files to be the same
View
23 sbin/mount/mount_ufs.c
@@ -38,11 +38,13 @@
#include <sys/param.h>
#include <sys/mount.h>
+#include <sys/sysctl.h>
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <unistd.h>
#include <vfs/ufs/ufsmount.h>
@@ -59,6 +61,7 @@ static struct mntopt mopts[] = {
MOPT_SYNC,
MOPT_UPDATE,
MOPT_IGNORE,
+ MOPT_TRIM,
MOPT_NULL
};
@@ -98,6 +101,26 @@ mount_ufs(int argc, const char **argv)
else
args.export.ex_flags = 0;
+ if (mntflags & MNT_TRIM){
+ char sysctl_name[64];
+ int trim_enabled = 0;
+ size_t olen = sizeof(trim_enabled);
+ char *dev_name = strdup(args.fspec);
+ dev_name = strtok(dev_name + strlen("/dev/da"),"s");
+ sprintf(sysctl_name, "kern.cam.da.%s.trim_enabled", dev_name);
+ sysctlbyname(sysctl_name, &trim_enabled, &olen, NULL, 0);
+ if(errno == ENOENT) {
+ printf("Device:%s does not support the TRIM command\n",
+ args.fspec);
+ ufs_usage();
+ }
+ if(!trim_enabled) {
+ printf("Online TRIM selected, but sysctl (%s) "
+ "is not enabled\n",sysctl_name);
+ ufs_usage();
+ }
+ }
+
error = getvfsbyname("ufs", &vfc);
if (error && vfsisloadable("ufs")) {
if (vfsload("ufs")) {
View
29 sbin/newfs/mkfs.c
@@ -38,6 +38,7 @@
#include "defs.h"
#include <stdlib.h>
+#include <sys/ioctl_compat.h>
/*
* make file system for cylinder-group style file systems
@@ -70,6 +71,8 @@ extern int Lflag; /* add a volume label */
extern int Nflag; /* run mkfs without writing file system */
extern int Oflag; /* format as an 4.3BSD file system */
extern int Uflag; /* enable soft updates for file system */
+extern int Eflag; /* erase contents using TRIM */
+extern uint64_t slice_offset; /* Physical device slice offset */
extern u_long fssize; /* file system size */
extern int ntracks; /* # tracks/cylinder */
extern int nsectors; /* # sectors/track */
@@ -136,6 +139,7 @@ void parentready(int);
void rdfs(daddr_t, int, char *);
void setblock(struct fs *, unsigned char *, int);
void started(int);
+void erfs(off_t, off_t);
void wtfs(daddr_t, int, char *);
void wtfsflush(void);
@@ -236,6 +240,7 @@ mkfs(char *fsys, int fi, int fo, const char *mfscopy)
sblock.fs_flags |= FS_DOSOFTDEP;
if (Lflag)
strlcpy(sblock.fs_volname, volumelabel, MAXVOLLEN);
+
/*
* Validate the given file system size.
* Verify that its last block can actually be accessed.
@@ -677,6 +682,16 @@ mkfs(char *fsys, int fi, int fo, const char *mfscopy)
sblock.fs_flags & FS_DOSOFTDEP ? " SOFTUPDATES" : "");
#undef B2MBFACTOR
}
+
+ if (Eflag && !Nflag) {
+ printf("Erasing sectors [%ld --- %ld]\n",
+ (SBOFF+ slice_offset)/sectorsize,
+ fsbtodb(&sblock,sblock.fs_size) -
+ ((SBOFF + slice_offset)/ sectorsize) - 1);
+ erfs(SBOFF+ slice_offset, (fsbtodb(&sblock,sblock.fs_size) -
+ ((SBOFF + slice_offset)/ sectorsize) - 1) *
+ (unsigned long long)sectorsize);
+ }
/*
* Now build the cylinders group blocks and
* then print out indices of cylinder groups.
@@ -1246,6 +1261,20 @@ wtfsflush(void)
}
/*
+ * Issue the IOCTLTRIM ioctl to erase a range of the device using TRIM.
+ *
+ * 'byte_start' and 'size' are passed to the driver, in that order, as a
+ * two-element off_t array on the descriptor 'fsi'.  If the ioctl fails the
+ * program exits through err() with status 37.
+ */
+void
+erfs(off_t byte_start, off_t size)
+{
+	off_t ioarg[2];
+
+	ioarg[0] = byte_start;
+	ioarg[1] = size;
+	/*
+	 * err() appends ": <strerror(errno)>" and a newline on its own, so
+	 * the format must not end in '\n'.
+	 */
+	if (ioctl(fsi, IOCTLTRIM, ioarg) < 0)
+		err(37, "Device trim failed");
+}
+
+/*
* write a block to the file system
*/
void
View
6 sbin/newfs/newfs.8
@@ -43,7 +43,7 @@
.Sh SYNOPSIS
.Nm
.Op Fl L Ar volname
-.Op Fl NCOU
+.Op Fl NCOURE
.Op Fl S Ar sector-size
.Op Fl T Ar disktype
.Op Fl a Ar maxcontig
@@ -172,6 +172,10 @@ instead of trying to get geometry information from the
storage device.
.It Fl U
Enables soft updates on the new filesystem.
+.It Fl E
+Use TRIM to erase the device's data before creating the file system. The
+underlying device must have the Trim sysctl enabled. Only devices that support
+TRIM will have such a sysctl option (kern.cam.da.X.trim_enabled).
.It Fl a Ar maxcontig
Specify the maximum number of contiguous blocks that will be
laid out before forcing a rotational delay (see the
View
37 sbin/newfs/newfs.c
@@ -44,6 +44,7 @@
#include <sys/diskslice.h>
#include <sys/file.h>
#include <sys/mount.h>
+#include <sys/sysctl.h>
#include <vfs/ufs/dir.h>
#include <vfs/ufs/dinode.h>
@@ -163,6 +164,8 @@ int Nflag; /* run without writing file system */
int Oflag; /* format as an 4.3BSD file system */
int Cflag; /* copy underlying filesystem (mfs only) */
int Uflag; /* enable soft updates for file system */
+int Eflag; /* erase contents using TRIM */
+uint64_t slice_offset; /* Physical device slice offset */
u_long fssize; /* file system size */
int ntracks = NTRACKS; /* # tracks/cylinder */
int nsectors = NSECTORS; /* # sectors/track */
@@ -237,9 +240,12 @@ main(int argc, char **argv)
opstring = mfs ?
"L:NCF:T:Ua:b:c:d:e:f:g:h:i:m:o:s:v" :
- "L:NOS:T:Ua:b:c:d:e:f:g:h:i:k:l:m:n:o:p:r:s:t:u:vx:";
+ "L:NREOS:T:Ua:b:c:d:e:f:g:h:i:k:l:m:n:o:p:r:s:t:u:vx:";
while ((ch = getopt(argc, argv, opstring)) != -1) {
switch (ch) {
+ case 'E':
+ Eflag = 1;
+ break;
case 'L':
volumelabel = optarg;
i = -1;
@@ -425,6 +431,30 @@ main(int argc, char **argv)
if (stat(special, &st) < 0 && special[0] && special[0] != '/')
asprintf(&special, "/dev/%s", special);
+ if (Eflag) {
+ char sysctl_name[64];
+ int trim_enabled = 0;
+ size_t olen = sizeof(trim_enabled);
+ char *dev_name = strdup(special);
+
+ dev_name = strtok(dev_name + strlen("/dev/da"),"s");
+ sprintf(sysctl_name, "kern.cam.da.%s.trim_enabled",
+ dev_name);
+
+ sysctlbyname(sysctl_name, &trim_enabled, &olen, NULL, 0);
+
+ if(errno == ENOENT) {
+ printf("Device:%s does not support the TRIM command\n",
+ special);
+ usage();
+ }
+ if(!trim_enabled) {
+ printf("Erase device option selected, but sysctl (%s) "
+ "is not enabled\n",sysctl_name);
+ usage();
+
+ }
+ }
if (Nflag) {
fso = -1;
} else {
@@ -505,6 +535,7 @@ main(int argc, char **argv)
/* geom.d_ncylinders not used */
geom.d_media_blocks = pinfo.media_blocks;
geom.d_media_size = pinfo.media_size;
+ slice_offset = pinfo.media_offset;
}
if (geom.d_media_blocks == 0 || geom.d_media_size == 0) {
fatal("%s: is unavailable", argv[0]);
@@ -698,7 +729,7 @@ fatal(const char *fmt, ...)
/*NOTREACHED*/
}
-static void
+void
usage(void)
{
if (mfs) {
@@ -716,10 +747,12 @@ usage(void)
#endif
fprintf(stderr, "where fsoptions are:\n");
fprintf(stderr, "\t-C (mfs) Copy the underlying filesystem to the MFS mount\n");
+ fprintf(stderr, "\t-E erase file system contents using TRIM\n");
fprintf(stderr, "\t-L volume name\n");
fprintf(stderr,
"\t-N do not create file system, just print out parameters\n");
fprintf(stderr, "\t-O create a 4.3BSD format filesystem\n");
+ fprintf(stderr, "\t-R enable TRIM\n");
fprintf(stderr, "\t-S sector size\n");
#ifdef COMPAT
fprintf(stderr, "\t-T disktype\n");
View
6 sbin/newfs_hammer/newfs_hammer.8
@@ -40,7 +40,7 @@
.Sh SYNOPSIS
.Nm
.Fl L Ar label
-.Op Fl f
+.Op Fl fE
.Op Fl b Ar bootsize
.Op Fl m Ar savesize
.Op Fl u Ar undosize
@@ -119,6 +119,10 @@ This is needed for the creation of a
file system less than 10GB size or
with less than 500MB UNDO/REDO FIFO.
This should not be used under normal circumstances.
+.It Fl E
+Use TRIM to erase the device's data before creating the file system. The
+underlying device must have the Trim sysctl enabled. Only devices that support
+TRIM will have such a sysctl option (kern.cam.da.X.trim_enabled).
.It Fl m Ar savesize
Specify a fixed area which
.Nm HAMMER
View
61 sbin/newfs_hammer/newfs_hammer.c
@@ -38,6 +38,7 @@
static int64_t getsize(const char *str, int64_t minval, int64_t maxval, int pw);
static const char *sizetostr(off_t size);
+static void trim_volume(struct volume_info *vol);
static void check_volume(struct volume_info *vol);
static void format_volume(struct volume_info *vol, int nvols,const char *label,
off_t total_size);
@@ -47,6 +48,7 @@ static void usage(void);
static int ForceOpt = 0;
static int HammerVersion = -1;
+static int Eflag = 0;
#define GIG (1024LL*1024*1024)
@@ -82,11 +84,14 @@ main(int ac, char **av)
/*
* Parse arguments
*/
- while ((ch = getopt(ac, av, "fL:b:m:u:V:")) != -1) {
+ while ((ch = getopt(ac, av, "fEL:b:m:u:V:")) != -1) {
switch(ch) {
case 'f':
ForceOpt = 1;
break;
+ case 'E':
+ Eflag = 1;
+ break;
case 'L':
label = optarg;
break;
@@ -189,6 +194,30 @@ main(int ac, char **av)
* its remaining fields.
*/
check_volume(vol);
+ if (Eflag) {
+ char sysctl_name[64];
+ int trim_enabled = 0;
+ size_t olen = sizeof(trim_enabled);
+ char *dev_name = strdup(vol->name);
+ dev_name = strtok(dev_name + strlen("/dev/da"),"s");
+
+ sprintf(sysctl_name, "kern.cam.da.%s.trim_enabled",
+ dev_name);
+ errno=0;
+ sysctlbyname(sysctl_name, &trim_enabled, &olen, NULL, 0);
+ if(errno == ENOENT) {
+ printf("Device:%s (%s) does not support the "
+ "TRIM command\n", vol->name,sysctl_name);
+ usage();
+ }
+ if(!trim_enabled) {
+ printf("Erase device option selected, but "
+ "sysctl (%s) is not enabled\n", sysctl_name);
+ usage();
+
+ }
+ trim_volume(vol);
+ }
total += vol->size;
}
@@ -273,7 +302,7 @@ void
usage(void)
{
fprintf(stderr,
- "usage: newfs_hammer -L label [-f] [-b bootsize] [-m savesize] [-u undosize]\n"
+ "usage: newfs_hammer -L label [-fE] [-b bootsize] [-m savesize] [-u undosize]\n"
" [-V version] special ...\n"
);
exit(1);
@@ -392,6 +421,29 @@ nowtime(void)
}
/*
+ * TRIM the volume, but only if the backing store is a DEVICE.
+ *
+ * The range handed to IOCTLTRIM starts at vol->device_offset and is
+ * vol->size long.  If the ioctl fails a message is printed and usage()
+ * is called.
+ */
+static
+void
+trim_volume(struct volume_info *vol)
+{
+	off_t ioarg[2];
+
+	/*
+	 * vol->type is a pointer, so sizeof(vol->type) is the pointer size
+	 * rather than the string length; compare the whole string instead.
+	 */
+	if (strcmp(vol->type, "DEVICE") != 0)
+		return;
+
+	ioarg[0] = vol->device_offset;
+	ioarg[1] = vol->size;
+	printf("Trimming Device:%s, sectors (%llu -%llu)\n",vol->name,
+	    (unsigned long long)ioarg[0]/512,
+	    (unsigned long long)ioarg[1]/512);
+	if (ioctl(vol->fd, IOCTLTRIM, ioarg) < 0) {
+		printf("Device trim failed\n");
+		usage ();
+	}
+}
+
+/*
* Check basic volume characteristics. HAMMER filesystems use a minimum
* of a 16KB filesystem buffer size.
*/
@@ -416,6 +468,10 @@ check_volume(struct volume_info *vol)
err(1, "Unable to stat %s", vol->name);
vol->size = st.st_size;
vol->type = "REGFILE";
+
+ if (Eflag)
+ errx(1,"Cannot TRIM regular file %s\n", vol->name);
+
} else {
/*
* When formatting a block device as a HAMMER volume the
@@ -433,6 +489,7 @@ check_volume(struct volume_info *vol)
}
vol->size = pinfo.media_size;
+ vol->device_offset = pinfo.media_offset;
vol->type = "DEVICE";
}
printf("Volume %d %s %-15s size %s\n",
View
1  sbin/newfs_hammer/newfs_hammer.h
@@ -40,6 +40,7 @@
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/sysctl.h>
+#include <sys/ioctl_compat.h>
#include <stdio.h>
#include <stdlib.h>
View
10 sbin/swapon/swapon.8
@@ -40,10 +40,10 @@
.Nm swapon , swapoff , swapctl
.Nd "specify devices for paging and swapping"
.Sh SYNOPSIS
-.Nm swapon Fl aq | Ar
+.Nm swapon Fl aeq | Ar
.Nm swapoff Fl aq | Ar
.Nm swapctl
-.Op Fl AghklmsU
+.Op Fl AeghklmsU
.Oo
.Fl a Ar
|
@@ -78,6 +78,10 @@ If the
.Fl q
option is used informational messages will not be
written to standard output when a swap device is added.
+If the
+.Fl e
+option is used, the device will be trimmed if
+it supports trim and the trim_enabled sysctl is on.
.Pp
The
.Nm swapoff
@@ -154,6 +158,8 @@ Output values in megabytes.
List the devices making up system swap.
.It Fl s
Print a summary line for system swap.
+.It Fl e
+Attempts to Trim the device if -[Aa] is used.
.Pp
The
.Ev BLOCKSIZE
View
114 sbin/swapon/swapon.c
@@ -38,6 +38,8 @@
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
+#include <sys/diskslice.h>
+#include <sys/ioctl_compat.h>
#include <vm/vm_param.h>
#include <err.h>
@@ -51,7 +53,7 @@
#include <libutil.h>
static void usage(void);
-static int swap_on_off(char *name, int doingall);
+static int swap_on_off(char *name, int doingall, int trim);
static void swaplist(int lflag, int sflag, int hflag);
enum { SWAPON, SWAPOFF, SWAPCTL } orig_prog, which_prog = SWAPCTL;
@@ -63,7 +65,7 @@ main(int argc, char **argv)
char *ptr;
int ret;
int ch;
- int doall, sflag, lflag, hflag, qflag;
+ int doall, sflag, lflag, hflag, qflag, eflag;
if ((ptr = strrchr(argv[0], '/')) == NULL)
ptr = argv[0];
@@ -73,8 +75,8 @@ main(int argc, char **argv)
which_prog = SWAPOFF;
orig_prog = which_prog;
- sflag = lflag = hflag = qflag = doall = 0;
- while ((ch = getopt(argc, argv, "AadghklmqsU")) != -1) {
+ sflag = lflag = hflag = qflag = doall = eflag = 0;
+ while ((ch = getopt(argc, argv, "AadeghklmqsU")) != -1) {
switch((char)ch) {
case 'A':
if (which_prog == SWAPCTL) {
@@ -96,6 +98,9 @@ main(int argc, char **argv)
else
usage();
break;
+ case 'e':
+ eflag = 1;
+ break;
case 'g':
hflag = 'G';
break;
@@ -141,7 +146,7 @@ main(int argc, char **argv)
continue;
if (strstr(fsp->fs_mntops, "noauto"))
continue;
- if (swap_on_off(fsp->fs_spec, 1)) {
+ if (swap_on_off(fsp->fs_spec, 1, eflag)) {
ret = 1;
} else {
if (!qflag) {
@@ -156,7 +161,7 @@ main(int argc, char **argv)
usage();
}
for (; *argv; ++argv) {
- if (swap_on_off(getdevpath(*argv, 0), 0)) {
+ if (swap_on_off(getdevpath(*argv, 0), 0, eflag)) {
ret = 1;
} else if (orig_prog == SWAPCTL) {
printf("%s: %sing %s as swap device\n",
@@ -174,9 +179,100 @@ main(int argc, char **argv)
exit(ret);
}
+/*
+ * TRIM the whole device or partition 'name'.
+ *
+ * The vm.swap_info_array sysctl is scanned first; if 'name' is already
+ * listed there as a swap device the program exits with status 1 instead
+ * of trimming it.  The range to trim is taken from the media_offset and
+ * media_size reported by the DIOCGPART ioctl on the opened device.
+ */
+static
+void
+trim_volume(char * name)
+{
+	struct partinfo pinfo;
+	struct xswdev *xsw;
+	const char *devpart;
+	char *xswbuf;
+	off_t ioarg[2];
+	size_t bytes = 0, ksize;
+	int fd, i, n;
+
+	/*
+	 * Determine if this device is already being used by swap without
+	 * calling swapon().
+	 */
+	if ((sysctlbyname("vm.swap_info_array", NULL, &bytes, NULL, 0) < 0) ||
+	    bytes == 0) {
+		err(1, "sysctlbyname()");
+	}
+
+	xswbuf = malloc(bytes);
+	if (xswbuf == NULL)
+		err(1, "malloc()");
+	if ((sysctlbyname("vm.swap_info_array", xswbuf, &bytes, NULL, 0) < 0) ||
+	    bytes == 0) {
+		/*
+		 * err() terminates the program, so the buffer is not freed
+		 * here; calling free() first could overwrite errno.
+		 */
+		err(1, "sysctlbyname()");
+	}
+
+	/* Compare against the device name without its "/dev/" prefix */
+	devpart = name;
+	if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
+		devpart += strlen("/dev/");
+
+	ksize = ((struct xswdev *)xswbuf)->xsw_size;
+	n = (int)(bytes / ksize);
+	for (i = 0; i < n; ++i) {
+		xsw = (void *)((char *)xswbuf + i * ksize);
+
+		if (xsw->xsw_dev == NODEV )
+			continue;
+		if(!strcmp(devname(xsw->xsw_dev, S_IFCHR), devpart)) {
+			warnx("%s: device already a swap device", name);
+			exit(1);
+		}
+	}
+	free(xswbuf);
+
+	/*
+	 * Get the size and offset of this partition/device
+	 */
+	fd = open(name, O_RDWR);
+	if (fd < 0)
+		err(1, "Unable to open %s R+W", name);
+	if (ioctl(fd, DIOCGPART, &pinfo) < 0) {
+		printf("Cannot trim regular file\n");
+		usage ();
+	}
+
+	/*Trim the Device*/
+	ioarg[0] = pinfo.media_offset;
+	ioarg[1] = pinfo.media_size;
+	printf("Trimming Device:%s, sectors (%llu -%llu)\n",name,
+	    (unsigned long long)ioarg[0]/512,
+	    (unsigned long long)ioarg[1]/512);
+	if (ioctl(fd, IOCTLTRIM, ioarg) < 0) {
+		printf("Device trim failed\n");
+		usage ();
+	}
+	close(fd);
+}
+
static int
-swap_on_off(char *name, int doingall)
+swap_on_off(char *name, int doingall, int trim)
{
+ if (which_prog == SWAPON && trim){
+ char sysctl_name[64];
+ int trim_enabled = 0;
+ size_t olen = sizeof(trim_enabled);
+ char *dev_name = strdup(name);
+ dev_name = strtok(dev_name + strlen("/dev/da"),"s");
+ sprintf(sysctl_name, "kern.cam.da.%s.trim_enabled", dev_name);
+ sysctlbyname(sysctl_name, &trim_enabled, &olen, NULL, 0);
+ if(errno == ENOENT) {
+ printf("Device:%s does not support the TRIM command\n",
+ name);
+ usage();
+ }
+ if(!trim_enabled) {
+ printf("Erase device option selected, but sysctl (%s) "
+ "is not enabled\n",sysctl_name);
+ usage();
+ }
+
+ trim_volume(name);
+
+ }
if ((which_prog == SWAPOFF ? swapoff(name) : swapon(name)) == -1) {
switch(errno) {
case EBUSY:
@@ -205,10 +301,10 @@ usage(void)
switch (orig_prog) {
case SWAPON:
case SWAPOFF:
- fprintf(stderr, "-aq | file ...\n");
+ fprintf(stderr, "-aeq | file ...\n");
break;
case SWAPCTL:
- fprintf(stderr, "[-AghklmsU] [-a file ... | -d file ...]\n");
+ fprintf(stderr, "[-AeghklmsU] [-a file ... | -d file ...]\n");
break;
}
exit(1);
View
2  sys/bus/cam/cam_ccb.h
@@ -188,6 +188,8 @@ typedef enum {
/* Notify Host Target driver of event */
XPT_NOTIFY_ACK = 0x35,
/* Acknowledgement of event */
+ XPT_TRIM = 0x36 | XPT_FC_DEV_QUEUED,
+ /* TRIM */
/* Vendor Unique codes: 0x80->0x8F */
XPT_VUNIQUE = 0x80
View
1  sys/bus/cam/cam_xpt.c
@@ -2962,6 +2962,7 @@ xpt_action(union ccb *start_ccb)
switch (start_ccb->ccb_h.func_code) {
case XPT_SCSI_IO:
+ case XPT_TRIM:
{
struct cam_ed *device;
#ifdef CAMDEBUG
View
1  sys/bus/cam/scsi/scsi_all.h
@@ -600,6 +600,7 @@ struct ata_pass_16 {
#define WRITE_BUFFER 0x3b
#define READ_BUFFER 0x3c
#define CHANGE_DEFINITION 0x40
+#define TRIM 0x42
#define LOG_SELECT 0x4c
#define LOG_SENSE 0x4d
#define MODE_SELECT_10 0x55
View
239 sys/bus/cam/scsi/scsi_da.c
@@ -47,6 +47,7 @@
#include <sys/malloc.h>
#include <sys/cons.h>
#include <sys/proc.h>
+#include <sys/ioctl_compat.h>
#include <sys/buf2.h>
#include <sys/thread2.h>
@@ -96,7 +97,8 @@ typedef enum {
DA_FLAG_OPEN = 0x100,
DA_FLAG_SCTX_INIT = 0x200,
DA_FLAG_RD_LIMIT = 0x400,
- DA_FLAG_WR_LIMIT = 0x800
+ DA_FLAG_WR_LIMIT = 0x800,
+ DA_FLAG_CAN_TRIM = 0x1000
} da_flags;
typedef enum {
@@ -112,6 +114,7 @@ typedef enum {
DA_CCB_BUFFER_IO = 0x03,
DA_CCB_WAITING = 0x04,
DA_CCB_DUMP = 0x05,
+ DA_CCB_TRIM = 0x06,
DA_CCB_TYPE_MASK = 0x0F,
DA_CCB_RETRY_UA = 0x10
} da_ccb_state;
@@ -128,9 +131,17 @@ struct disk_params {
u_int64_t sectors; /* total number sectors */
};
+/*
+ * TRIM_MAX_BLOCKS is the number of 512-byte blocks of range data a single
+ * request may carry; each range entry is 8 bytes, giving 64 entries per
+ * block.  TRIM_MAX_RANGES is parenthesized so that it expands safely inside
+ * larger expressions.
+ */
+#define TRIM_MAX_BLOCKS 8
+#define TRIM_MAX_RANGES (TRIM_MAX_BLOCKS * 64)
+struct trim_request {
+	uint8_t data[TRIM_MAX_RANGES * 8];	/* 8-byte range entries */
+	struct bio *bios[TRIM_MAX_RANGES];	/* bios merged into the request */
+};
+
struct da_softc {
struct bio_queue_head bio_queue_rd;
struct bio_queue_head bio_queue_wr;
+ struct bio_queue_head bio_queue_trim;
struct devstat device_stats;
SLIST_ENTRY(da_softc) links;
LIST_HEAD(, ccb_hdr) pending_ccbs;
@@ -141,6 +152,9 @@ struct da_softc {
int ordered_tag_count;
int outstanding_cmds_rd;
int outstanding_cmds_wr;
+ int trim_max_ranges;
+ int trim_running;
+ int trim_enabled;
struct disk_params params;
struct disk disk;
union ccb saved_ccb;
@@ -148,6 +162,7 @@ struct da_softc {
struct sysctl_ctx_list sysctl_ctx;
struct sysctl_oid *sysctl_tree;
struct callout sendordered_c;
+ struct trim_request trim_req;
};
struct da_quirk_entry {
@@ -323,6 +338,7 @@ static d_open_t daopen;
static d_close_t daclose;
static d_strategy_t dastrategy;
static d_dump_t dadump;
+static d_ioctl_t daioctl;
static periph_init_t dainit;
static void daasync(void *callback_arg, u_int32_t code,
struct cam_path *path, void *arg);
@@ -404,7 +420,8 @@ static struct dev_ops da_ops = {
.d_read = physread,
.d_write = physwrite,
.d_strategy = dastrategy,
- .d_dump = dadump
+ .d_dump = dadump,
+ .d_ioctl = daioctl
};
static struct extend_array *daperiphs;
@@ -412,6 +429,85 @@ static struct extend_array *daperiphs;
MALLOC_DEFINE(M_SCSIDA, "scsi_da", "scsi_da buffers");
+/*
+ * Device ioctl entry point.  Only IOCTLTRIM is handled; any other command
+ * returns EINVAL.
+ *
+ * For IOCTLTRIM, ap->a_data is read as two off_t values: the starting byte
+ * offset followed by the number of bytes to trim.  Both must be non-negative
+ * multiples of 512, otherwise EINVAL is returned.  The range is split into
+ * chunks that fit b_bcount and each chunk is issued synchronously as a
+ * BUF_CMD_FREEBLKS buffer through dev_dstrategy().
+ *
+ * Returns 0 on success, ENXIO if no periph exists for the unit, or the
+ * buffer's error (EIO if none was recorded) when a chunk fails.
+ */
static int
+daioctl(struct dev_ioctl_args *ap)
+{
+	cdev_t dev = ap->a_head.a_dev;
+	struct cam_periph *periph;
+	struct buf *bp;
+	off_t *del_num = (off_t*)ap->a_data;
+	off_t bytes_left;
+	off_t bytes_start;
+	int byte_count;
+	int error = 0;
+
+	periph = cam_extend_get(daperiphs, dkunit(dev));
+	if (periph == NULL)
+		return(ENXIO);
+
+	switch (ap->a_cmd) {
+	case IOCTLTRIM:
+		bytes_start = del_num[0];
+		bytes_left = del_num[1];
+
+		/*
+		 * TRIM occurs on 512-byte sectors.  The values come from the
+		 * ioctl caller, so reject bad ones instead of asserting.
+		 */
+		if (bytes_start < 0 || bytes_left < 0 ||
+		    (bytes_start % 512) != 0 || (bytes_left % 512) != 0)
+			return(EINVAL);
+
+		/* Break TRIM up into int-sized commands because of b_bcount */
+		while (bytes_left > 0) {
+			/*
+			 * Rather than squeezing out more blocks in b_bcount
+			 * and having to break up the TRIM request in
+			 * da_start(), we ensure we can always TRIM this many
+			 * bytes with one TRIM command (this happens if the
+			 * device only supports one TRIM block).
+			 *
+			 * With min TRIM blksize of 1, TRIM command free
+			 * 4194240 blks(64*65535): each LBA range can address
+			 * 65535 blks and there 64 such ranges in a 512-byte
+			 * block. And, 4194240 * 512 = 0x7FFF8000
+			 */
+			byte_count = MIN(bytes_left,0x7FFF8000);
+			bp = getnewbuf(0,0,0,1);
+
+			bp->b_cmd = BUF_CMD_FREEBLKS;
+			bp->b_bio1.bio_offset = bytes_start;
+			bp->b_bcount = byte_count;
+			bp->b_bio1.bio_flags |= BIO_SYNC;
+			bp->b_bio1.bio_done = biodone_sync;
+
+			dev_dstrategy(dev, &bp->b_bio1);
+
+			if (biowait(&bp->b_bio1, "TRIM")) {
+				kprintf("Error:%d\n", bp->b_error);
+				error = bp->b_error ? bp->b_error : EIO;
+			}
+			/* Release the buffer on the error path as well */
+			brelse(bp);
+			if (error)
+				break;
+			bytes_left -= byte_count;
+			bytes_start += byte_count;
+		}
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return(error);
+}
+
+static int
daopen(struct dev_open_args *ap)
{
cdev_t dev = ap->a_head.a_dev;
@@ -643,6 +739,8 @@ dastrategy(struct dev_strategy_args *ap)
*/
if (bp->b_cmd == BUF_CMD_WRITE || bp->b_cmd == BUF_CMD_FLUSH)
bioqdisksort(&softc->bio_queue_wr, bio);
+ else if (bp->b_cmd == BUF_CMD_FREEBLKS)
+ bioqdisksort(&softc->bio_queue_trim, bio);
else
bioqdisksort(&softc->bio_queue_rd, bio);
@@ -819,6 +917,7 @@ daoninvalidate(struct cam_periph *periph)
* XXX Handle any transactions queued to the card
* with XPT_ABORT_CCB.
*/
+ daflushbioq(&softc->bio_queue_trim, ENXIO);
daflushbioq(&softc->bio_queue_wr, ENXIO);
daflushbioq(&softc->bio_queue_rd, ENXIO);
xpt_print(periph->path, "lost device\n");
@@ -978,6 +1077,18 @@ dasysctlinit(void *context, int pending)
&softc->minimum_cmd_size, 0, dacmdsizesysctl, "I",
"Minimum CDB size");
+ /* Only create the option if the device supports TRIM */
+ if (softc->disk.d_info.d_trimflag) {
+ SYSCTL_ADD_INT(&softc->sysctl_ctx,
+ SYSCTL_CHILDREN(softc->sysctl_tree),
+ OID_AUTO,
+ "trim_enabled",
+ CTLFLAG_RW,
+ &softc->trim_enabled,
+ 0,
+ "Enable TRIM for this device (SSD))");
+ }
+
cam_periph_release(periph);
rel_mplock();
}
@@ -1037,6 +1148,7 @@ daregister(struct cam_periph *periph, void *arg)
softc = kmalloc(sizeof(*softc), M_DEVBUF, M_INTWAIT | M_ZERO);
LIST_INIT(&softc->pending_ccbs);
softc->state = DA_STATE_PROBE;
+ bioq_init(&softc->bio_queue_trim);
bioq_init(&softc->bio_queue_rd);
bioq_init(&softc->bio_queue_wr);
if (SID_IS_REMOVABLE(&cgd->inq_data))
@@ -1044,6 +1156,17 @@ daregister(struct cam_periph *periph, void *arg)
if ((cgd->inq_data.flags & SID_CmdQue) != 0)
softc->flags |= DA_FLAG_TAGGED_QUEUING;
+ /* Used to get TRIM status from AHCI driver */
+ if (cgd->inq_data.vendor_specific1[0] == 1) {
+ /*
+ * max number of lba ranges an SSD can handle in a single
+ * TRIM command. vendor_specific1[1] is the num of 512-byte
+ * blocks the SSD reports that can be passed in a TRIM cmd.
+ */
+ softc->trim_max_ranges =
+ min(cgd->inq_data.vendor_specific1[1] * 64, TRIM_MAX_RANGES);
+ }
+
periph->softc = softc;
cam_extend_set(daperiphs, periph->unit_number, periph);
@@ -1159,6 +1282,8 @@ daregister(struct cam_periph *periph, void *arg)
(DA_DEFAULT_TIMEOUT * hz) / DA_ORDEREDTAG_INTERVAL,
dasendorderedtag, softc);
+
+
return(CAM_REQ_CMP);
}
@@ -1204,6 +1329,79 @@ dastart(struct cam_periph *periph, union ccb *start_ccb)
break;
}
+ /* Run the trim command if not already running */
+ if (!softc->trim_running &&
+ (bio = bioq_first(&softc->bio_queue_trim)) != 0) {
+ struct trim_request *req = &softc->trim_req;
+ struct bio *bio1;
+ int bps = 0, ranges = 0;
+
+ softc->trim_running = 1;
+ bzero(req, sizeof(*req));
+ bio1 = bio;
+ while (1) {
+ uint64_t lba;
+ int count;
+
+ bp = bio1->bio_buf;
+ count = bp->b_bcount / softc->params.secsize;
+ lba = bio1->bio_offset/softc->params.secsize;
+
+ kprintf("trim lba:%llu boff:%llu count:%d\n",
+ (unsigned long long) lba,
+ (unsigned long long) bio1->bio_offset,
+ count);
+
+ bioq_remove(&softc->bio_queue_trim, bio1);
+ while (count > 0) {
+ int c = min(count, 0xffff);
+ int off = ranges * 8;
+
+ req->data[off + 0] = lba & 0xff;
+ req->data[off + 1] = (lba >> 8) & 0xff;
+ req->data[off + 2] = (lba >> 16) & 0xff;
+ req->data[off + 3] = (lba >> 24) & 0xff;
+ req->data[off + 4] = (lba >> 32) & 0xff;
+ req->data[off + 5] = (lba >> 40) & 0xff;
+ req->data[off + 6] = c & 0xff;
+ req->data[off + 7] = (c >> 8) & 0xff;
+ lba += c;
+ count -= c;
+ ranges++;
+ }
+
+ /* Try to merge multiple TRIM requests */
+ req->bios[bps++] = bio1;
+ bio1 = bioq_first(&softc->bio_queue_trim);
+ if (bio1 == NULL ||
+ bio1->bio_buf->b_bcount / softc->params.secsize >
+ (softc->trim_max_ranges - ranges) * 0xffff)
+ break;
+ }
+
+
+ cam_fill_csio(&start_ccb->csio,
+ 1/*retries*/,
+ dadone,
+ CAM_DIR_OUT,
+ MSG_SIMPLE_Q_TAG,
+ req->data,
+ ((ranges +63)/64)*512,
+ SSD_FULL_SIZE,
+ sizeof(struct scsi_rw_6),
+ da_default_timeout*2);
+
+ start_ccb->ccb_h.ccb_state = DA_CCB_TRIM;
+ LIST_INSERT_HEAD(&softc->pending_ccbs,
+ &start_ccb->ccb_h, periph_links.le);
+ start_ccb->csio.ccb_h.func_code = XPT_TRIM;
+ start_ccb->ccb_h.ccb_bio = bio;
+ devstat_start_transaction(&softc->device_stats);
+ xpt_action(start_ccb);
+ xpt_schedule(periph, 1);
+ break;
+ }
+
/*
* Select a read or write buffer to queue. Limit the number
* of tags dedicated to reading or writing, giving reads
@@ -1314,6 +1512,11 @@ dastart(struct cam_periph *periph, union ccb *start_ccb)
);
}
break;
+ case BUF_CMD_FREEBLKS:
+ if (softc->disk.d_info.d_trimflag & DA_FLAG_CAN_TRIM){
+ start_ccb->csio.ccb_h.func_code = XPT_TRIM;
+ break;
+ }
default:
xpt_release_ccb(start_ccb);
start_ccb = NULL;
@@ -1461,6 +1664,7 @@ dadone(struct cam_periph *periph, union ccb *done_ccb)
csio = &done_ccb->csio;
switch (csio->ccb_h.ccb_state & DA_CCB_TYPE_MASK) {
case DA_CCB_BUFFER_IO:
+ case DA_CCB_TRIM:
{
struct buf *bp;
struct bio *bio;
@@ -1555,7 +1759,28 @@ dadone(struct cam_periph *periph, union ccb *done_ccb)
}
devstat_end_transaction_buf(&softc->device_stats, bp);
- biodone(bio);
+ if ((csio->ccb_h.ccb_state & DA_CCB_TYPE_MASK) ==
+ DA_CCB_TRIM) {
+ struct trim_request *req =
+ (struct trim_request *) csio->data_ptr;
+ int i;
+
+ for (i = 1; i < softc->trim_max_ranges &&
+ req->bios[i]; i++) {
+ struct bio *bp1 = req->bios[i];
+
+ bp1->bio_buf->b_resid = bp->b_resid;
+ bp1->bio_buf->b_error = bp->b_error;
+ if (bp->b_flags & B_ERROR)
+ bp1->bio_buf->b_flags |= B_ERROR;
+ biodone(bp1);
+ }
+ softc->trim_running = 0;
+ biodone(bio);
+ xpt_schedule(periph,1);
+ } else
+ biodone(bio);
+
if (mustsched)
xpt_schedule(periph, /*priority*/1);
@@ -1618,6 +1843,7 @@ dadone(struct cam_periph *periph, union ccb *done_ccb)
(uintmax_t)dp->sectors,
dp->secsize, dp->heads, dp->secs_per_track,
dp->cylinders);
+
CAM_SIM_UNLOCK(periph->sim);
info.d_media_blksize = softc->params.secsize;
info.d_media_blocks = softc->params.sectors;
@@ -1737,6 +1963,13 @@ dadone(struct cam_periph *periph, union ccb *done_ccb)
taskqueue_enqueue(taskqueue_thread[mycpuid],
&softc->sysctl_task);
}
+
+ if (softc->trim_max_ranges) {
+ softc->disk.d_info.d_trimflag |= DA_FLAG_CAN_TRIM;
+ kprintf("%s%d: supports TRIM\n",
+ periph->periph_name,
+ periph->unit_number);
+ }
softc->state = DA_STATE_NORMAL;
/*
* Since our peripheral may be invalidated by an error
View
52 sys/dev/disk/ahci/ahci_cam.c
@@ -958,6 +958,17 @@ ahci_xpt_action(struct cam_sim *sim, union ccb *ccb)
break;
}
break;
+ case XPT_TRIM:
+ {
+ scsi_cdb_t cdb;
+ struct ccb_scsiio *csio;
+ csio = &ccb->csio;
+ cdb = (void *)((ccbh->flags & CAM_CDB_POINTER) ?
+ csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
+ cdb->generic.opcode = TRIM;
+ ahci_xpt_scsi_disk_io(ap, atx, ccb);
+ break;
+ }
default:
ccbh->status = CAM_REQ_INVALID;
xpt_done(ccb);
@@ -1067,6 +1078,17 @@ ahci_xpt_scsi_disk_io(struct ahci_port *ap, struct ata_port *atx,
sizeof(rdata->inquiry_data.revision));
ccbh->status = CAM_REQ_CMP;
}
+
+ /*
+ * Use the vendor specific area to set the TRIM status
+ * for scsi_da
+ */
+ if (at->at_identify.support_dsm) {
+ rdata->inquiry_data.vendor_specific1[0] =
+ at->at_identify.support_dsm &ATA_SUPPORT_DSM_TRIM;
+ rdata->inquiry_data.vendor_specific1[1] =
+ at->at_identify.max_dsm_blocks;
+ }
break;
case READ_CAPACITY_16:
if (cdb->read_capacity_16.service_action != SRC16_SERVICE_ACTION) {
@@ -1119,6 +1141,36 @@ ahci_xpt_scsi_disk_io(struct ahci_port *ap, struct ata_port *atx,
xa->flags = 0;
xa->complete = ahci_ata_complete_disk_synchronize_cache;
break;
+ case TRIM:
+ fis = xa->fis;
+ fis->command = ATA_C_DATA_SET_MANAGEMENT;
+ fis->features = (u_int8_t)ATA_SF_DSM_TRIM;
+ fis->features_exp = (u_int8_t)(ATA_SF_DSM_TRIM>> 8);
+
+ xa->flags = ATA_F_WRITE;
+ fis->flags = ATA_H2D_FLAGS_CMD;
+
+ xa->data = csio->data_ptr;
+ xa->datalen = csio->dxfer_len;
+ xa->timeout = ccbh->timeout*50; /* milliseconds */
+
+ fis->sector_count =(u_int8_t)(xa->datalen/512);
+ fis->sector_count_exp =(u_int8_t)((xa->datalen/512)>>8);
+
+ lba = 0;
+ fis->lba_low = (u_int8_t)lba;
+ fis->lba_mid = (u_int8_t)(lba >> 8);
+ fis->lba_high = (u_int8_t)(lba >> 16);
+ fis->lba_low_exp = (u_int8_t)(lba >> 24);
+ fis->lba_mid_exp = (u_int8_t)(lba >> 32);
+ fis->lba_high_exp = (u_int8_t)(lba >> 40);
+
+ fis->device = ATA_H2D_DEVICE_LBA;
+ xa->data = csio->data_ptr;
+
+ xa->complete = ahci_ata_complete_disk_rw;
+ ccbh->status = CAM_REQ_INPROG;
+ break;
case TEST_UNIT_READY:
case START_STOP_UNIT:
case PREVENT_ALLOW:
View
14 sys/dev/disk/ahci/atascsi.h
@@ -23,6 +23,7 @@ struct scsi_link;
* ATA commands
*/
+#define ATA_C_DATA_SET_MANAGEMENT 0x06 /* Data Set Management command */
#define ATA_C_SATA_FEATURE_ENA 0x10
#define ATA_C_READDMA_EXT 0x25
#define ATA_C_READ_LOG_EXT 0x2f
@@ -54,6 +55,7 @@ struct scsi_link;
/*
* ATA SET FEATURES subcommands
*/
+#define ATA_SF_DSM_TRIM 0x01 /* TRIM DSM feature */
#define ATA_SF_WRITECACHE_EN 0x02
#define ATA_SF_SETXFER 0x03
#define ATA_SF_LOOKAHEAD_EN 0xaa
@@ -93,7 +95,10 @@ struct ata_identify {
u_int16_t recmwdma; /* 66 */
u_int16_t minpio; /* 67 */
u_int16_t minpioflow; /* 68 */
- u_int16_t reserved4[2]; /* 69 */
+ u_int16_t support3; /* 69 */
+#define ATA_SUPPORT_RZAT 0x0020
+#define ATA_SUPPORT_DRAT 0x4000
+ u_int16_t reserved4; /* 70 */
u_int16_t typtime[2]; /* 71 */
u_int16_t reserved5[2]; /* 73 */
u_int16_t qdepth; /* 75 */
@@ -123,7 +128,7 @@ struct ata_identify {
u_int16_t streamperf[2]; /* 98 */
u_int16_t addrsecxt[4]; /* 100 */
u_int16_t stream_xfer_p; /* 104 */
- u_int16_t padding1; /* 105 */
+ u_int16_t max_dsm_blocks; /* 105 */
u_int16_t phys_sect_sz; /* 106 */
u_int16_t seek_delay; /* 107 */
u_int16_t naa_ieee_oui; /* 108 */
@@ -141,7 +146,10 @@ struct ata_identify {
#define ATA_SECURE_LOCKED (1<<2)
#define ATA_SECURE_FROZEN (1<<3)
u_int16_t vendor[31]; /* 129 */
- u_int16_t padding3[16]; /* 160 */
+ u_int16_t padding3[9]; /* 160 */
+ u_int16_t support_dsm; /* 169 */
+#define ATA_SUPPORT_DSM_TRIM 0x0001
+ u_int16_t padding5[6]; /* 170 */
u_int16_t curmedser[30]; /* 176 */
u_int16_t sctsupport; /* 206 */
u_int16_t padding4[48]; /* 207 */
View
2  sys/kern/vfs_bio.c
@@ -1995,7 +1995,7 @@ vfs_bio_awrite(struct buf *bp)
*
* MPALMOSTSAFE
*/
-static struct buf *
+struct buf *
getnewbuf(int blkflags, int slptimeo, int size, int maxsize)
{
struct buf *bp;
View
4 sys/kern/vfs_syscalls.c
@@ -356,11 +356,11 @@ sys_mount(struct mount_args *uap)
mp->mnt_kern_flag |= MNTK_WANTRDWR;
mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
- MNT_NOSYMFOLLOW | MNT_IGNORE |
+ MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
- MNT_NOSYMFOLLOW | MNT_IGNORE |
+ MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
/*
* Mount the filesystem.
View
1  sys/sys/buf.h
@@ -426,6 +426,7 @@ struct buf *findblk (struct vnode *, off_t, int);
struct buf *getblk (struct vnode *, off_t, int, int, int);
struct buf *getcacheblk (struct vnode *, off_t, int);
struct buf *geteblk (int);
+struct buf *getnewbuf(int, int, int, int);
void bqhold(struct buf *bp);
void bqdrop(struct buf *bp);
void regetblk(struct buf *bp);
View
1  sys/sys/disk.h
@@ -92,6 +92,7 @@ struct disk_info {
u_int d_ncylinders;
u_int d_secpertrack;
u_int d_secpercyl;
+ u_int d_trimflag;
char *d_serialno;
};
View
1  sys/sys/ioctl_compat.h
@@ -166,5 +166,6 @@ struct sgttyb {
#define OTTYDISC 0
#define NETLDISC 1
#define NTTYDISC 2
+#define IOCTLTRIM _IOW('t', 128, off_t[2])
#endif /* !_SYS_IOCTL_COMPAT_H_ */
View
3  sys/sys/mount.h
@@ -225,6 +225,7 @@ struct mount {
#define MNT_SUIDDIR 0x00100000 /* special handling of SUID on dirs */
#define MNT_SOFTDEP 0x00200000 /* soft updates being done */
#define MNT_NOSYMFOLLOW 0x00400000 /* do not follow symlinks */
+#define MNT_TRIM 0x01000000 /* Enable online FS trimming */
#define MNT_NOATIME 0x10000000 /* disable update of file access time */
#define MNT_NOCLUSTERR 0x40000000 /* disable cluster read */
#define MNT_NOCLUSTERW 0x80000000 /* disable cluster write */
@@ -262,7 +263,7 @@ struct mount {
MNT_LOCAL | MNT_USER | MNT_QUOTA | \
MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \
MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \
- MNT_IGNORE | MNT_NOSYMFOLLOW | MNT_EXPUBLIC )
+ MNT_IGNORE | MNT_NOSYMFOLLOW | MNT_EXPUBLIC | MNT_TRIM)
/*
* External filesystem command modifier flags.
* Unmount can use the MNT_FORCE flag.
View
107 sys/vfs/ufs/ffs_alloc.c
@@ -40,6 +40,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
+#include <sys/buf2.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/vnode.h>
@@ -48,6 +49,7 @@
#include <sys/sysctl.h>
#include <sys/syslog.h>
+#include <sys/taskqueue.h>
#include <machine/inttypes.h>
#include "quota.h"
@@ -64,6 +66,8 @@ typedef ufs_daddr_t allocfcn_t (struct inode *ip, int cg, ufs_daddr_t bpref,
static ufs_daddr_t ffs_alloccg (struct inode *, int, ufs_daddr_t, int);
static ufs_daddr_t
ffs_alloccgblk (struct inode *, struct buf *, ufs_daddr_t);
+static void ffs_blkfree_cg(struct fs *, struct vnode *, cdev_t , ino_t,
+ uint32_t , ufs_daddr_t, long );
#ifdef DIAGNOSTIC
static int ffs_checkblk (struct inode *, ufs_daddr_t, long);
#endif
@@ -1475,36 +1479,35 @@ ffs_nodealloccg(struct inode *ip, int cg, ufs_daddr_t ipref, int mode)
* block reassembly is checked.
*/
void
-ffs_blkfree(struct inode *ip, ufs_daddr_t bno, long size)
+ffs_blkfree_cg(struct fs * fs, struct vnode * i_devvp, cdev_t i_dev, ino_t i_number,
+ uint32_t i_din_uid, ufs_daddr_t bno, long size)
{
- struct fs *fs;
struct cg *cgp;
struct buf *bp;
ufs_daddr_t blkno;
int i, error, cg, blk, frags, bbase;
uint8_t *blksfree;
- fs = ip->i_fs;
- VOP_FREEBLKS(ip->i_devvp, fsbtodoff(fs, bno), size);
+ VOP_FREEBLKS(i_devvp, fsbtodoff(fs, bno), size);
if ((uint)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
kprintf("dev=%s, bno = %ld, bsize = %ld, size = %ld, fs = %s\n",
- devtoname(ip->i_dev), (long)bno, (long)fs->fs_bsize, size,
+ devtoname(i_dev), (long)bno, (long)fs->fs_bsize, size,
fs->fs_fsmnt);
panic("ffs_blkfree: bad size");
}
cg = dtog(fs, bno);
if ((uint)bno >= fs->fs_size) {
kprintf("bad block %ld, ino %lu\n",
- (long)bno, (u_long)ip->i_number);
- ffs_fserr(fs, ip->i_uid, "bad block");
+ (long)bno, (u_long)i_number);
+ ffs_fserr(fs, i_din_uid, "bad block");
return;
}
/*
* Load the cylinder group
*/
- error = bread(ip->i_devvp, fsbtodoff(fs, cgtod(fs, cg)),
+ error = bread(i_devvp, fsbtodoff(fs, cgtod(fs, cg)),
(int)fs->fs_cgsize, &bp);
if (error) {
brelse(bp);
@@ -1526,7 +1529,7 @@ ffs_blkfree(struct inode *ip, ufs_daddr_t bno, long size)
blkno = fragstoblks(fs, bno);
if (!ffs_isfreeblock(fs, blksfree, blkno)) {
kprintf("dev = %s, block = %ld, fs = %s\n",
- devtoname(ip->i_dev), (long)bno, fs->fs_fsmnt);
+ devtoname(i_dev), (long)bno, fs->fs_fsmnt);
panic("ffs_blkfree: freeing free block");
}
ffs_setblock(fs, blksfree, blkno);
@@ -1564,7 +1567,7 @@ ffs_blkfree(struct inode *ip, ufs_daddr_t bno, long size)
for (i = 0; i < frags; i++) {
if (isset(blksfree, bno + i)) {
kprintf("dev = %s, block = %ld, fs = %s\n",
- devtoname(ip->i_dev), (long)(bno + i),
+ devtoname(i_dev), (long)(bno + i),
fs->fs_fsmnt);
panic("ffs_blkfree: freeing free frag");
}
@@ -1601,6 +1604,90 @@ ffs_blkfree(struct inode *ip, ufs_daddr_t bno, long size)
bdwrite(bp);
}
+/*
+ * Context for one deferred block free on a MNT_TRIM mount.  Filled in by
+ * ffs_blkfree(), attached to the TRIM request through
+ * bio_caller_info1.ptr, and released with kfree() by
+ * ffs_blkfree_trim_task().
+ */
+struct ffs_blkfree_trim_params {
+ /* Queued on taskqueue_swi by ffs_blkfree_trim_completed(). */
+ struct task task;
+ /* Block number passed to ffs_blkfree(). */
+ ufs_daddr_t bno;
+ /* Size passed to ffs_blkfree(); also used as the request's b_bcount. */
+ long size;
+
+ /*
+ * With TRIM, the inode pointer is gone by the time the callback runs,
+ * but we still need the following fields (copied from the inode in
+ * ffs_blkfree()) for ffs_blkfree_cg().
+ */
+ struct vnode *i_devvp;
+ struct fs *i_fs;
+ cdev_t i_dev;
+ ino_t i_number;
+ uint32_t i_din_uid;
+};
+
+
+/*
+ * Taskqueue handler queued by ffs_blkfree_trim_completed() once a TRIM
+ * request has finished.  Calls ffs_blkfree_cg() with the values saved in
+ * the parameter block passed as ctx, then frees that block.  The pending
+ * count is not used.
+ */
+static void
+ffs_blkfree_trim_task(void *ctx, int pending)
+{
+	struct ffs_blkfree_trim_params *params = ctx;
+
+	ffs_blkfree_cg(params->i_fs, params->i_devvp, params->i_dev,
+	    params->i_number, params->i_din_uid, params->bno, params->size);
+	kfree(params, M_TEMP);
+}
+
+
+
+/*
+ * bio_done callback for the TRIM request issued by ffs_blkfree().
+ *
+ * Schedules ffs_blkfree_trim_task() on taskqueue_swi with the parameter
+ * block that ffs_blkfree() attached to the request, then calls biodone()
+ * on the bio again.
+ */
+static void
+ffs_blkfree_trim_completed(struct bio *biop)
+{
+	struct ffs_blkfree_trim_params *tp;
+
+	/*
+	 * ffs_blkfree() stores the parameters in, and installs this callback
+	 * on, the buffer's b_bio1, so biop carries them directly.  Look them
+	 * up exactly once so that the task being initialized is guaranteed
+	 * to be the task being queued.
+	 */
+	tp = biop->bio_caller_info1.ptr;
+	TASK_INIT(&tp->task, 0, ffs_blkfree_trim_task, tp);
+	taskqueue_enqueue(taskqueue_swi, &tp->task);
+	biodone(biop);
+}
+
+
+/*
+ * Free a block or fragment belonging to inode ip.
+ *
+ * If the filesystem is not mounted with MNT_TRIM, ffs_blkfree_cg() is
+ * called right away.  If MNT_TRIM is set, a BUF_CMD_FREEBLKS request for
+ * the range is issued to the device first, and ffs_blkfree_cg() is called
+ * only after that request has finished and the completion callback has
+ * run (ffs_blkfree_trim_completed() -> ffs_blkfree_trim_task()).  The
+ * logic here is that we trim the blocks before updating the bitmap so
+ * that we don't reuse a block before we actually trim it, which would
+ * result in trimming a valid block.
+ */
+void
+ffs_blkfree(struct inode *ip, ufs_daddr_t bno, long size)
+{
+	struct ufsmount *ump = VFSTOUFS(ITOV(ip)->v_mount);
+	struct ffs_blkfree_trim_params *tp;
+	struct buf *bp;
+
+	if (!(ump->um_mountp->mnt_flag & MNT_TRIM)) {
+		ffs_blkfree_cg(ip->i_fs, ip->i_devvp, ip->i_dev, ip->i_number,
+		    ip->i_uid, bno, size);
+		return;
+	}
+
+	/*
+	 * The inode pointer is not available in the completion path, so
+	 * save everything ffs_blkfree_cg() needs.  The parameter block is
+	 * freed by ffs_blkfree_trim_task().
+	 */
+	tp = kmalloc(sizeof(*tp), M_TEMP, M_WAITOK);
+	tp->bno = bno;
+	tp->size = size;
+	tp->i_fs = ip->i_fs;
+	tp->i_devvp = ip->i_devvp;
+	tp->i_dev = ip->i_dev;
+	tp->i_number = ip->i_number;
+	tp->i_din_uid = ip->i_uid;
+
+	/* Build the TRIM request and hand it to the device vnode. */
+	bp = getnewbuf(0, 0, 0, 1);
+	BUF_KERNPROC(bp);
+	bp->b_cmd = BUF_CMD_FREEBLKS;
+	bp->b_bio1.bio_offset = fsbtodoff(ip->i_fs, bno);
+	bp->b_bcount = size;
+	bp->b_bio1.bio_caller_info1.ptr = tp;
+	bp->b_bio1.bio_done = ffs_blkfree_trim_completed;
+	vn_strategy(ip->i_devvp, &bp->b_bio1);
+}
+
#ifdef DIAGNOSTIC
/*
* Verify allocation of a block or fragment. Returns true if block or
View
1  sys/vfs/ufs/ffs_vfsops.c
@@ -37,6 +37,7 @@
#include "opt_quota.h"
+#include <sys/disk.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
Please sign in to comment.
Something went wrong with that request. Please try again.